From 14a40169643e2fc740de75e896b53d55dc75dfe0 Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Fri, 8 Mar 2024 09:35:05 +0900 Subject: [PATCH 01/14] Update README.md (#166) underying -> underlying --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2756292..52927d0 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ If an application fails to start under ZLUDA or crashes please check [Known Issu - If both integrated AMD GPU and dedicated AMD GPU are present in the system, ZLUDA uses the integrated GPU. - This is a bug in underying ROCm/HIP runtime. You can work around it by disabling the integrated GPU. + This is a bug in underlying ROCm/HIP runtime. You can work around it by disabling the integrated GPU. On Windows we recommend you use environment variable `HIP_VISIBLE_DEVICES=1` environment variable (more [here](https://rocmdocs.amd.com/en/latest/conceptual/gpu-isolation.html#hip-visible-devices)) or disable it system-wide in Device Manager. 
From f47a93a9512fbc91ab614e118abfa7378bf1f4b9 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Sun, 17 Mar 2024 01:32:48 +0100 Subject: [PATCH 02/14] Fix reported build errors (#178) --- ext/llvm-sys.rs/build.rs | 82 +++++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 27 deletions(-) diff --git a/ext/llvm-sys.rs/build.rs b/ext/llvm-sys.rs/build.rs index a7363a9..c83930f 100644 --- a/ext/llvm-sys.rs/build.rs +++ b/ext/llvm-sys.rs/build.rs @@ -3,7 +3,7 @@ extern crate convert_case; use convert_case::{Case, Casing, StateConverter}; use std::{ - env, + env, io, path::PathBuf, process::{Command, Stdio}, }; @@ -17,8 +17,9 @@ fn main() { .map(|comp| comp.from_case(Case::Snake)); let msvc = is_msvc(); let (llvm_dir, additonal_cmake_file) = get_llvm_dir(); - let out_dir = build_cmake_targets(llvm_components.clone(), llvm_dir, additonal_cmake_file); - emit_compile_and_linking_information(llvm_components, out_dir, msvc) + let (cmake_profile, out_dir) = + build_cmake_targets(llvm_components.clone(), llvm_dir, additonal_cmake_file); + emit_compile_and_linking_information(llvm_components, cmake_profile, out_dir, msvc) } fn is_msvc() -> bool { @@ -41,11 +42,14 @@ fn build_cmake_targets<'a>( components: impl Iterator>, llvm_dir: PathBuf, additional_cmake_file: PathBuf, -) -> PathBuf { +) -> (String, PathBuf) { let mut cmake = Config::new(llvm_dir); use_ninja(&mut cmake); cmake .always_configure(true) + // Should be detected automatically, but we have reports of + // LLVM fiding ZLIB on Windows and then failing to link it + .define("LLVM_ENABLE_ZLIB", "OFF") .define("LLVM_ENABLE_TERMINFO", "OFF") .define("LLVM_BUILD_TOOLS", "OFF") .define("LLVM_TARGETS_TO_BUILD", "") @@ -57,7 +61,10 @@ fn build_cmake_targets<'a>( .build_target(&format!("LLVM{}", component.to_case(Case::Pascal))) .build(); } - cmake.build_target("llvm-config").build() + ( + cmake.get_profile().to_string(), + cmake.build_target("llvm-config").build(), + ) } fn use_ninja(cmake: &mut Config) { 
@@ -76,31 +83,27 @@ fn use_ninja(cmake: &mut Config) { } fn emit_compile_and_linking_information<'a>( - llvm_components: impl Iterator>, + llvm_components: impl Iterator> + Clone, + cmake_profile: String, out_dir: PathBuf, is_msvc: bool, ) { - let mut llvm_config_path = out_dir.clone(); - llvm_config_path.push("build"); - llvm_config_path.push("bin"); - llvm_config_path.push("llvm-config"); - let mut llvm_config_cmd = Command::new(&llvm_config_path); - llvm_config_cmd.args([ - "--cxxflags", - "--ldflags", - "--libdir", - "--libnames", - "--system-libs", - "--link-static", - ]); - for component in llvm_components { - llvm_config_cmd.arg(&component.to_case(Case::Flat)); - } - let llvm_config_output = llvm_config_cmd - .stdin(Stdio::null()) - .stderr(Stdio::null()) - .output() - .unwrap(); + // MSBuild uses didfferent output path from ninja or Makefile. + // Not sure how to query CMake about it, so we just try once with + // ninja/Makefile path and then once with MSBuild path + let llvm_config_output = execute_llvm_config( + &out_dir, + &["build", "bin", "llvm-config"], + llvm_components.clone(), + ) + .or_else(|_| { + execute_llvm_config( + &out_dir, + &["build", &*cmake_profile, "bin", "llvm-config"], + llvm_components, + ) + }) + .unwrap(); if !llvm_config_output.status.success() { panic!() } @@ -138,3 +141,28 @@ fn emit_compile_and_linking_information<'a>( println!("cargo:rustc-link-lib=stdc++"); } } + +fn execute_llvm_config<'a>( + out_dir: &PathBuf, + llvm_config_exe_relative: &[&str], + llvm_components: impl Iterator>, +) -> io::Result { + let mut llvm_config_path = out_dir.clone(); + llvm_config_path.extend(llvm_config_exe_relative); + let mut llvm_config_cmd = Command::new(&llvm_config_path); + llvm_config_cmd.args([ + "--cxxflags", + "--ldflags", + "--libdir", + "--libnames", + "--system-libs", + "--link-static", + ]); + for component in llvm_components { + llvm_config_cmd.arg(&component.to_case(Case::Flat)); + } + llvm_config_cmd + .stdin(Stdio::null()) + 
.stderr(Stdio::null()) + .output() +} From 1ede61c6963ea4909412d9d2f69cba189765e3e1 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Sun, 17 Mar 2024 14:53:15 +0100 Subject: [PATCH 03/14] Disable even more optional LLVM components (#179) --- ext/llvm-sys.rs/build.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ext/llvm-sys.rs/build.rs b/ext/llvm-sys.rs/build.rs index c83930f..9b43c8b 100644 --- a/ext/llvm-sys.rs/build.rs +++ b/ext/llvm-sys.rs/build.rs @@ -48,8 +48,14 @@ fn build_cmake_targets<'a>( cmake .always_configure(true) // Should be detected automatically, but we have reports of - // LLVM fiding ZLIB on Windows and then failing to link it + // LLVM fiding ZLIB on Windows and then failing to link it. + // Out of caution we explicitly disable all autodetectable components + .define("LLVM_ENABLE_LIBXML2", "OFF") .define("LLVM_ENABLE_ZLIB", "OFF") + .define("LLVM_ENABLE_ZSTD", "OFF") + .define("LLVM_ENABLE_CURL", "OFF") + .define("LLVM_ENABLE_HTTPLIB", "OFF") + .define("LLVM_ENABLE_LIBEDIT", "OFF") .define("LLVM_ENABLE_TERMINFO", "OFF") .define("LLVM_BUILD_TOOLS", "OFF") .define("LLVM_TARGETS_TO_BUILD", "") From 7d4147c8b2cc31422a73f5db3ec065db8af82246 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Thu, 28 Mar 2024 17:12:10 +0100 Subject: [PATCH 04/14] Add Blender 4.2 support (#184) Redo primary context and fix various long-standing bugs around this API --- zluda/src/cuda.rs | 5 ++ zluda/src/impl/context.rs | 126 ++++++++++++++++++++++++--------- zluda/src/impl/dark_api.rs | 84 ++++++++++++---------- zluda/src/impl/device.rs | 105 +++++++++++---------------- zluda/src/impl/mod.rs | 4 ++ zluda/src/impl/module.rs | 20 +++--- zluda/src/impl/stream.rs | 20 +++--- zluda/tests/primary_context.rs | 84 ++++++++++++++++++++++ 8 files changed, 288 insertions(+), 160 deletions(-) create mode 100644 zluda/tests/primary_context.rs diff --git a/zluda/src/cuda.rs b/zluda/src/cuda.rs index 898d732..1d054c3 100644 --- a/zluda/src/cuda.rs +++ 
b/zluda/src/cuda.rs @@ -69,6 +69,7 @@ cuda_function_declarations!( cuCtxGetDevice, cuCtxGetLimit, cuCtxSetLimit, + cuCtxSetFlags, cuCtxGetStreamPriorityRange, cuCtxSynchronize, cuCtxSetCacheConfig, @@ -485,6 +486,10 @@ mod definitions { context::set_limit(limit, value) } + pub(crate) unsafe fn cuCtxSetFlags(flags: u32) -> Result<(), CUresult> { + context::set_flags(flags) + } + pub(crate) unsafe fn cuCtxGetStreamPriorityRange( leastPriority: *mut ::std::os::raw::c_int, greatestPriority: *mut ::std::os::raw::c_int, diff --git a/zluda/src/impl/context.rs b/zluda/src/impl/context.rs index 429338b..d1b3e7b 100644 --- a/zluda/src/impl/context.rs +++ b/zluda/src/impl/context.rs @@ -7,7 +7,7 @@ use cuda_types::*; use hip_runtime_sys::*; use rustc_hash::{FxHashMap, FxHashSet}; use std::ptr; -use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::atomic::AtomicU32; use std::sync::Mutex; use std::{cell::RefCell, ffi::c_void}; @@ -28,57 +28,104 @@ impl ZludaObject for ContextData { const LIVENESS_FAIL: CUresult = CUresult::CUDA_ERROR_INVALID_CONTEXT; fn drop_with_result(&mut self, _: bool) -> Result<(), CUresult> { - let mutable = self - .mutable - .get_mut() - .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; - fold_cuda_errors(mutable.streams.iter().copied().map(|s| { - unsafe { LiveCheck::drop_box_with_result(s, true)? }; - Ok(()) - })) + self.with_inner_mut(|mutable| { + fold_cuda_errors( + mutable + .streams + .iter() + .copied() + .map(|s| unsafe { LiveCheck::drop_box_with_result(s, true) }), + ) + })? 
} } pub(crate) struct ContextData { - pub(crate) flags: AtomicU32, - is_primary: bool, - pub(crate) ref_count: AtomicU32, pub(crate) device: hipDevice_t, - pub(crate) mutable: Mutex, + pub(crate) variant: ContextVariant, +} + +pub(crate) enum ContextVariant { + NonPrimary(NonPrimaryContextData), + Primary(Mutex), +} + +pub(crate) struct PrimaryContextData { + pub(crate) ref_count: u32, + pub(crate) flags: u32, + pub(crate) mutable: ContextInnerMutable, +} + +pub(crate) struct NonPrimaryContextData { + flags: AtomicU32, + mutable: Mutex, } impl ContextData { - pub(crate) fn new( - flags: u32, - device: hipDevice_t, - is_primary: bool, - initial_refcount: u32, - ) -> Result { - Ok(ContextData { - flags: AtomicU32::new(flags), + pub(crate) fn new_non_primary(flags: u32, device: hipDevice_t) -> Self { + Self { device, - ref_count: AtomicU32::new(initial_refcount), - is_primary, - mutable: Mutex::new(ContextDataMutable::new()), + variant: ContextVariant::NonPrimary(NonPrimaryContextData { + flags: AtomicU32::new(flags), + mutable: Mutex::new(ContextInnerMutable::new()), + }), + } + } + + pub(crate) fn new_primary(device: hipDevice_t) -> Self { + Self { + device, + variant: ContextVariant::Primary(Mutex::new(PrimaryContextData { + ref_count: 0, + flags: 0, + mutable: ContextInnerMutable::new(), + })), + } + } + + pub(crate) fn with_inner_mut( + &self, + fn_: impl FnOnce(&mut ContextInnerMutable) -> T, + ) -> Result { + Ok(match self.variant { + ContextVariant::Primary(ref mutex_over_primary_ctx_data) => { + let mut primary_ctx_data = mutex_over_primary_ctx_data + .lock() + .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; + fn_(&mut primary_ctx_data.mutable) + } + ContextVariant::NonPrimary(NonPrimaryContextData { ref mutable, .. 
}) => { + let mut ctx_data_mutable = + mutable.lock().map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; + fn_(&mut ctx_data_mutable) + } }) } } -pub(crate) struct ContextDataMutable { +pub(crate) struct ContextInnerMutable { pub(crate) streams: FxHashSet<*mut stream::Stream>, pub(crate) modules: FxHashSet<*mut module::Module>, // Field below is here to support CUDA Driver Dark API pub(crate) local_storage: FxHashMap<*mut c_void, LocalStorageValue>, } -impl ContextDataMutable { - fn new() -> Self { - ContextDataMutable { +impl ContextInnerMutable { + pub(crate) fn new() -> Self { + ContextInnerMutable { streams: FxHashSet::default(), modules: FxHashSet::default(), local_storage: FxHashMap::default(), } } + pub(crate) fn drop_with_result(&mut self) -> Result<(), CUresult> { + fold_cuda_errors( + self.streams + .iter() + .copied() + .map(|s| unsafe { LiveCheck::drop_box_with_result(s, true) }), + ) + } } pub(crate) struct LocalStorageValue { @@ -94,7 +141,7 @@ pub(crate) unsafe fn create( if pctx == ptr::null_mut() { return Err(CUresult::CUDA_ERROR_INVALID_VALUE); } - let context_box = Box::new(LiveCheck::new(ContextData::new(flags, dev, false, 1)?)); + let context_box = Box::new(LiveCheck::new(ContextData::new_non_primary(flags, dev))); let context_ptr = Box::into_raw(context_box); *pctx = context_ptr; push_context_stack(context_ptr) @@ -105,7 +152,7 @@ pub(crate) unsafe fn destroy(ctx: *mut Context) -> Result<(), CUresult> { return Err(CUresult::CUDA_ERROR_INVALID_VALUE); } let ctx_ref = LiveCheck::as_result(ctx)?; - if ctx_ref.is_primary { + if let ContextVariant::Primary { .. 
} = ctx_ref.variant { return Err(CUresult::CUDA_ERROR_INVALID_CONTEXT); } CONTEXT_STACK.with(|stack| { @@ -175,14 +222,25 @@ pub(crate) fn set_limit(limit: hipLimit_t, value: usize) -> Result<(), CUresult> Ok(()) } +pub(crate) fn set_flags(flags: u32) -> Result<(), CUresult> { + with_current(|ctx| match ctx.variant { + ContextVariant::NonPrimary(ref context) => { + context + .flags + .store(flags, std::sync::atomic::Ordering::SeqCst); + Ok(()) + } + // This looks stupid, but this is an actual CUDA behavior, + // see primary_context.rs test + ContextVariant::Primary(_) => Ok(()), + })? +} + pub(crate) unsafe fn get_api_version(ctx: *mut Context, version: *mut u32) -> Result<(), CUresult> { if ctx == ptr::null_mut() { return Err(CUresult::CUDA_ERROR_INVALID_CONTEXT); } - let ctx = LiveCheck::as_result(ctx)?; - if ctx.ref_count.load(Ordering::Acquire) == 0 { - return Err(CUresult::CUDA_ERROR_INVALID_CONTEXT); - } + //let ctx = LiveCheck::as_result(ctx)?; //TODO: query device for properties roughly matching CUDA API version *version = 3020; Ok(()) diff --git a/zluda/src/impl/dark_api.rs b/zluda/src/impl/dark_api.rs index c3f4fca..c3b596c 100644 --- a/zluda/src/impl/dark_api.rs +++ b/zluda/src/impl/dark_api.rs @@ -121,20 +121,27 @@ impl CudaDarkApi for CudaDarkApiZluda { value: *mut c_void, dtor_callback: Option, ) -> CUresult { - with_context_or_current(cu_ctx, |ctx| { - let mut ctx_mutable = ctx - .mutable - .lock() - .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; - ctx_mutable.local_storage.insert( - key, - LocalStorageValue { - value, - _dtor_callback: dtor_callback, - }, - ); - Ok(()) - }) + unsafe fn context_local_storage_insert_impl( + cu_ctx: cuda_types::CUcontext, + key: *mut c_void, + value: *mut c_void, + dtor_callback: Option< + extern "system" fn(cuda_types::CUcontext, *mut c_void, *mut c_void), + >, + ) -> Result<(), CUresult> { + with_context_or_current(cu_ctx, |ctx| { + ctx.with_inner_mut(|ctx_mutable| { + ctx_mutable.local_storage.insert( + key, + 
LocalStorageValue { + value, + _dtor_callback: dtor_callback, + }, + ); + }) + })? + } + context_local_storage_insert_impl(cu_ctx, key, value, dtor_callback).into_cuda() } // TODO @@ -143,29 +150,30 @@ impl CudaDarkApi for CudaDarkApiZluda { } unsafe extern "system" fn context_local_storage_get( - result: *mut *mut c_void, + cu_result: *mut *mut c_void, cu_ctx: cuda_types::CUcontext, key: *mut c_void, ) -> CUresult { - let mut cu_result = None; - let query_cu_result = with_context_or_current(cu_ctx, |ctx| { - let ctx_mutable = ctx - .mutable - .lock() - .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; - cu_result = ctx_mutable.local_storage.get(&key).map(|v| v.value); - Ok(()) - }); - if query_cu_result != CUresult::CUDA_SUCCESS { - query_cu_result - } else { - match cu_result { - Some(value) => { - *result = value; - CUresult::CUDA_SUCCESS - } - None => CUresult::CUDA_ERROR_INVALID_VALUE, + unsafe fn context_local_storage_get_impl( + cu_ctx: cuda_types::CUcontext, + key: *mut c_void, + ) -> Result<*mut c_void, CUresult> { + with_context_or_current(cu_ctx, |ctx| { + ctx.with_inner_mut(|ctx_mutable| { + ctx_mutable + .local_storage + .get(&key) + .map(|v| v.value) + .ok_or(CUresult::CUDA_ERROR_INVALID_VALUE) + })? + })? 
+ } + match context_local_storage_get_impl(cu_ctx, key) { + Ok(result) => { + *cu_result = result; + CUresult::CUDA_SUCCESS } + Err(err) => err, } } @@ -386,14 +394,14 @@ impl CudaDarkApi for CudaDarkApiZluda { } } -unsafe fn with_context_or_current( +unsafe fn with_context_or_current( ctx: CUcontext, - f: impl FnOnce(&context::ContextData) -> Result<(), CUresult>, -) -> CUresult { + fn_: impl FnOnce(&context::ContextData) -> T, +) -> Result { if ctx == ptr::null_mut() { - context::with_current(|c| f(c)).into_cuda() + context::with_current(|c| fn_(c)) } else { let ctx = FromCuda::from_cuda(ctx); - LiveCheck::as_result(ctx).map(f).into_cuda() + Ok(fn_(LiveCheck::as_result(ctx)?)) } } diff --git a/zluda/src/impl/device.rs b/zluda/src/impl/device.rs index 59201e2..c7e8190 100644 --- a/zluda/src/impl/device.rs +++ b/zluda/src/impl/device.rs @@ -1,6 +1,8 @@ +use super::context::{ContextInnerMutable, ContextVariant, PrimaryContextData}; use super::{ - context, LiveCheck, GLOBAL_STATE, + context, LiveCheck, GLOBAL_STATE }; +use crate::r#impl::context::ContextData; use crate::{r#impl::IntoCuda, hip_call_cuda}; use crate::hip_call; use cuda_types::{CUdevice_attribute, CUdevprop, CUuuid_st, CUresult}; @@ -10,11 +12,7 @@ use paste::paste; use std::{ mem, os::raw::{c_char, c_uint}, - ptr, - sync::{ - atomic::AtomicU32, - Mutex, - }, ops::AddAssign, ffi::CString, + ptr,ffi::CString, }; const ZLUDA_SUFFIX: &'static [u8] = b" [ZLUDA]\0"; @@ -28,9 +26,7 @@ pub const COMPUTE_CAPABILITY_MINOR: u32 = 8; pub(crate) struct Device { pub(crate) compilation_mode: CompilationMode, pub(crate) comgr_isa: CString, - // Primary context is lazy-initialized, the mutex is here to secure retain - // from multiple threads - primary_context: Mutex>, + primary_context: context::Context, } impl Device { @@ -48,7 +44,7 @@ impl Device { Ok(Self { compilation_mode, comgr_isa, - primary_context: Mutex::new(None), + primary_context: LiveCheck::new(ContextData::new_primary(index as i32)), }) } } @@ -516,38 
+512,29 @@ unsafe fn primary_ctx_get_or_retain( if pctx == ptr::null_mut() { return Err(CUresult::CUDA_ERROR_INVALID_VALUE); } - let ctx = primary_ctx(hip_dev, |ctx| { - let ctx = match ctx { - Some(ref mut ctx) => ctx, - None => { - ctx.insert(LiveCheck::new(context::ContextData::new(0, hip_dev, true, 0)?)) - }, - }; - if increment_refcount { - ctx.as_mut_unchecked().ref_count.get_mut().add_assign(1); + let ctx = primary_ctx(hip_dev, |ctx, raw_ctx| { + if increment_refcount || ctx.ref_count == 0 { + ctx.ref_count += 1; } - Ok(ctx as *mut _) + Ok(raw_ctx.cast_mut()) })??; *pctx = ctx; Ok(()) } pub(crate) unsafe fn primary_ctx_release(hip_dev: hipDevice_t) -> Result<(), CUresult> { - primary_ctx(hip_dev, move |maybe_ctx| { - if let Some(ctx) = maybe_ctx { - let ctx_data = ctx.as_mut_unchecked(); - let ref_count = ctx_data.ref_count.get_mut(); - *ref_count -= 1; - if *ref_count == 0 { - //TODO: fix - //ctx.try_drop(false) - Ok(()) - } else { - Ok(()) - } - } else { - Err(CUresult::CUDA_ERROR_INVALID_CONTEXT) + primary_ctx(hip_dev, |ctx, _| { + if ctx.ref_count == 0 { + return Err(CUresult::CUDA_ERROR_INVALID_CONTEXT); } + ctx.ref_count -= 1; + if ctx.ref_count == 0 { + // Even if we encounter errors we can't really surface them + ctx.mutable.drop_with_result().ok(); + ctx.mutable = ContextInnerMutable::new(); + ctx.flags = 0; + } + Ok(()) })? } @@ -566,53 +553,43 @@ pub(crate) unsafe fn primary_ctx_set_flags( hip_dev: hipDevice_t, flags: ::std::os::raw::c_uint, ) -> Result<(), CUresult> { - primary_ctx(hip_dev, move |maybe_ctx| { - if let Some(ctx) = maybe_ctx { - let ctx = ctx.as_mut_unchecked(); - ctx.flags = AtomicU32::new(flags); - Ok(()) - } else { - Err(CUresult::CUDA_ERROR_INVALID_CONTEXT) - } + primary_ctx(hip_dev, |ctx, _| { + ctx.flags = flags; + // TODO: actually use flags + Ok(()) })? 
} pub(crate) unsafe fn primary_ctx_get_state( hip_dev: hipDevice_t, - flags_ptr: *mut ::std::os::raw::c_uint, - active_ptr: *mut ::std::os::raw::c_int, + flags_ptr: *mut u32, + active_ptr: *mut i32, ) -> Result<(), CUresult> { if flags_ptr == ptr::null_mut() || active_ptr == ptr::null_mut() { return Err(CUresult::CUDA_ERROR_INVALID_VALUE); } - let maybe_flags = primary_ctx(hip_dev, move |maybe_ctx| { - if let Some(ctx) = maybe_ctx { - let ctx = ctx.as_mut_unchecked(); - Some(*ctx.flags.get_mut()) - } else { - None - } + let (flags, active) = primary_ctx(hip_dev, |ctx, _| { + (ctx.flags, (ctx.ref_count > 0) as i32) })?; - if let Some(flags) = maybe_flags { - *flags_ptr = flags; - *active_ptr = 1; - } else { - *flags_ptr = 0; - *active_ptr = 0; - } + *flags_ptr = flags; + *active_ptr = active; Ok(()) } pub(crate) unsafe fn primary_ctx( dev: hipDevice_t, - f: impl FnOnce(&mut Option) -> T, + fn_: impl FnOnce(&mut PrimaryContextData, *const LiveCheck) -> T, ) -> Result { let device = GLOBAL_STATE.get()?.device(dev)?; - let mut maybe_primary_context = device - .primary_context - .lock() - .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; - Ok(f(&mut maybe_primary_context)) + let raw_ptr = &device.primary_context as *const _; + let context = device.primary_context.as_ref_unchecked(); + match context.variant { + ContextVariant::Primary(ref mutex_over_primary_ctx) => { + let mut primary_ctx = mutex_over_primary_ctx.lock().map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; + Ok(fn_(&mut primary_ctx, raw_ptr)) + }, + ContextVariant::NonPrimary(..) 
=> Err(CUresult::CUDA_ERROR_UNKNOWN) + } } pub(crate) unsafe fn get_name(name: *mut i8, len: i32, device: i32) -> hipError_t { diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs index 88a95c4..34566af 100644 --- a/zluda/src/impl/mod.rs +++ b/zluda/src/impl/mod.rs @@ -148,6 +148,10 @@ impl LiveCheck { outer_ptr as *mut Self } + pub unsafe fn as_ref_unchecked(&self) -> & T { + &self.data + } + pub unsafe fn as_mut_unchecked(&mut self) -> &mut T { &mut self.data } diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs index 6a6911a..8a49d43 100644 --- a/zluda/src/impl/module.rs +++ b/zluda/src/impl/module.rs @@ -31,13 +31,11 @@ impl ZludaObject for ModuleData { let deregistration_err = if !by_owner { if let Some(ctx) = self.owner { let ctx = unsafe { LiveCheck::as_result(ctx.as_ptr())? }; - let mut ctx_mutable = ctx - .mutable - .lock() - .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; - ctx_mutable - .modules - .remove(&unsafe { LiveCheck::from_raw(self) }); + ctx.with_inner_mut(|ctx_mutable| { + ctx_mutable + .modules + .remove(&unsafe { LiveCheck::from_raw(self) }); + })?; } Ok(()) } else { @@ -104,11 +102,9 @@ pub(crate) unsafe fn load_impl( isa, input, )?); - let mut ctx_mutable = ctx - .mutable - .lock() - .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; - ctx_mutable.modules.insert(module); + ctx.with_inner_mut(|ctx_mutable| { + ctx_mutable.modules.insert(module); + })?; *output = module; Ok(()) })? diff --git a/zluda/src/impl/stream.rs b/zluda/src/impl/stream.rs index fb53510..71ed20b 100644 --- a/zluda/src/impl/stream.rs +++ b/zluda/src/impl/stream.rs @@ -21,13 +21,11 @@ impl ZludaObject for StreamData { if !by_owner { let ctx = unsafe { LiveCheck::as_result(self.ctx)? 
}; { - let mut ctx_mutable = ctx - .mutable - .lock() - .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; - ctx_mutable - .streams - .remove(&unsafe { LiveCheck::from_raw(&mut *self) }); + ctx.with_inner_mut(|ctx_mutable| { + ctx_mutable + .streams + .remove(&unsafe { LiveCheck::from_raw(&mut *self) }); + })?; } } hip_call_cuda!(hipStreamDestroy(self.base)); @@ -59,11 +57,9 @@ pub(crate) unsafe fn create_with_priority( ctx: ptr::null_mut(), }))); let ctx = context::with_current(|ctx| { - let mut ctx_mutable = ctx - .mutable - .lock() - .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; - ctx_mutable.streams.insert(stream); + ctx.with_inner_mut(|ctx_mutable| { + ctx_mutable.streams.insert(stream); + })?; Ok(LiveCheck::from_raw(ctx as *const _ as _)) })??; (*stream).as_mut_unchecked().ctx = ctx; diff --git a/zluda/tests/primary_context.rs b/zluda/tests/primary_context.rs new file mode 100644 index 0000000..f72c7b1 --- /dev/null +++ b/zluda/tests/primary_context.rs @@ -0,0 +1,84 @@ +use crate::common::CudaDriverFns; +use cuda_types::*; +use std::{mem, ptr}; +mod common; + +cuda_driver_test!(primary_context); + +unsafe fn primary_context(cuda: T) { + assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS); + let mut flags = 0; + let mut active = 0; + assert_eq!( + cuda.cuDevicePrimaryCtxGetState(CUdevice_v1(0), &mut flags, &mut active), + CUresult::CUDA_SUCCESS + ); + assert_eq!((0, 0), (flags, active)); + assert_eq!( + cuda.cuDevicePrimaryCtxSetFlags_v2(CUdevice_v1(0), 1), + CUresult::CUDA_SUCCESS + ); + assert_eq!( + cuda.cuDevicePrimaryCtxGetState(CUdevice_v1(0), &mut flags, &mut active), + CUresult::CUDA_SUCCESS + ); + assert_eq!((1, 0), (flags, active)); + let mut primary_ctx = ptr::null_mut(); + assert_eq!( + cuda.cuDevicePrimaryCtxRetain(&mut primary_ctx, CUdevice_v1(0)), + CUresult::CUDA_SUCCESS + ); + assert_eq!( + cuda.cuCtxPushCurrent_v2(primary_ctx), + CUresult::CUDA_SUCCESS + ); + assert_eq!(cuda.cuCtxSetFlags(2), CUresult::CUDA_SUCCESS); + assert_eq!( + 
cuda.cuCtxSetCurrent(ptr::null_mut()), + CUresult::CUDA_SUCCESS + ); + assert_eq!( + cuda.cuDevicePrimaryCtxGetState(CUdevice_v1(0), &mut flags, &mut active), + CUresult::CUDA_SUCCESS + ); + assert_eq!((1, 1), (flags, active)); + assert_ne!(primary_ctx, ptr::null_mut()); + let mut active_ctx = ptr::null_mut(); + assert_eq!( + cuda.cuCtxGetCurrent(&mut active_ctx), + CUresult::CUDA_SUCCESS + ); + assert_eq!(active_ctx, ptr::null_mut()); + assert_ne!(primary_ctx, active_ctx); + assert_eq!( + cuda.cuDevicePrimaryCtxGetState(CUdevice_v1(0), &mut flags, &mut active), + CUresult::CUDA_SUCCESS + ); + assert_eq!((1, 1), (flags, active)); + let mut buffer = mem::zeroed(); + assert_eq!( + cuda.cuCtxPushCurrent_v2(primary_ctx), + CUresult::CUDA_SUCCESS + ); + assert_eq!(cuda.cuMemAlloc_v2(&mut buffer, 4), CUresult::CUDA_SUCCESS); + assert_eq!( + cuda.cuDevicePrimaryCtxRelease_v2(CUdevice_v1(0)), + CUresult::CUDA_SUCCESS + ); + assert_eq!( + cuda.cuDevicePrimaryCtxGetState(CUdevice_v1(0), &mut flags, &mut active), + CUresult::CUDA_SUCCESS + ); + assert_ne!( + cuda.cuDevicePrimaryCtxRelease_v2(CUdevice_v1(0)), + CUresult::CUDA_SUCCESS + ); + assert_eq!((0, 0), (flags, active)); + // Already freed on context destruction + // TODO: reenable when we start tracking allocations inside context + //assert_ne!(cuda.cuMemFree_v2(buffer), CUresult::CUDA_SUCCESS); + assert_eq!( + cuda.cuDevicePrimaryCtxReset_v2(CUdevice_v1(0)), + CUresult::CUDA_SUCCESS + ); +} From b695f44c188efc8df8e2e2c149904bb82d2dc58b Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Fri, 29 Mar 2024 02:03:23 +0100 Subject: [PATCH 05/14] Support old PTX compression scheme (#188) --- zluda_dark_api/Cargo.toml | 1 + zluda_dark_api/src/lib.rs | 65 +++++++++++++++++++++++---------------- zluda_dump/src/log.rs | 8 ++--- 3 files changed, 43 insertions(+), 31 deletions(-) diff --git a/zluda_dark_api/Cargo.toml b/zluda_dark_api/Cargo.toml index 0aef25e..8266e36 100644 --- a/zluda_dark_api/Cargo.toml +++ 
b/zluda_dark_api/Cargo.toml @@ -14,6 +14,7 @@ either = "1.9" bit-vec = "0.6.3" paste = "1.0" lz4-sys = "1.9" +cloudflare-zlib = "0.2.10" thread-id = "4.1.0" # we don't need elf32, but goblin has a bug where elf64 does not build without elf32 goblin = { version = "0.5.1", default-features = false, features = ["elf64", "elf32"] } diff --git a/zluda_dark_api/src/lib.rs b/zluda_dark_api/src/lib.rs index 6849e0e..15c6091 100644 --- a/zluda_dark_api/src/lib.rs +++ b/zluda_dark_api/src/lib.rs @@ -687,13 +687,19 @@ pub enum FatbinModule { pub struct FatbinFile { data: *const u8, pub kind: FatbinFileKind, - pub compressed: bool, + pub compression: FatbinCompression, pub sm_version: u32, padded_payload_size: usize, payload_size: usize, uncompressed_payload: usize, } +pub enum FatbinCompression { + None, + Zlib, + Lz4, +} + impl FatbinFile { unsafe fn try_new(fatbin_file: &FatbinFileHeader) -> Result { let fatbin_file_version = fatbin_file.version; @@ -719,22 +725,19 @@ impl FatbinFile { }); } }; - if fatbin_file + let compression = if fatbin_file .flags .contains(FatbinFileHeaderFlags::CompressedOld) { - return Err(UnexpectedFieldError { - name: "FATBIN_FILE_HEADER_FLAGS", - expected: vec![ - AnyUInt::U64(FatbinFileHeaderFlags::empty().bits()), - AnyUInt::U64(FatbinFileHeaderFlags::CompressedNew.bits()), - ], - observed: AnyUInt::U64(fatbin_file.flags.bits()), - }); - } - let compressed = fatbin_file + FatbinCompression::Zlib + } else if fatbin_file .flags - .contains(FatbinFileHeaderFlags::CompressedNew); + .contains(FatbinFileHeaderFlags::CompressedNew) + { + FatbinCompression::Lz4 + } else { + FatbinCompression::None + }; let data = (fatbin_file as *const _ as *const u8).add(fatbin_file.header_size as usize); let padded_payload_size = fatbin_file.padded_payload_size as usize; let payload_size = fatbin_file.payload_size as usize; @@ -743,7 +746,7 @@ impl FatbinFile { Ok(Self { data, kind, - compressed, + compression, padded_payload_size, payload_size, uncompressed_payload, 
@@ -753,28 +756,36 @@ impl FatbinFile { // Returning static lifetime here because all known uses of this are related to fatbin files that // are constants inside files - pub unsafe fn get_or_decompress(&self) -> Result, Lz4DecompressionFailure> { - if self.compressed { - match self.decompress_kernel_module() { - Some(mut decompressed) => { - if self.kind == FatbinFileKind::Ptx { - decompressed.pop(); // remove trailing zero + pub unsafe fn get_or_decompress(&self) -> Result, DecompressionFailure> { + match self.compression { + FatbinCompression::Lz4 => { + match self.decompress_kernel_module_lz4() { + Some(mut decompressed) => { + if self.kind == FatbinFileKind::Ptx { + decompressed.pop(); // remove trailing zero + } + Ok(Cow::Owned(decompressed)) } - Ok(Cow::Owned(decompressed)) + None => Err(DecompressionFailure), } - None => Err(Lz4DecompressionFailure), } - } else { - Ok(Cow::Borrowed(slice::from_raw_parts( + FatbinCompression::Zlib => { + let compressed = + std::slice::from_raw_parts(self.data.cast(), self.padded_payload_size); + Ok(Cow::Owned( + cloudflare_zlib::inflate(compressed).map_err(|_| DecompressionFailure)?, + )) + } + FatbinCompression::None => Ok(Cow::Borrowed(slice::from_raw_parts( self.data, self.padded_payload_size as usize, - ))) + ))), } } const MAX_MODULE_DECOMPRESSION_BOUND: usize = 64 * 1024 * 1024; - unsafe fn decompress_kernel_module(&self) -> Option> { + unsafe fn decompress_kernel_module_lz4(&self) -> Option> { let decompressed_size = usize::max(1024, self.uncompressed_payload as usize); let mut decompressed_vec = vec![0u8; decompressed_size]; loop { @@ -801,7 +812,7 @@ impl FatbinFile { } #[derive(Debug)] -pub struct Lz4DecompressionFailure; +pub struct DecompressionFailure; pub fn anti_zluda_hash AntiZludaHashInputDevice>( return_known_value: bool, diff --git a/zluda_dump/src/log.rs b/zluda_dump/src/log.rs index 2cfbda6..7777a61 100644 --- a/zluda_dump/src/log.rs +++ b/zluda_dump/src/log.rs @@ -19,7 +19,7 @@ use std::path::PathBuf; 
use std::str::Utf8Error; use zluda_dark_api::AnyUInt; use zluda_dark_api::FatbinFileKind; -use zluda_dark_api::Lz4DecompressionFailure; +use zluda_dark_api::DecompressionFailure; use zluda_dark_api::UnexpectedFieldError; const LOG_PREFIX: &[u8] = b"[ZLUDA_DUMP] "; @@ -447,7 +447,7 @@ impl Display for LogEntry { file_name ) } - LogEntry::Lz4DecompressionFailure => write!(f, "LZ4 decompression failure"), + LogEntry::Lz4DecompressionFailure => write!(f, "Decompression failure"), LogEntry::UnknownExportTableFn => write!(f, "Unknown export table function"), LogEntry::UnexpectedBinaryField { field_name, @@ -591,8 +591,8 @@ impl From for LogEntry { } } -impl From for LogEntry { - fn from(_err: Lz4DecompressionFailure) -> Self { +impl From for LogEntry { + fn from(_err: DecompressionFailure) -> Self { LogEntry::Lz4DecompressionFailure } } From 76bae5f91bf81409b8f592e52a2658d787515fa8 Mon Sep 17 00:00:00 2001 From: NyanCatTW1 <17372086+NyanCatTW1@users.noreply.github.com> Date: Sat, 6 Apr 2024 01:12:59 +0800 Subject: [PATCH 06/14] Implement mad.hi.cc (#196) --- ptx/src/ast.rs | 1 + ptx/src/emit.rs | 38 +++--------- ptx/src/ptx.lalrpop | 7 ++- ptx/src/test/spirv_run/mad_hi_cc.ll | 90 ++++++++++++++++++++++++++++ ptx/src/test/spirv_run/mad_hi_cc.ptx | 41 +++++++++++++ ptx/src/test/spirv_run/mod.rs | 1 + ptx/src/translate.rs | 8 ++- 7 files changed, 153 insertions(+), 33 deletions(-) create mode 100644 ptx/src/test/spirv_run/mad_hi_cc.ll create mode 100644 ptx/src/test/spirv_run/mad_hi_cc.ptx diff --git a/ptx/src/ast.rs b/ptx/src/ast.rs index 0281961..93793e6 100644 --- a/ptx/src/ast.rs +++ b/ptx/src/ast.rs @@ -380,6 +380,7 @@ pub enum Instruction { }, MadCC { type_: ScalarType, + is_hi: bool, arg: Arg4

, }, Fma(ArithFloat, Arg4

), diff --git a/ptx/src/emit.rs b/ptx/src/emit.rs index 94cc973..d4d6df6 100644 --- a/ptx/src/emit.rs +++ b/ptx/src/emit.rs @@ -621,8 +621,8 @@ fn emit_statement( crate::translate::Statement::MadC(MadCDetails { type_, is_hi, arg }) => { emit_inst_madc(ctx, type_, is_hi, &arg)? } - crate::translate::Statement::MadCC(MadCCDetails { type_, arg }) => { - emit_inst_madcc(ctx, type_, &arg)? + crate::translate::Statement::MadCC(MadCCDetails { type_, is_hi, arg }) => { + emit_inst_madcc(ctx, type_, is_hi, &arg)? } crate::translate::Statement::AddC(type_, arg) => emit_inst_add_c(ctx, type_, &arg)?, crate::translate::Statement::AddCC(type_, arg) => { @@ -2079,16 +2079,17 @@ fn emit_inst_mad_lo( ) } -// TODO: support mad.hi.cc fn emit_inst_madcc( ctx: &mut EmitContext, type_: ast::ScalarType, + is_hi: bool, arg: &Arg4CarryOut, ) -> Result<(), TranslateError> { - let builder = ctx.builder.get(); - let src1 = ctx.names.value(arg.src1)?; - let src2 = ctx.names.value(arg.src2)?; - let mul_result = unsafe { LLVMBuildMul(builder, src1, src2, LLVM_UNNAMED) }; + let mul_result = if is_hi { + emit_inst_mul_hi_impl(ctx, type_, None, arg.src1, arg.src2)? + } else { + emit_inst_mul_low_impl(ctx, None, arg.src1, arg.src2, LLVMBuildMul)? 
+ }; emit_inst_addsub_cc_impl( ctx, "add", @@ -2176,29 +2177,6 @@ fn emit_inst_madc( mul_result, args.src3, ) - /* - let src3 = ctx.names.value(args.src3)?; - let add_no_carry = unsafe { LLVMBuildAdd(builder, mul_result, src3, LLVM_UNNAMED) }; - let carry_flag = ctx.names.value(args.carry_in)?; - let llvm_type = get_llvm_type(ctx, &ast::Type::Scalar(type_))?; - let carry_flag = unsafe { LLVMBuildZExt(builder, carry_flag, llvm_type, LLVM_UNNAMED) }; - if let Some(carry_out) = args.carry_out { - emit_inst_addsub_cc_impl( - ctx, - "add", - type_, - args.dst, - carry_out, - add_no_carry, - carry_flag, - )?; - } else { - ctx.names.register_result(args.dst, |dst| unsafe { - LLVMBuildAdd(builder, add_no_carry, carry_flag, dst) - }); - } - Ok(()) - */ } fn emit_inst_add_c( diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop index ae57575..d5c9b61 100644 --- a/ptx/src/ptx.lalrpop +++ b/ptx/src/ptx.lalrpop @@ -1516,7 +1516,12 @@ InstMad: ast::Instruction> = { // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#extended-precision-arithmetic-instructions-mad-cc InstMadCC: ast::Instruction> = { - "mad" ".lo" ".cc" => ast::Instruction::MadCC{<>}, + "mad" ".lo" ".cc" => { + ast::Instruction::MadCC { type_, arg, is_hi: false } + }, + "mad" ".hi" ".cc" => { + ast::Instruction::MadCC { type_, arg, is_hi: true } + }, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#extended-precision-arithmetic-instructions-madc diff --git a/ptx/src/test/spirv_run/mad_hi_cc.ll b/ptx/src/test/spirv_run/mad_hi_cc.ll new file mode 100644 index 0000000..a5b1595 --- /dev/null +++ b/ptx/src/test/spirv_run/mad_hi_cc.ll @@ -0,0 +1,90 @@ +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" +target triple = "amdgcn-amd-amdhsa" + +define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"61", ptr 
addrspace(4) byref(i64) %"62") #0 { +"78": + %"14" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"14", align 1 + %"15" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"15", align 1 + %"4" = alloca i64, align 8, addrspace(5) + %"5" = alloca i64, align 8, addrspace(5) + %"6" = alloca i32, align 4, addrspace(5) + %"7" = alloca i32, align 4, addrspace(5) + %"8" = alloca i32, align 4, addrspace(5) + %"9" = alloca i32, align 4, addrspace(5) + %"10" = alloca i32, align 4, addrspace(5) + %"11" = alloca i32, align 4, addrspace(5) + %"12" = alloca i32, align 4, addrspace(5) + %"13" = alloca i32, align 4, addrspace(5) + %"16" = load i64, ptr addrspace(4) %"61", align 8 + store i64 %"16", ptr addrspace(5) %"4", align 8 + %"17" = load i64, ptr addrspace(4) %"62", align 8 + store i64 %"17", ptr addrspace(5) %"5", align 8 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"64" = inttoptr i64 %"19" to ptr + %"63" = load i32, ptr %"64", align 4 + store i32 %"63", ptr addrspace(5) %"8", align 4 + %"21" = load i64, ptr addrspace(5) %"4", align 8 + %"65" = inttoptr i64 %"21" to ptr + %"80" = getelementptr inbounds i8, ptr %"65", i64 4 + %"66" = load i32, ptr %"80", align 4 + store i32 %"66", ptr addrspace(5) %"9", align 4 + %"23" = load i64, ptr addrspace(5) %"4", align 8 + %"67" = inttoptr i64 %"23" to ptr + %"82" = getelementptr inbounds i8, ptr %"67", i64 8 + %"22" = load i32, ptr %"82", align 4 + store i32 %"22", ptr addrspace(5) %"10", align 4 + %"26" = load i32, ptr addrspace(5) %"8", align 4 + %"27" = load i32, ptr addrspace(5) %"9", align 4 + %"28" = load i32, ptr addrspace(5) %"10", align 4 + %0 = sext i32 %"26" to i64 + %1 = sext i32 %"27" to i64 + %2 = mul nsw i64 %0, %1 + %3 = lshr i64 %2, 32 + %4 = trunc i64 %3 to i32 + %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %4, i32 %"28") + %"24" = extractvalue { i32, i1 } %5, 0 + %"25" = extractvalue { i32, i1 } %5, 1 + store i32 %"24", ptr addrspace(5) %"7", align 4 + 
store i1 %"25", ptr addrspace(5) %"14", align 1 + %6 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -2) + %"29" = extractvalue { i32, i1 } %6, 0 + %"30" = extractvalue { i32, i1 } %6, 1 + store i32 %"29", ptr addrspace(5) %"6", align 4 + store i1 %"30", ptr addrspace(5) %"14", align 1 + %"32" = load i1, ptr addrspace(5) %"14", align 1 + %7 = zext i1 %"32" to i32 + %"71" = add i32 0, %7 + store i32 %"71", ptr addrspace(5) %"12", align 4 + %8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -1) + %"33" = extractvalue { i32, i1 } %8, 0 + %"34" = extractvalue { i32, i1 } %8, 1 + store i32 %"33", ptr addrspace(5) %"6", align 4 + store i1 %"34", ptr addrspace(5) %"14", align 1 + %"36" = load i1, ptr addrspace(5) %"14", align 1 + %9 = zext i1 %"36" to i32 + %"72" = add i32 0, %9 + store i32 %"72", ptr addrspace(5) %"13", align 4 + %"37" = load i64, ptr addrspace(5) %"5", align 8 + %"38" = load i32, ptr addrspace(5) %"7", align 4 + %"73" = inttoptr i64 %"37" to ptr + store i32 %"38", ptr %"73", align 4 + %"39" = load i64, ptr addrspace(5) %"5", align 8 + %"40" = load i32, ptr addrspace(5) %"12", align 4 + %"74" = inttoptr i64 %"39" to ptr + %"84" = getelementptr inbounds i8, ptr %"74", i64 4 + store i32 %"40", ptr %"84", align 4 + %"41" = load i64, ptr addrspace(5) %"5", align 8 + %"42" = load i32, ptr addrspace(5) %"13", align 4 + %"76" = inttoptr i64 %"41" to ptr + %"86" = getelementptr inbounds i8, ptr %"76", i64 8 + store i32 %"42", ptr %"86", align 4 + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 + +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } diff --git a/ptx/src/test/spirv_run/mad_hi_cc.ptx 
b/ptx/src/test/spirv_run/mad_hi_cc.ptx new file mode 100644 index 0000000..4a8cac3 --- /dev/null +++ b/ptx/src/test/spirv_run/mad_hi_cc.ptx @@ -0,0 +1,41 @@ +.version 6.5 +.target sm_30 +.address_size 64 + +.visible .entry mad_hi_cc( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .u32 unused; + + .reg .s32 dst1; + .reg .b32 src1; + .reg .b32 src2; + .reg .b32 src3; + + .reg .b32 result_1; + .reg .b32 carry_out_1; + .reg .b32 carry_out_2; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + // test valid computational results + ld.s32 src1, [in_addr]; + ld.s32 src2, [in_addr+4]; + ld.b32 src3, [in_addr+8]; + mad.hi.cc.s32 dst1, src1, src2, src3; + + mad.hi.cc.u32 unused, 65536, 65536, 4294967294; // non-overflowing + addc.u32 carry_out_1, 0, 0; // carry_out_1 should be 0 + mad.hi.cc.u32 unused, 65536, 65536, 4294967295; // overflowing + addc.u32 carry_out_2, 0, 0; // carry_out_2 should be 1 + + st.s32 [out_addr], dst1; + st.s32 [out_addr+4], carry_out_1; + st.s32 [out_addr+8], carry_out_2; + ret; +} diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index a65240c..8f229c9 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -290,6 +290,7 @@ test_ptx!( [2147487519u32, 4294934539] ); test_ptx!(madc_cc2, [0xDEADu32], [0u32, 1, 1, 2]); +test_ptx!(mad_hi_cc, [0x26223377u32, 0x70777766u32, 0x60666633u32], [0x71272866u32, 0u32, 1u32]); // Multi-tap :) test_ptx!(mov_vector_cast, [0x200000001u64], [2u32, 1u32]); test_ptx!( cvt_clamp, diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 041c690..1a203bd 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -1999,9 +1999,10 @@ fn insert_hardware_registers_impl<'input>( is_hi, arg: Arg4CarryIn::new(arg, carry_out, TypedOperand::Reg(overflow_flag)), })), - Statement::Instruction(ast::Instruction::MadCC { type_, arg }) => { + Statement::Instruction(ast::Instruction::MadCC { type_, is_hi, 
arg }) => { result.push(Statement::MadCC(MadCCDetails { type_, + is_hi, arg: Arg4CarryOut::new(arg, TypedOperand::Reg(overflow_flag)), })) } @@ -5568,6 +5569,7 @@ impl, U: ArgParamsEx> Visitable for MadCD pub(crate) struct MadCCDetails { pub(crate) type_: ast::ScalarType, + pub(crate) is_hi: bool, pub(crate) arg: Arg4CarryOut

, } @@ -5578,6 +5580,7 @@ impl, U: ArgParamsEx> Visitable for MadCC ) -> Result, U>, TranslateError> { Ok(Statement::MadCC(MadCCDetails { type_: self.type_, + is_hi: self.is_hi, arg: self.arg.map(visitor, self.type_)?, })) } @@ -6486,8 +6489,9 @@ impl ast::Instruction { carry_out, arg: arg.map(visitor, &ast::Type::Scalar(type_), false)?, }, - ast::Instruction::MadCC { type_, arg } => ast::Instruction::MadCC { + ast::Instruction::MadCC { type_, arg, is_hi } => ast::Instruction::MadCC { type_, + is_hi, arg: arg.map(visitor, &ast::Type::Scalar(type_), false)?, }, ast::Instruction::Tex(details, arg) => { From 0d9ace247567a07554294dc4653624943334a410 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Fri, 5 Apr 2024 23:26:08 +0200 Subject: [PATCH 07/14] Fix buggy carry flags when mixing subc/sub.cc with addc/add.cc (#197) --- ptx/src/test/spirv_run/abs.ll | 60 ++-- ptx/src/test/spirv_run/activemask.ll | 22 +- ptx/src/test/spirv_run/add.ll | 34 ++- ptx/src/test/spirv_run/add_global.ll | 40 ++- ptx/src/test/spirv_run/add_non_coherent.ll | 34 ++- ptx/src/test/spirv_run/add_param_ptr.ll | 64 +++-- ptx/src/test/spirv_run/add_tuning.ll | 34 ++- ptx/src/test/spirv_run/addc_cc.ll | 124 +++++---- ptx/src/test/spirv_run/addc_cc2.ll | 76 +++--- ptx/src/test/spirv_run/alloca_call.ll | 72 +++-- ptx/src/test/spirv_run/amdgpu_unnamed.ll | 94 ++++--- ptx/src/test/spirv_run/and.ll | 46 ++-- ptx/src/test/spirv_run/assertfail.ll | 84 +++--- ptx/src/test/spirv_run/atom_add.ll | 62 ++--- ptx/src/test/spirv_run/atom_add_f16.ll | 64 +++-- ptx/src/test/spirv_run/atom_add_float.ll | 62 ++--- ptx/src/test/spirv_run/atom_cas.ll | 60 ++-- ptx/src/test/spirv_run/atom_inc.ll | 62 ++--- ptx/src/test/spirv_run/atom_ld_st.ll | 26 +- ptx/src/test/spirv_run/atom_ld_st_vec.ll | 40 ++- ptx/src/test/spirv_run/atom_max_u32.ll | 46 ++-- ptx/src/test/spirv_run/b64tof64.ll | 38 ++- ptx/src/test/spirv_run/barrier.ll | 4 +- ptx/src/test/spirv_run/bfe.ll | 58 ++-- ptx/src/test/spirv_run/bfi.ll | 70 +++-- 
ptx/src/test/spirv_run/bfind.ll | 98 ++++--- ptx/src/test/spirv_run/bfind_shiftamt.ll | 98 ++++--- ptx/src/test/spirv_run/block.ll | 40 ++- ptx/src/test/spirv_run/bra.ll | 42 ++- ptx/src/test/spirv_run/brev.ll | 34 ++- ptx/src/test/spirv_run/call.ll | 94 +++---- ptx/src/test/spirv_run/call_bug.ll | 94 +++---- ptx/src/test/spirv_run/call_multi_return.ll | 110 ++++---- ptx/src/test/spirv_run/callprototype.ll | 98 ++++--- ptx/src/test/spirv_run/carry_mixed.ll | 51 ---- ptx/src/test/spirv_run/carry_mixed.ptx | 32 --- ptx/src/test/spirv_run/carry_set_all.ll | 257 ++++++++++++++++++ ptx/src/test/spirv_run/carry_set_all.ptx | 84 ++++++ ptx/src/test/spirv_run/clz.ll | 32 +-- ptx/src/test/spirv_run/const.ll | 66 +++-- ptx/src/test/spirv_run/constant_f32.ll | 34 ++- ptx/src/test/spirv_run/constant_negative.ll | 34 ++- ptx/src/test/spirv_run/cos.ll | 34 ++- ptx/src/test/spirv_run/cvt_clamp.ll | 112 ++++---- ptx/src/test/spirv_run/cvt_f32_f16.ll | 36 ++- ptx/src/test/spirv_run/cvt_f32_s32.ll | 128 +++++---- ptx/src/test/spirv_run/cvt_f64_f32.ll | 34 ++- ptx/src/test/spirv_run/cvt_rni.ll | 60 ++-- ptx/src/test/spirv_run/cvt_rzi.ll | 60 ++-- ptx/src/test/spirv_run/cvt_s16_s8.ll | 38 ++- ptx/src/test/spirv_run/cvt_s32_f32.ll | 68 +++-- ptx/src/test/spirv_run/cvt_s64_s32.ll | 34 ++- ptx/src/test/spirv_run/cvt_sat_s_u.ll | 62 ++--- ptx/src/test/spirv_run/cvt_u32_s16.ll | 34 ++- ptx/src/test/spirv_run/cvta.ll | 42 ++- ptx/src/test/spirv_run/div_approx.ll | 46 ++-- ptx/src/test/spirv_run/dp4a.ll | 58 ++-- ptx/src/test/spirv_run/ex2.ll | 112 ++++---- ptx/src/test/spirv_run/extern_shared.ll | 36 ++- ptx/src/test/spirv_run/extern_shared_call.ll | 62 ++--- ptx/src/test/spirv_run/fma.ll | 58 ++-- ptx/src/test/spirv_run/func_ptr.ll | 70 +++-- ptx/src/test/spirv_run/generic.ll | 102 ++++--- ptx/src/test/spirv_run/global_array.ll | 28 +- ptx/src/test/spirv_run/lanemask_lt.ll | 50 ++-- ptx/src/test/spirv_run/ld_st.ll | 26 +- ptx/src/test/spirv_run/ld_st_implicit.ll | 36 ++- 
ptx/src/test/spirv_run/ld_st_offset.ll | 46 ++-- ptx/src/test/spirv_run/lg2.ll | 34 ++- ptx/src/test/spirv_run/local_align.ll | 26 +- ptx/src/test/spirv_run/mad_hi_cc.ll | 120 ++++---- ptx/src/test/spirv_run/mad_s32.ll | 118 ++++---- ptx/src/test/spirv_run/madc_cc.ll | 92 +++---- ptx/src/test/spirv_run/madc_cc2.ll | 73 ----- ptx/src/test/spirv_run/madc_cc2.ptx | 38 --- ptx/src/test/spirv_run/max.ll | 46 ++-- ptx/src/test/spirv_run/membar.ll | 28 +- ptx/src/test/spirv_run/min.ll | 46 ++-- ptx/src/test/spirv_run/mod.rs | 27 +- ptx/src/test/spirv_run/mov.ll | 36 ++- ptx/src/test/spirv_run/mov_address.ll | 14 +- ptx/src/test/spirv_run/mov_vector_cast.ll | 78 +++--- ptx/src/test/spirv_run/mul_ftz.ll | 46 ++-- ptx/src/test/spirv_run/mul_hi.ll | 34 ++- ptx/src/test/spirv_run/mul_lo.ll | 34 ++- ptx/src/test/spirv_run/mul_non_ftz.ll | 46 ++-- ptx/src/test/spirv_run/mul_wide.ll | 50 ++-- ptx/src/test/spirv_run/multireg.ll | 34 ++- ptx/src/test/spirv_run/neg.ll | 34 ++- .../test/spirv_run/non_scalar_ptr_offset.ll | 44 ++- ptx/src/test/spirv_run/not.ll | 34 ++- ptx/src/test/spirv_run/ntid.ll | 44 ++- ptx/src/test/spirv_run/or.ll | 46 ++-- ptx/src/test/spirv_run/param_ptr.ll | 48 ++-- ptx/src/test/spirv_run/popc.ll | 34 ++- ptx/src/test/spirv_run/pred_not.ll | 70 +++-- ptx/src/test/spirv_run/prmt.ll | 72 +++-- ptx/src/test/spirv_run/prmt_non_immediate.ll | 54 ++-- ptx/src/test/spirv_run/rcp.ll | 34 ++- ptx/src/test/spirv_run/reg_local.ll | 46 ++-- ptx/src/test/spirv_run/rem.ll | 46 ++-- ptx/src/test/spirv_run/rsqrt.ll | 36 ++- ptx/src/test/spirv_run/s64_min.ll | 22 +- ptx/src/test/spirv_run/selp.ll | 46 ++-- ptx/src/test/spirv_run/selp_true.ll | 46 ++-- ptx/src/test/spirv_run/set_f16x2.ll | 94 ++++--- ptx/src/test/spirv_run/setp.ll | 66 +++-- ptx/src/test/spirv_run/setp_bool.ll | 96 ++++--- ptx/src/test/spirv_run/setp_gt.ll | 74 +++-- ptx/src/test/spirv_run/setp_leu.ll | 74 +++-- ptx/src/test/spirv_run/setp_nan.ll | 228 ++++++++-------- ptx/src/test/spirv_run/setp_num.ll | 228 
++++++++-------- ptx/src/test/spirv_run/setp_pred2.ll | 78 +++--- ptx/src/test/spirv_run/shared_ptr_32.ll | 50 ++-- .../test/spirv_run/shared_ptr_take_address.ll | 48 ++-- ptx/src/test/spirv_run/shared_unify_decl.ll | 104 ++++--- ptx/src/test/spirv_run/shared_unify_extern.ll | 104 ++++--- ptx/src/test/spirv_run/shared_unify_local.ll | 110 ++++---- ptx/src/test/spirv_run/shared_variable.ll | 36 ++- ptx/src/test/spirv_run/shf.ll | 46 ++-- ptx/src/test/spirv_run/shl.ll | 36 ++- ptx/src/test/spirv_run/shl_link_hack.ll | 44 ++- ptx/src/test/spirv_run/shl_overflow.ll | 114 ++++---- ptx/src/test/spirv_run/shr_s32.ll | 50 ++-- ptx/src/test/spirv_run/shr_u32.ll | 82 +++--- ptx/src/test/spirv_run/sign_extend.ll | 28 +- ptx/src/test/spirv_run/sin.ll | 34 ++- ptx/src/test/spirv_run/sqrt.ll | 34 ++- ptx/src/test/spirv_run/sub.ll | 34 ++- ptx/src/test/spirv_run/subc_cc.ll | 130 ++++----- ptx/src/test/spirv_run/subc_cc2.ll | 127 --------- ptx/src/test/spirv_run/subc_cc2.ptx | 55 ---- ptx/src/test/spirv_run/vector.ll | 110 ++++---- ptx/src/test/spirv_run/vector4.ll | 36 ++- ptx/src/test/spirv_run/vector_extract.ll | 132 +++++---- ptx/src/test/spirv_run/vote_ballot.ll | 66 +++-- ptx/src/test/spirv_run/vshr.ll | 62 ++--- ptx/src/test/spirv_run/xor.ll | 46 ++-- ptx/src/translate.rs | 94 +++++-- 139 files changed, 4208 insertions(+), 4464 deletions(-) delete mode 100644 ptx/src/test/spirv_run/carry_mixed.ll delete mode 100644 ptx/src/test/spirv_run/carry_mixed.ptx create mode 100644 ptx/src/test/spirv_run/carry_set_all.ll create mode 100644 ptx/src/test/spirv_run/carry_set_all.ptx delete mode 100644 ptx/src/test/spirv_run/madc_cc2.ll delete mode 100644 ptx/src/test/spirv_run/madc_cc2.ptx delete mode 100644 ptx/src/test/spirv_run/subc_cc2.ll delete mode 100644 ptx/src/test/spirv_run/subc_cc2.ptx diff --git a/ptx/src/test/spirv_run/abs.ll b/ptx/src/test/spirv_run/abs.ll index c698e66..e086eda 100644 --- a/ptx/src/test/spirv_run/abs.ll +++ b/ptx/src/test/spirv_run/abs.ll @@ -1,44 +1,42 
@@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { -"38": +define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { +"37": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"27", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"13" to ptr - %"30" = load i32, ptr %"31", align 4 - store i32 %"30", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"32" = inttoptr i64 %"15" to ptr - %"40" = getelementptr inbounds i8, ptr %"32", i64 4 - %"33" = load i32, ptr %"40", align 4 - store i32 %"33", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"16" = call i32 @llvm.abs.i32(i32 %"17", i1 false) - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i32, ptr addrspace(5) %"7", align 4 - %"18" = call i32 @llvm.abs.i32(i32 %"19", i1 false) - store i32 %"18", ptr addrspace(5) %"7", align 4 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load i32, ptr addrspace(5) %"6", align 4 - %"34" = inttoptr i64 %"20" to ptr - store i32 
%"21", ptr %"34", align 4 - %"22" = load i64, ptr addrspace(5) %"5", align 8 - %"23" = load i32, ptr addrspace(5) %"7", align 4 - %"36" = inttoptr i64 %"22" to ptr - %"42" = getelementptr inbounds i8, ptr %"36", i64 4 - store i32 %"23", ptr %"42", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"12" to ptr + %"29" = load i32, ptr %"30", align 4 + store i32 %"29", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"31" = inttoptr i64 %"14" to ptr + %"39" = getelementptr inbounds i8, ptr %"31", i64 4 + %"32" = load i32, ptr %"39", align 4 + store i32 %"32", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"15" = call i32 @llvm.abs.i32(i32 %"16", i1 false) + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i32, ptr addrspace(5) %"7", align 4 + %"17" = call i32 @llvm.abs.i32(i32 %"18", i1 false) + store i32 %"17", ptr addrspace(5) %"7", align 4 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = load i32, ptr addrspace(5) %"6", align 4 + %"33" = inttoptr i64 %"19" to ptr + store i32 %"20", ptr %"33", align 4 + %"21" = load i64, ptr addrspace(5) %"5", align 8 + %"22" = load i32, ptr addrspace(5) %"7", align 4 + %"35" = inttoptr i64 %"21" to ptr + %"41" = getelementptr inbounds i8, ptr %"35", i64 4 + store i32 %"22", ptr %"41", align 4 ret void } diff --git a/ptx/src/test/spirv_run/activemask.ll b/ptx/src/test/spirv_run/activemask.ll index 4e53429..5ca886c 100644 --- a/ptx/src/test/spirv_run/activemask.ll +++ b/ptx/src/test/spirv_run/activemask.ll @@ -3,22 +3,20 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__activemask() #0 -define protected amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"12", ptr addrspace(4) byref(i64) %"13") #1 { -"16": +define protected amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"11", ptr addrspace(4) byref(i64) %"12") #1 { 
+"15": %"6" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"6", align 1 - %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i32, align 4, addrspace(5) - %"8" = load i64, ptr addrspace(4) %"13", align 8 - store i64 %"8", ptr addrspace(5) %"4", align 8 - %"9" = call i32 @__zluda_ptx_impl__activemask() - store i32 %"9", ptr addrspace(5) %"5", align 4 - %"10" = load i64, ptr addrspace(5) %"4", align 8 - %"11" = load i32, ptr addrspace(5) %"5", align 4 - %"14" = inttoptr i64 %"10" to ptr - store i32 %"11", ptr %"14", align 4 + %"7" = load i64, ptr addrspace(4) %"12", align 8 + store i64 %"7", ptr addrspace(5) %"4", align 8 + %"8" = call i32 @__zluda_ptx_impl__activemask() + store i32 %"8", ptr addrspace(5) %"5", align 4 + %"9" = load i64, ptr addrspace(5) %"4", align 8 + %"10" = load i32, ptr addrspace(5) %"5", align 4 + %"13" = inttoptr i64 %"9" to ptr + store i32 %"10", ptr %"13", align 4 ret void } diff --git a/ptx/src/test/spirv_run/add.ll b/ptx/src/test/spirv_run/add.ll index 3b11a73..6a8ed12 100644 --- a/ptx/src/test/spirv_run/add.ll +++ b/ptx/src/test/spirv_run/add.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"23": +define protected amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, 
addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"21", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"14" = add i64 %"15", 1 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"22", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"20", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"13" = add i64 %"14", 1 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/add_global.ll b/ptx/src/test/spirv_run/add_global.ll index 14ae1f9..754623c 100644 --- a/ptx/src/test/spirv_run/add_global.ll +++ b/ptx/src/test/spirv_run/add_global.ll @@ -3,34 +3,32 @@ target triple = "amdgcn-amd-amdhsa" @PI = protected addrspace(1) externally_initialized global float 0x400921FB60000000, align 4 -define protected amdgpu_kernel void @add_global(ptr addrspace(4) byref(i64) %"21", ptr addrspace(4) byref(i64) %"22") #0 { -"25": +define protected amdgpu_kernel void @add_global(ptr addrspace(4) byref(i64) %"20", ptr addrspace(4) byref(i64) %"21") #0 { +"24": %"9" = 
alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"20", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr addrspace(4) %"21", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(4) %"22", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %"23" = inttoptr i64 %"14" to ptr - %"13" = load float, ptr %"23", align 4 - store float %"13", ptr addrspace(5) %"7", align 4 - %"15" = load float, ptr addrspace(1) @PI, align 4 - store float %"15", ptr addrspace(5) %"8", align 4 - %"17" = load float, ptr addrspace(5) %"7", align 4 - %"18" = load float, ptr addrspace(5) %"8", align 4 - %"16" = fadd float %"17", %"18" - store float %"16", ptr addrspace(5) %"7", align 4 - %"19" = load i64, ptr addrspace(5) %"6", align 8 - %"20" = load float, ptr addrspace(5) %"7", align 4 - %"24" = inttoptr i64 %"19" to ptr - store float %"20", ptr %"24", align 4 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"13" = load i64, ptr addrspace(5) %"5", align 8 + %"22" = inttoptr i64 %"13" to ptr + %"12" = load float, ptr %"22", align 4 + store float %"12", ptr addrspace(5) %"7", align 4 + %"14" = load float, ptr addrspace(1) @PI, align 4 + store float %"14", ptr addrspace(5) %"8", align 4 + %"16" = load float, ptr addrspace(5) %"7", align 4 + %"17" = load float, ptr addrspace(5) %"8", align 4 + %"15" = fadd float %"16", %"17" + store float %"15", ptr addrspace(5) %"7", align 4 + %"18" = load i64, ptr addrspace(5) %"6", align 8 + %"19" = load float, ptr addrspace(5) %"7", align 4 + %"23" = inttoptr i64 %"18" to ptr + store float %"19", ptr %"23", 
align 4 ret void } diff --git a/ptx/src/test/spirv_run/add_non_coherent.ll b/ptx/src/test/spirv_run/add_non_coherent.ll index 7cf364c..ab8d0bc 100644 --- a/ptx/src/test/spirv_run/add_non_coherent.ll +++ b/ptx/src/test/spirv_run/add_non_coherent.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"23": +define protected amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr addrspace(1) - %"12" = load i64, ptr addrspace(1) %"21", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"14" = add i64 %"15", 1 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr addrspace(1) - store i64 %"17", ptr addrspace(1) %"22", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 
8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr addrspace(1) + %"11" = load i64, ptr addrspace(1) %"20", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"13" = add i64 %"14", 1 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr addrspace(1) + store i64 %"16", ptr addrspace(1) %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/add_param_ptr.ll b/ptx/src/test/spirv_run/add_param_ptr.ll index 9d90b23..810e9c8 100644 --- a/ptx/src/test/spirv_run/add_param_ptr.ll +++ b/ptx/src/test/spirv_run/add_param_ptr.ll @@ -1,47 +1,45 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @add_param_ptr(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { -"39": +define protected amdgpu_kernel void @add_param_ptr(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 { +"38": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) - %"32" = ptrtoint ptr addrspace(4) %"27" to i64 + %"31" = ptrtoint ptr addrspace(4) %"26" to i64 %0 = alloca i64, align 8, addrspace(5) - store i64 %"32", ptr addrspace(5) %0, align 8 - %"31" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"31", ptr addrspace(5) %"4", align 8 - %"34" = ptrtoint ptr addrspace(4) %"28" to i64 + store i64 %"31", ptr addrspace(5) %0, 
align 8 + %"30" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"30", ptr addrspace(5) %"4", align 8 + %"33" = ptrtoint ptr addrspace(4) %"27" to i64 %1 = alloca i64, align 8, addrspace(5) - store i64 %"34", ptr addrspace(5) %1, align 8 - %"33" = load i64, ptr addrspace(5) %1, align 8 - store i64 %"33", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"35" = inttoptr i64 %"13" to ptr addrspace(4) - %"41" = getelementptr inbounds i8, ptr addrspace(4) %"35", i64 0 - %"12" = load i64, ptr addrspace(4) %"41", align 8 - store i64 %"12", ptr addrspace(5) %"4", align 8 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"36" = inttoptr i64 %"15" to ptr addrspace(4) - %"43" = getelementptr inbounds i8, ptr addrspace(4) %"36", i64 0 - %"14" = load i64, ptr addrspace(4) %"43", align 8 - store i64 %"14", ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"37" = inttoptr i64 %"17" to ptr - %"16" = load i64, ptr %"37", align 8 - store i64 %"16", ptr addrspace(5) %"6", align 8 - %"19" = load i64, ptr addrspace(5) %"6", align 8 - %"18" = add i64 %"19", 1 - store i64 %"18", ptr addrspace(5) %"7", align 8 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load i64, ptr addrspace(5) %"7", align 8 - %"38" = inttoptr i64 %"20" to ptr - store i64 %"21", ptr %"38", align 8 + store i64 %"33", ptr addrspace(5) %1, align 8 + %"32" = load i64, ptr addrspace(5) %1, align 8 + store i64 %"32", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"34" = inttoptr i64 %"12" to ptr addrspace(4) + %"40" = getelementptr inbounds i8, ptr addrspace(4) %"34", i64 0 + %"11" = load i64, ptr addrspace(4) %"40", align 8 + store i64 %"11", ptr addrspace(5) %"4", align 8 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"35" = inttoptr i64 %"14" to ptr addrspace(4) + %"42" = getelementptr inbounds i8, ptr addrspace(4) %"35", i64 0 + %"13" = load i64, ptr addrspace(4) %"42", align 8 + 
store i64 %"13", ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"36" = inttoptr i64 %"16" to ptr + %"15" = load i64, ptr %"36", align 8 + store i64 %"15", ptr addrspace(5) %"6", align 8 + %"18" = load i64, ptr addrspace(5) %"6", align 8 + %"17" = add i64 %"18", 1 + store i64 %"17", ptr addrspace(5) %"7", align 8 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = load i64, ptr addrspace(5) %"7", align 8 + %"37" = inttoptr i64 %"19" to ptr + store i64 %"20", ptr %"37", align 8 ret void } diff --git a/ptx/src/test/spirv_run/add_tuning.ll b/ptx/src/test/spirv_run/add_tuning.ll index 1f36397..9ec6795 100644 --- a/ptx/src/test/spirv_run/add_tuning.ll +++ b/ptx/src/test/spirv_run/add_tuning.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"23": +define protected amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"12" 
= load i64, ptr %"21", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"14" = add i64 %"15", 1 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"22", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"20", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"13" = add i64 %"14", 1 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/addc_cc.ll b/ptx/src/test/spirv_run/addc_cc.ll index 9015a80..3299982 100644 --- a/ptx/src/test/spirv_run/addc_cc.ll +++ b/ptx/src/test/spirv_run/addc_cc.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"54", ptr addrspace(4) byref(i64) %"55") #0 { -"69": +define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53", ptr addrspace(4) byref(i64) %"54") #0 { +"68": %"13" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"13", align 1 - %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -16,70 +14,70 @@ define protected amdgpu_kernel void 
@addc_cc(ptr addrspace(4) byref(i64) %"54", %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i32, align 4, addrspace(5) + %"14" = load i64, ptr addrspace(4) %"53", align 8 + store i64 %"14", ptr addrspace(5) %"4", align 8 %"15" = load i64, ptr addrspace(4) %"54", align 8 - store i64 %"15", ptr addrspace(5) %"4", align 8 - %"16" = load i64, ptr addrspace(4) %"55", align 8 - store i64 %"16", ptr addrspace(5) %"5", align 8 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"57" = inttoptr i64 %"18" to ptr - %"56" = load i32, ptr %"57", align 4 - store i32 %"56", ptr addrspace(5) %"9", align 4 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"58" = inttoptr i64 %"20" to ptr - %"71" = getelementptr inbounds i8, ptr %"58", i64 4 - %"59" = load i32, ptr %"71", align 4 - store i32 %"59", ptr addrspace(5) %"10", align 4 - %"22" = load i64, ptr addrspace(5) %"4", align 8 - %"60" = inttoptr i64 %"22" to ptr - %"73" = getelementptr inbounds i8, ptr %"60", i64 8 - %"21" = load i32, ptr %"73", align 4 - store i32 %"21", ptr addrspace(5) %"11", align 4 - %"24" = load i64, ptr addrspace(5) %"4", align 8 - %"61" = inttoptr i64 %"24" to ptr - %"75" = getelementptr inbounds i8, ptr %"61", i64 12 - %"23" = load i32, ptr %"75", align 4 - store i32 %"23", ptr addrspace(5) %"12", align 4 - %"27" = load i32, ptr addrspace(5) %"9", align 4 - %"28" = load i32, ptr addrspace(5) %"10", align 4 - %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"27", i32 %"28") - %"25" = extractvalue { i32, i1 } %0, 0 - %"26" = extractvalue { i32, i1 } %0, 1 - store i32 %"25", ptr addrspace(5) %"6", align 4 - store i1 %"26", ptr addrspace(5) %"13", align 1 - %"31" = load i1, ptr addrspace(5) %"13", align 1 - %"32" = load i32, ptr addrspace(5) %"6", align 4 - %"33" = load i32, ptr addrspace(5) %"11", align 4 - %1 = zext i1 %"31" to i32 - %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"32", i32 %"33") + store i64 %"15", ptr addrspace(5) 
%"5", align 8 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"56" = inttoptr i64 %"17" to ptr + %"55" = load i32, ptr %"56", align 4 + store i32 %"55", ptr addrspace(5) %"9", align 4 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"57" = inttoptr i64 %"19" to ptr + %"70" = getelementptr inbounds i8, ptr %"57", i64 4 + %"58" = load i32, ptr %"70", align 4 + store i32 %"58", ptr addrspace(5) %"10", align 4 + %"21" = load i64, ptr addrspace(5) %"4", align 8 + %"59" = inttoptr i64 %"21" to ptr + %"72" = getelementptr inbounds i8, ptr %"59", i64 8 + %"20" = load i32, ptr %"72", align 4 + store i32 %"20", ptr addrspace(5) %"11", align 4 + %"23" = load i64, ptr addrspace(5) %"4", align 8 + %"60" = inttoptr i64 %"23" to ptr + %"74" = getelementptr inbounds i8, ptr %"60", i64 12 + %"22" = load i32, ptr %"74", align 4 + store i32 %"22", ptr addrspace(5) %"12", align 4 + %"26" = load i32, ptr addrspace(5) %"9", align 4 + %"27" = load i32, ptr addrspace(5) %"10", align 4 + %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"26", i32 %"27") + %"24" = extractvalue { i32, i1 } %0, 0 + %"25" = extractvalue { i32, i1 } %0, 1 + store i32 %"24", ptr addrspace(5) %"6", align 4 + store i1 %"25", ptr addrspace(5) %"13", align 1 + %"30" = load i1, ptr addrspace(5) %"13", align 1 + %"31" = load i32, ptr addrspace(5) %"6", align 4 + %"32" = load i32, ptr addrspace(5) %"11", align 4 + %1 = zext i1 %"30" to i32 + %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"31", i32 %"32") %3 = extractvalue { i32, i1 } %2, 0 %4 = extractvalue { i32, i1 } %2, 1 %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1) - %"29" = extractvalue { i32, i1 } %5, 0 + %"28" = extractvalue { i32, i1 } %5, 0 %6 = extractvalue { i32, i1 } %5, 1 - %"30" = xor i1 %4, %6 - store i32 %"29", ptr addrspace(5) %"7", align 4 - store i1 %"30", ptr addrspace(5) %"13", align 1 - %"35" = load i1, ptr addrspace(5) %"13", align 1 - %"36" = load i32, ptr addrspace(5) %"7", align 4 - %"37" = load 
i32, ptr addrspace(5) %"12", align 4 - %7 = zext i1 %"35" to i32 - %8 = add i32 %"36", %"37" - %"34" = add i32 %8, %7 - store i32 %"34", ptr addrspace(5) %"8", align 4 - %"38" = load i64, ptr addrspace(5) %"5", align 8 - %"39" = load i32, ptr addrspace(5) %"6", align 4 - %"66" = inttoptr i64 %"38" to ptr - store i32 %"39", ptr %"66", align 4 - %"40" = load i64, ptr addrspace(5) %"5", align 8 - %"41" = load i32, ptr addrspace(5) %"7", align 4 - %"67" = inttoptr i64 %"40" to ptr - %"77" = getelementptr inbounds i8, ptr %"67", i64 4 - store i32 %"41", ptr %"77", align 4 - %"42" = load i64, ptr addrspace(5) %"5", align 8 - %"43" = load i32, ptr addrspace(5) %"8", align 4 - %"68" = inttoptr i64 %"42" to ptr - %"79" = getelementptr inbounds i8, ptr %"68", i64 8 - store i32 %"43", ptr %"79", align 4 + %"29" = xor i1 %4, %6 + store i32 %"28", ptr addrspace(5) %"7", align 4 + store i1 %"29", ptr addrspace(5) %"13", align 1 + %"34" = load i1, ptr addrspace(5) %"13", align 1 + %"35" = load i32, ptr addrspace(5) %"7", align 4 + %"36" = load i32, ptr addrspace(5) %"12", align 4 + %7 = zext i1 %"34" to i32 + %8 = add i32 %"35", %"36" + %"33" = add i32 %8, %7 + store i32 %"33", ptr addrspace(5) %"8", align 4 + %"37" = load i64, ptr addrspace(5) %"5", align 8 + %"38" = load i32, ptr addrspace(5) %"6", align 4 + %"65" = inttoptr i64 %"37" to ptr + store i32 %"38", ptr %"65", align 4 + %"39" = load i64, ptr addrspace(5) %"5", align 8 + %"40" = load i32, ptr addrspace(5) %"7", align 4 + %"66" = inttoptr i64 %"39" to ptr + %"76" = getelementptr inbounds i8, ptr %"66", i64 4 + store i32 %"40", ptr %"76", align 4 + %"41" = load i64, ptr addrspace(5) %"5", align 8 + %"42" = load i32, ptr addrspace(5) %"8", align 4 + %"67" = inttoptr i64 %"41" to ptr + %"78" = getelementptr inbounds i8, ptr %"67", i64 8 + store i32 %"42", ptr %"78", align 4 ret void } diff --git a/ptx/src/test/spirv_run/addc_cc2.ll b/ptx/src/test/spirv_run/addc_cc2.ll index 982be96..836d8d5 100644 --- 
a/ptx/src/test/spirv_run/addc_cc2.ll +++ b/ptx/src/test/spirv_run/addc_cc2.ll @@ -1,63 +1,61 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @addc_cc2(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { -"51": +define protected amdgpu_kernel void @addc_cc2(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 { +"50": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) - %"11" = load i64, ptr addrspace(4) %"41", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 + %"10" = load i64, ptr addrspace(4) %"40", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) - %"42" = extractvalue { i32, i1 } %0, 0 - %"13" = extractvalue { i32, i1 } %0, 1 - store i32 %"42", ptr addrspace(5) %"6", align 4 - store i1 %"13", ptr addrspace(5) %"9", align 1 - %"16" = load i1, ptr addrspace(5) %"9", align 1 - %1 = zext i1 %"16" to i32 + %"41" = extractvalue { i32, i1 } %0, 0 + %"12" = extractvalue { i32, i1 } %0, 1 + store i32 %"41", ptr addrspace(5) %"6", align 4 + store i1 %"12", ptr addrspace(5) %"9", align 1 + %"15" = load i1, ptr addrspace(5) %"9", align 1 + %1 = zext i1 %"15" to i32 %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -4, i32 -4) %3 = extractvalue { i32, i1 } %2, 0 %4 = extractvalue { i32, i1 } %2, 1 %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1) - %"43" = extractvalue { i32, 
i1 } %5, 0 + %"42" = extractvalue { i32, i1 } %5, 0 %6 = extractvalue { i32, i1 } %5, 1 - %"15" = xor i1 %4, %6 - store i32 %"43", ptr addrspace(5) %"6", align 4 - store i1 %"15", ptr addrspace(5) %"9", align 1 - %"18" = load i1, ptr addrspace(5) %"9", align 1 - %7 = zext i1 %"18" to i32 - %"44" = add i32 0, %7 - store i32 %"44", ptr addrspace(5) %"7", align 4 - %"21" = load i1, ptr addrspace(5) %"9", align 1 - %8 = zext i1 %"21" to i32 + %"14" = xor i1 %4, %6 + store i32 %"42", ptr addrspace(5) %"6", align 4 + store i1 %"14", ptr addrspace(5) %"9", align 1 + %"17" = load i1, ptr addrspace(5) %"9", align 1 + %7 = zext i1 %"17" to i32 + %"43" = add i32 0, %7 + store i32 %"43", ptr addrspace(5) %"7", align 4 + %"20" = load i1, ptr addrspace(5) %"9", align 1 + %8 = zext i1 %"20" to i32 %9 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 -1) %10 = extractvalue { i32, i1 } %9, 0 %11 = extractvalue { i32, i1 } %9, 1 %12 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %10, i32 %8) - %"45" = extractvalue { i32, i1 } %12, 0 + %"44" = extractvalue { i32, i1 } %12, 0 %13 = extractvalue { i32, i1 } %12, 1 - %"20" = xor i1 %11, %13 - store i32 %"45", ptr addrspace(5) %"6", align 4 - store i1 %"20", ptr addrspace(5) %"9", align 1 - %"23" = load i1, ptr addrspace(5) %"9", align 1 - %14 = zext i1 %"23" to i32 - %"46" = add i32 0, %14 - store i32 %"46", ptr addrspace(5) %"8", align 4 - %"24" = load i64, ptr addrspace(5) %"5", align 8 - %"25" = load i32, ptr addrspace(5) %"7", align 4 - %"47" = inttoptr i64 %"24" to ptr - store i32 %"25", ptr %"47", align 4 - %"26" = load i64, ptr addrspace(5) %"5", align 8 - %"27" = load i32, ptr addrspace(5) %"8", align 4 - %"49" = inttoptr i64 %"26" to ptr - %"53" = getelementptr inbounds i8, ptr %"49", i64 4 - store i32 %"27", ptr %"53", align 4 + %"19" = xor i1 %11, %13 + store i32 %"44", ptr addrspace(5) %"6", align 4 + store i1 %"19", ptr addrspace(5) %"9", align 1 + %"22" = load i1, ptr addrspace(5) %"9", align 1 + %14 = zext 
i1 %"22" to i32 + %"45" = add i32 0, %14 + store i32 %"45", ptr addrspace(5) %"8", align 4 + %"23" = load i64, ptr addrspace(5) %"5", align 8 + %"24" = load i32, ptr addrspace(5) %"7", align 4 + %"46" = inttoptr i64 %"23" to ptr + store i32 %"24", ptr %"46", align 4 + %"25" = load i64, ptr addrspace(5) %"5", align 8 + %"26" = load i32, ptr addrspace(5) %"8", align 4 + %"48" = inttoptr i64 %"25" to ptr + %"52" = getelementptr inbounds i8, ptr %"48", i64 4 + store i32 %"26", ptr %"52", align 4 ret void } diff --git a/ptx/src/test/spirv_run/alloca_call.ll b/ptx/src/test/spirv_run/alloca_call.ll index 1ae760b..e6a9d6f 100644 --- a/ptx/src/test/spirv_run/alloca_call.ll +++ b/ptx/src/test/spirv_run/alloca_call.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 { -"59": +define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 { +"58": %"22" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"22", align 1 - %"23" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"23", align 1 %"7" = alloca i1, align 1, addrspace(5) %"8" = alloca double, align 8, addrspace(5) %"9" = alloca double, align 8, addrspace(5) @@ -14,47 +12,47 @@ define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr %"11" = alloca i64, align 8, addrspace(5) %"12" = alloca i64, align 8, addrspace(5) %"13" = alloca i64, align 8, addrspace(5) - %"47" = alloca i64, align 8, 
addrspace(5) - %"49" = alloca [4 x i32], align 16, addrspace(5) + %"46" = alloca i64, align 8, addrspace(5) + %"48" = alloca [4 x i32], align 16, addrspace(5) + %"50" = load i64, ptr addrspace(4) %"42", align 8 + store i64 %"50", ptr addrspace(5) %"10", align 8 %"51" = load i64, ptr addrspace(4) %"43", align 8 - store i64 %"51", ptr addrspace(5) %"10", align 8 + store i64 %"51", ptr addrspace(5) %"11", align 8 %"52" = load i64, ptr addrspace(4) %"44", align 8 - store i64 %"52", ptr addrspace(5) %"11", align 8 + store i64 %"52", ptr addrspace(5) %"12", align 8 %"53" = load i64, ptr addrspace(4) %"45", align 8 - store i64 %"53", ptr addrspace(5) %"12", align 8 - %"54" = load i64, ptr addrspace(4) %"46", align 8 - store i64 %"54", ptr addrspace(5) %"13", align 8 - %"29" = load i64, ptr addrspace(5) %"12", align 8 - %"30" = load i64, ptr addrspace(5) %"13", align 8 - %"28" = icmp sge i64 %"29", %"30" - store i1 %"28", ptr addrspace(5) %"7", align 1 - %"31" = load i1, ptr addrspace(5) %"7", align 1 - br i1 %"31", label %"6", label %"18" + store i64 %"53", ptr addrspace(5) %"13", align 8 + %"28" = load i64, ptr addrspace(5) %"12", align 8 + %"29" = load i64, ptr addrspace(5) %"13", align 8 + %"27" = icmp sge i64 %"28", %"29" + store i1 %"27", ptr addrspace(5) %"7", align 1 + %"30" = load i1, ptr addrspace(5) %"7", align 1 + br i1 %"30", label %"6", label %"18" -"18": ; preds = %"59" +"18": ; preds = %"58" + %"31" = load i64, ptr addrspace(5) %"11", align 8 + %"60" = getelementptr inbounds i8, ptr addrspace(5) %"46", i64 0 + store i64 %"31", ptr addrspace(5) %"60", align 8 %"32" = load i64, ptr addrspace(5) %"11", align 8 - %"61" = getelementptr inbounds i8, ptr addrspace(5) %"47", i64 0 - store i64 %"32", ptr addrspace(5) %"61", align 8 - %"33" = load i64, ptr addrspace(5) %"11", align 8 - %0 = inttoptr i64 %"33" to ptr + %0 = inttoptr i64 %"32" to ptr %"21" = call [4 x i32] %0() - store [4 x i32] %"21", ptr addrspace(5) %"49", align 4 - %"63" = getelementptr inbounds 
i8, ptr addrspace(5) %"49", i64 0 - %"19" = load <2 x double>, ptr addrspace(5) %"63", align 16 - %"34" = extractelement <2 x double> %"19", i32 0 - %"35" = extractelement <2 x double> %"19", i32 1 - store double %"34", ptr addrspace(5) %"8", align 8 - store double %"35", ptr addrspace(5) %"9", align 8 - %"36" = load double, ptr addrspace(5) %"8", align 8 - %"37" = load double, ptr addrspace(5) %"9", align 8 - %1 = insertelement <2 x double> undef, double %"36", i32 0 - %"20" = insertelement <2 x double> %1, double %"37", i32 1 - %"38" = load i64, ptr addrspace(5) %"10", align 8 - %"58" = inttoptr i64 %"38" to ptr addrspace(1) - store <2 x double> %"20", ptr addrspace(1) %"58", align 16 + store [4 x i32] %"21", ptr addrspace(5) %"48", align 4 + %"62" = getelementptr inbounds i8, ptr addrspace(5) %"48", i64 0 + %"19" = load <2 x double>, ptr addrspace(5) %"62", align 16 + %"33" = extractelement <2 x double> %"19", i32 0 + %"34" = extractelement <2 x double> %"19", i32 1 + store double %"33", ptr addrspace(5) %"8", align 8 + store double %"34", ptr addrspace(5) %"9", align 8 + %"35" = load double, ptr addrspace(5) %"8", align 8 + %"36" = load double, ptr addrspace(5) %"9", align 8 + %1 = insertelement <2 x double> undef, double %"35", i32 0 + %"20" = insertelement <2 x double> %1, double %"36", i32 1 + %"37" = load i64, ptr addrspace(5) %"10", align 8 + %"57" = inttoptr i64 %"37" to ptr addrspace(1) + store <2 x double> %"20", ptr addrspace(1) %"57", align 16 br label %"6" -"6": ; preds = %"18", %"59" +"6": ; preds = %"18", %"58" ret void } diff --git a/ptx/src/test/spirv_run/amdgpu_unnamed.ll b/ptx/src/test/spirv_run/amdgpu_unnamed.ll index b08350b..61e3de4 100644 --- a/ptx/src/test/spirv_run/amdgpu_unnamed.ll +++ b/ptx/src/test/spirv_run/amdgpu_unnamed.ll @@ -7,12 +7,10 @@ target triple = "amdgcn-amd-amdhsa" declare void @__zluda_ptx_impl____assertfail(i64, i64, i32, i64, i64) #0 -define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) 
%"58", ptr addrspace(4) byref(i64) %"59") #1 { -"74": +define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #1 { +"73": %"33" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"33", align 1 - %"34" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"34", align 1 %"14" = alloca i64, align 8, addrspace(5) %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) @@ -20,63 +18,63 @@ define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) %"18" = alloca i1, align 1, addrspace(5) %"19" = alloca i64, align 8, addrspace(5) %"20" = alloca i32, align 4, addrspace(5) + %"59" = alloca i64, align 8, addrspace(5) %"60" = alloca i64, align 8, addrspace(5) - %"61" = alloca i64, align 8, addrspace(5) - %"62" = alloca i32, align 4, addrspace(5) + %"61" = alloca i32, align 4, addrspace(5) + %"62" = alloca i64, align 8, addrspace(5) %"63" = alloca i64, align 8, addrspace(5) - %"64" = alloca i64, align 8, addrspace(5) + %"34" = load i64, ptr addrspace(4) %"57", align 8 + store i64 %"34", ptr addrspace(5) %"14", align 8 %"35" = load i64, ptr addrspace(4) %"58", align 8 - store i64 %"35", ptr addrspace(5) %"14", align 8 - %"36" = load i64, ptr addrspace(4) %"59", align 8 - store i64 %"36", ptr addrspace(5) %"15", align 8 - %"38" = load i64, ptr addrspace(5) %"14", align 8 - %"66" = inttoptr i64 %"38" to ptr - %"37" = load i64, ptr %"66", align 8 - store i64 %"37", ptr addrspace(5) %"16", align 8 - %"40" = load i64, ptr addrspace(5) %"16", align 8 - %"39" = icmp uge i64 %"40", 1 - store i1 %"39", ptr addrspace(5) %"18", align 1 - %"41" = load i1, ptr addrspace(5) %"18", align 1 - br i1 %"41", label %"13", label %"27" + store i64 %"35", ptr addrspace(5) %"15", align 8 + %"37" = load i64, ptr addrspace(5) %"14", align 8 + %"65" = inttoptr i64 %"37" to ptr + %"36" = load i64, ptr %"65", align 8 + store i64 %"36", ptr addrspace(5) 
%"16", align 8 + %"39" = load i64, ptr addrspace(5) %"16", align 8 + %"38" = icmp uge i64 %"39", 1 + store i1 %"38", ptr addrspace(5) %"18", align 1 + %"40" = load i1, ptr addrspace(5) %"18", align 1 + br i1 %"40", label %"13", label %"27" -"27": ; preds = %"74" +"27": ; preds = %"73" %0 = alloca i64, align 8, addrspace(5) store i64 ptrtoint (ptr addrspace(1) @0 to i64), ptr addrspace(5) %0, align 8 - %"67" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"67", ptr addrspace(5) %"19", align 8 - %"43" = load i64, ptr addrspace(5) %"19", align 8 - store i64 %"43", ptr addrspace(5) %"60", align 8 + %"66" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"66", ptr addrspace(5) %"19", align 8 + %"42" = load i64, ptr addrspace(5) %"19", align 8 + store i64 %"42", ptr addrspace(5) %"59", align 8 %1 = alloca i64, align 8, addrspace(5) store i64 ptrtoint (ptr addrspace(1) @1 to i64), ptr addrspace(5) %1, align 8 - %"69" = load i64, ptr addrspace(5) %1, align 8 - store i64 %"69", ptr addrspace(5) %"19", align 8 - %"45" = load i64, ptr addrspace(5) %"19", align 8 - store i64 %"45", ptr addrspace(5) %"61", align 8 - store i32 1, ptr addrspace(5) %"62", align 4 + %"68" = load i64, ptr addrspace(5) %1, align 8 + store i64 %"68", ptr addrspace(5) %"19", align 8 + %"44" = load i64, ptr addrspace(5) %"19", align 8 + store i64 %"44", ptr addrspace(5) %"60", align 8 + store i32 1, ptr addrspace(5) %"61", align 4 %2 = alloca i64, align 8, addrspace(5) store i64 ptrtoint (ptr addrspace(1) @2 to i64), ptr addrspace(5) %2, align 8 - %"71" = load i64, ptr addrspace(5) %2, align 8 - store i64 %"71", ptr addrspace(5) %"19", align 8 - %"47" = load i64, ptr addrspace(5) %"19", align 8 - store i64 %"47", ptr addrspace(5) %"63", align 8 - %"76" = getelementptr inbounds i8, ptr addrspace(5) %"64", i64 0 - store i64 1, ptr addrspace(5) %"76", align 8 - %"28" = load i64, ptr addrspace(5) %"60", align 8 - %"29" = load i64, ptr addrspace(5) %"61", align 8 - %"30" = load i32, ptr 
addrspace(5) %"62", align 4 - %"31" = load i64, ptr addrspace(5) %"63", align 8 - %"32" = load i64, ptr addrspace(5) %"64", align 8 + %"70" = load i64, ptr addrspace(5) %2, align 8 + store i64 %"70", ptr addrspace(5) %"19", align 8 + %"46" = load i64, ptr addrspace(5) %"19", align 8 + store i64 %"46", ptr addrspace(5) %"62", align 8 + %"75" = getelementptr inbounds i8, ptr addrspace(5) %"63", i64 0 + store i64 1, ptr addrspace(5) %"75", align 8 + %"28" = load i64, ptr addrspace(5) %"59", align 8 + %"29" = load i64, ptr addrspace(5) %"60", align 8 + %"30" = load i32, ptr addrspace(5) %"61", align 4 + %"31" = load i64, ptr addrspace(5) %"62", align 8 + %"32" = load i64, ptr addrspace(5) %"63", align 8 call void @__zluda_ptx_impl____assertfail(i64 %"28", i64 %"29", i32 %"30", i64 %"31", i64 %"32") br label %"13" -"13": ; preds = %"27", %"74" - %"49" = load i64, ptr addrspace(5) %"16", align 8 - %"48" = add i64 %"49", 1 - store i64 %"48", ptr addrspace(5) %"17", align 8 - %"50" = load i64, ptr addrspace(5) %"15", align 8 - %"51" = load i64, ptr addrspace(5) %"17", align 8 - %"73" = inttoptr i64 %"50" to ptr - store i64 %"51", ptr %"73", align 8 +"13": ; preds = %"27", %"73" + %"48" = load i64, ptr addrspace(5) %"16", align 8 + %"47" = add i64 %"48", 1 + store i64 %"47", ptr addrspace(5) %"17", align 8 + %"49" = load i64, ptr addrspace(5) %"15", align 8 + %"50" = load i64, ptr addrspace(5) %"17", align 8 + %"72" = inttoptr i64 %"49" to ptr + store i64 %"50", ptr %"72", align 8 ret void } diff --git a/ptx/src/test/spirv_run/and.ll b/ptx/src/test/spirv_run/and.ll index 2862bcc..c90f390 100644 --- a/ptx/src/test/spirv_run/and.ll +++ b/ptx/src/test/spirv_run/and.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @and(ptr addrspace(4) 
byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"31": +define protected amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"30": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"25", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"33" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load i32, ptr %"33", align 4 - store i32 %"14", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 - %"27" = and i32 %"17", %"18" - store i32 %"27", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"30" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"30", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"24", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"32" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"32", 
align 4 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 + %"26" = and i32 %"16", %"17" + store i32 %"26", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"29" = inttoptr i64 %"18" to ptr + store i32 %"19", ptr %"29", align 4 ret void } diff --git a/ptx/src/test/spirv_run/assertfail.ll b/ptx/src/test/spirv_run/assertfail.ll index 0fb51f7..001dbfe 100644 --- a/ptx/src/test/spirv_run/assertfail.ll +++ b/ptx/src/test/spirv_run/assertfail.ll @@ -3,62 +3,60 @@ target triple = "amdgcn-amd-amdhsa" declare void @__zluda_ptx_impl____assertfail(i64, i64, i32, i64, i64) #0 -define protected amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"63", ptr addrspace(4) byref(i64) %"64") #1 { -"82": +define protected amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"62", ptr addrspace(4) byref(i64) %"63") #1 { +"81": %"35" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"35", align 1 - %"36" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"36", align 1 %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) %"18" = alloca i64, align 8, addrspace(5) %"19" = alloca i32, align 4, addrspace(5) - %"65" = alloca i64, align 8, addrspace(5) - %"67" = alloca i64, align 8, addrspace(5) - %"69" = alloca i32, align 4, addrspace(5) - %"71" = alloca i64, align 8, addrspace(5) - %"73" = alloca i64, align 8, addrspace(5) + %"64" = alloca i64, align 8, addrspace(5) + %"66" = alloca i64, align 8, addrspace(5) + %"68" = alloca i32, align 4, addrspace(5) + %"70" = alloca i64, align 8, addrspace(5) + %"72" = alloca i64, align 8, addrspace(5) + %"36" = load i64, ptr addrspace(4) %"62", align 8 + store i64 %"36", ptr addrspace(5) %"15", align 8 %"37" = load i64, ptr addrspace(4) %"63", 
align 8 - store i64 %"37", ptr addrspace(5) %"15", align 8 - %"38" = load i64, ptr addrspace(4) %"64", align 8 - store i64 %"38", ptr addrspace(5) %"16", align 8 + store i64 %"37", ptr addrspace(5) %"16", align 8 %0 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %0, align 4 - %"75" = load i32, ptr addrspace(5) %0, align 4 - store i32 %"75", ptr addrspace(5) %"19", align 4 + %"74" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"74", ptr addrspace(5) %"19", align 4 + %"39" = load i64, ptr addrspace(5) %"15", align 8 + %"83" = getelementptr inbounds i8, ptr addrspace(5) %"64", i64 0 + store i64 %"39", ptr addrspace(5) %"83", align 8 %"40" = load i64, ptr addrspace(5) %"15", align 8 - %"84" = getelementptr inbounds i8, ptr addrspace(5) %"65", i64 0 - store i64 %"40", ptr addrspace(5) %"84", align 8 - %"41" = load i64, ptr addrspace(5) %"15", align 8 - %"86" = getelementptr inbounds i8, ptr addrspace(5) %"67", i64 0 - store i64 %"41", ptr addrspace(5) %"86", align 8 - %"42" = load i32, ptr addrspace(5) %"19", align 4 - %"88" = getelementptr inbounds i8, ptr addrspace(5) %"69", i64 0 - store i32 %"42", ptr addrspace(5) %"88", align 4 + %"85" = getelementptr inbounds i8, ptr addrspace(5) %"66", i64 0 + store i64 %"40", ptr addrspace(5) %"85", align 8 + %"41" = load i32, ptr addrspace(5) %"19", align 4 + %"87" = getelementptr inbounds i8, ptr addrspace(5) %"68", i64 0 + store i32 %"41", ptr addrspace(5) %"87", align 4 + %"42" = load i64, ptr addrspace(5) %"15", align 8 + %"89" = getelementptr inbounds i8, ptr addrspace(5) %"70", i64 0 + store i64 %"42", ptr addrspace(5) %"89", align 8 %"43" = load i64, ptr addrspace(5) %"15", align 8 - %"90" = getelementptr inbounds i8, ptr addrspace(5) %"71", i64 0 - store i64 %"43", ptr addrspace(5) %"90", align 8 - %"44" = load i64, ptr addrspace(5) %"15", align 8 - %"92" = getelementptr inbounds i8, ptr addrspace(5) %"73", i64 0 - store i64 %"44", ptr addrspace(5) %"92", align 8 - %"30" = load i64, ptr 
addrspace(5) %"65", align 8 - %"31" = load i64, ptr addrspace(5) %"67", align 8 - %"32" = load i32, ptr addrspace(5) %"69", align 4 - %"33" = load i64, ptr addrspace(5) %"71", align 8 - %"34" = load i64, ptr addrspace(5) %"73", align 8 + %"91" = getelementptr inbounds i8, ptr addrspace(5) %"72", i64 0 + store i64 %"43", ptr addrspace(5) %"91", align 8 + %"30" = load i64, ptr addrspace(5) %"64", align 8 + %"31" = load i64, ptr addrspace(5) %"66", align 8 + %"32" = load i32, ptr addrspace(5) %"68", align 4 + %"33" = load i64, ptr addrspace(5) %"70", align 8 + %"34" = load i64, ptr addrspace(5) %"72", align 8 call void @__zluda_ptx_impl____assertfail(i64 %"30", i64 %"31", i32 %"32", i64 %"33", i64 %"34") - %"46" = load i64, ptr addrspace(5) %"15", align 8 - %"80" = inttoptr i64 %"46" to ptr - %"45" = load i64, ptr %"80", align 8 - store i64 %"45", ptr addrspace(5) %"17", align 8 - %"48" = load i64, ptr addrspace(5) %"17", align 8 - %"47" = add i64 %"48", 1 - store i64 %"47", ptr addrspace(5) %"18", align 8 - %"49" = load i64, ptr addrspace(5) %"16", align 8 - %"50" = load i64, ptr addrspace(5) %"18", align 8 - %"81" = inttoptr i64 %"49" to ptr - store i64 %"50", ptr %"81", align 8 + %"45" = load i64, ptr addrspace(5) %"15", align 8 + %"79" = inttoptr i64 %"45" to ptr + %"44" = load i64, ptr %"79", align 8 + store i64 %"44", ptr addrspace(5) %"17", align 8 + %"47" = load i64, ptr addrspace(5) %"17", align 8 + %"46" = add i64 %"47", 1 + store i64 %"46", ptr addrspace(5) %"18", align 8 + %"48" = load i64, ptr addrspace(5) %"16", align 8 + %"49" = load i64, ptr addrspace(5) %"18", align 8 + %"80" = inttoptr i64 %"48" to ptr + store i64 %"49", ptr %"80", align 8 ret void } diff --git a/ptx/src/test/spirv_run/atom_add.ll b/ptx/src/test/spirv_run/atom_add.ll index 88ccc57..dff9e0e 100644 --- a/ptx/src/test/spirv_run/atom_add.ll +++ b/ptx/src/test/spirv_run/atom_add.ll @@ -3,45 +3,43 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [1024 x i8] undef, 
align 4 -define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 { -"38": +define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { +"37": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"28", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(4) %"30", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %"31" = inttoptr i64 %"14" to ptr - %"13" = load i32, ptr %"31", align 4 - store i32 %"13", ptr addrspace(5) %"7", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = inttoptr i64 %"16" to ptr - %"40" = getelementptr inbounds i8, ptr %"32", i64 4 - %"15" = load i32, ptr %"40", align 4 - store i32 %"15", ptr addrspace(5) %"8", align 4 - %"17" = load i32, ptr addrspace(5) %"7", align 4 - store i32 %"17", ptr addrspace(3) @"4", align 4 - %"19" = load i32, ptr addrspace(5) %"8", align 4 - %"18" = atomicrmw add ptr addrspace(3) @"4", i32 %"19" syncscope("agent-one-as") monotonic, align 4 - store i32 %"18", ptr addrspace(5) %"7", align 4 - %"20" = load i32, ptr addrspace(3) @"4", align 4 - store i32 %"20", ptr addrspace(5) %"8", align 4 - %"21" = load i64, ptr addrspace(5) %"6", align 8 - %"22" = load i32, ptr addrspace(5) %"7", align 4 - %"36" = inttoptr i64 %"21" to ptr - store i32 %"22", ptr %"36", align 4 - %"23" = load i64, ptr addrspace(5) %"6", align 8 - %"24" = load i32, ptr addrspace(5) %"8", 
align 4 - %"37" = inttoptr i64 %"23" to ptr - %"42" = getelementptr inbounds i8, ptr %"37", i64 4 - store i32 %"24", ptr %"42", align 4 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"13" = load i64, ptr addrspace(5) %"5", align 8 + %"30" = inttoptr i64 %"13" to ptr + %"12" = load i32, ptr %"30", align 4 + store i32 %"12", ptr addrspace(5) %"7", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = inttoptr i64 %"15" to ptr + %"39" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load i32, ptr %"39", align 4 + store i32 %"14", ptr addrspace(5) %"8", align 4 + %"16" = load i32, ptr addrspace(5) %"7", align 4 + store i32 %"16", ptr addrspace(3) @"4", align 4 + %"18" = load i32, ptr addrspace(5) %"8", align 4 + %"17" = atomicrmw add ptr addrspace(3) @"4", i32 %"18" syncscope("agent-one-as") monotonic, align 4 + store i32 %"17", ptr addrspace(5) %"7", align 4 + %"19" = load i32, ptr addrspace(3) @"4", align 4 + store i32 %"19", ptr addrspace(5) %"8", align 4 + %"20" = load i64, ptr addrspace(5) %"6", align 8 + %"21" = load i32, ptr addrspace(5) %"7", align 4 + %"35" = inttoptr i64 %"20" to ptr + store i32 %"21", ptr %"35", align 4 + %"22" = load i64, ptr addrspace(5) %"6", align 8 + %"23" = load i32, ptr addrspace(5) %"8", align 4 + %"36" = inttoptr i64 %"22" to ptr + %"41" = getelementptr inbounds i8, ptr %"36", i64 4 + store i32 %"23", ptr %"41", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_add_f16.ll b/ptx/src/test/spirv_run/atom_add_f16.ll index 10a22a0..e63de90 100644 --- a/ptx/src/test/spirv_run/atom_add_f16.ll +++ b/ptx/src/test/spirv_run/atom_add_f16.ll @@ -3,46 +3,44 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [1024 x i8] undef, align 4 -define protected amdgpu_kernel void @atom_add_f16(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { -"38": +define protected amdgpu_kernel void @atom_add_f16(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 
{ +"37": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca half, align 2, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"26", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"27", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"11" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"11", ptr addrspace(5) %"6", align 8 - %"13" = load i64, ptr addrspace(5) %"5", align 8 - %"29" = inttoptr i64 %"13" to ptr - %"40" = getelementptr inbounds i8, ptr %"29", i64 2 - %"30" = load i16, ptr %"40", align 2 - %"12" = bitcast i16 %"30" to half - store half %"12", ptr addrspace(5) %"7", align 2 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load half, ptr addrspace(5) %"7", align 2 - %"31" = inttoptr i64 %"15" to ptr - %"14" = atomicrmw fadd ptr %"31", half %"16" syncscope("agent-one-as") monotonic, align 2 - store half %"14", ptr addrspace(5) %"7", align 2 - %"17" = load i64, ptr addrspace(5) %"6", align 8 - %"18" = load half, ptr addrspace(5) %"7", align 2 - %"32" = inttoptr i64 %"17" to ptr - %"33" = bitcast half %"18" to i16 - store i16 %"33", ptr %"32", align 2 - %"20" = load i64, ptr addrspace(5) %"5", align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"28" = inttoptr i64 %"12" to ptr + %"39" = getelementptr inbounds i8, ptr %"28", i64 2 + %"29" = load i16, ptr %"39", align 2 + %"11" = bitcast i16 %"29" to half + store half %"11", ptr addrspace(5) %"7", align 2 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load half, ptr addrspace(5) %"7", align 2 + %"30" = inttoptr i64 %"14" to ptr + %"13" = atomicrmw fadd ptr %"30", half %"15" syncscope("agent-one-as") monotonic, align 2 + store half %"13", ptr 
addrspace(5) %"7", align 2 + %"16" = load i64, ptr addrspace(5) %"6", align 8 + %"17" = load half, ptr addrspace(5) %"7", align 2 + %"31" = inttoptr i64 %"16" to ptr + %"32" = bitcast half %"17" to i16 + store i16 %"32", ptr %"31", align 2 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"34" = inttoptr i64 %"19" to ptr + %"33" = load i16, ptr %"34", align 2 + %"18" = bitcast i16 %"33" to half + store half %"18", ptr addrspace(5) %"7", align 2 + %"20" = load i64, ptr addrspace(5) %"6", align 8 + %"21" = load half, ptr addrspace(5) %"7", align 2 %"35" = inttoptr i64 %"20" to ptr - %"34" = load i16, ptr %"35", align 2 - %"19" = bitcast i16 %"34" to half - store half %"19", ptr addrspace(5) %"7", align 2 - %"21" = load i64, ptr addrspace(5) %"6", align 8 - %"22" = load half, ptr addrspace(5) %"7", align 2 - %"36" = inttoptr i64 %"21" to ptr - %"42" = getelementptr inbounds i8, ptr %"36", i64 2 - %"37" = bitcast half %"22" to i16 - store i16 %"37", ptr %"42", align 2 + %"41" = getelementptr inbounds i8, ptr %"35", i64 2 + %"36" = bitcast half %"21" to i16 + store i16 %"36", ptr %"41", align 2 ret void } diff --git a/ptx/src/test/spirv_run/atom_add_float.ll b/ptx/src/test/spirv_run/atom_add_float.ll index efce26c..329d198 100644 --- a/ptx/src/test/spirv_run/atom_add_float.ll +++ b/ptx/src/test/spirv_run/atom_add_float.ll @@ -3,45 +3,43 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [1024 x i8] undef, align 4 -define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 { -"38": +define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { +"37": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = 
alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"28", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(4) %"30", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %"31" = inttoptr i64 %"14" to ptr - %"13" = load float, ptr %"31", align 4 - store float %"13", ptr addrspace(5) %"7", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = inttoptr i64 %"16" to ptr - %"40" = getelementptr inbounds i8, ptr %"32", i64 4 - %"15" = load float, ptr %"40", align 4 - store float %"15", ptr addrspace(5) %"8", align 4 - %"17" = load float, ptr addrspace(5) %"7", align 4 - store float %"17", ptr addrspace(3) @"4", align 4 - %"19" = load float, ptr addrspace(5) %"8", align 4 - %"18" = atomicrmw fadd ptr addrspace(3) @"4", float %"19" syncscope("agent-one-as") monotonic, align 4 - store float %"18", ptr addrspace(5) %"7", align 4 - %"20" = load float, ptr addrspace(3) @"4", align 4 - store float %"20", ptr addrspace(5) %"8", align 4 - %"21" = load i64, ptr addrspace(5) %"6", align 8 - %"22" = load float, ptr addrspace(5) %"7", align 4 - %"36" = inttoptr i64 %"21" to ptr - store float %"22", ptr %"36", align 4 - %"23" = load i64, ptr addrspace(5) %"6", align 8 - %"24" = load float, ptr addrspace(5) %"8", align 4 - %"37" = inttoptr i64 %"23" to ptr - %"42" = getelementptr inbounds i8, ptr %"37", i64 4 - store float %"24", ptr %"42", align 4 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"13" = load i64, ptr addrspace(5) %"5", align 8 + %"30" = inttoptr i64 %"13" to ptr + %"12" = load float, ptr %"30", align 4 + store float %"12", ptr addrspace(5) %"7", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = inttoptr i64 %"15" to ptr + %"39" = getelementptr inbounds i8, ptr %"31", i64 
4 + %"14" = load float, ptr %"39", align 4 + store float %"14", ptr addrspace(5) %"8", align 4 + %"16" = load float, ptr addrspace(5) %"7", align 4 + store float %"16", ptr addrspace(3) @"4", align 4 + %"18" = load float, ptr addrspace(5) %"8", align 4 + %"17" = atomicrmw fadd ptr addrspace(3) @"4", float %"18" syncscope("agent-one-as") monotonic, align 4 + store float %"17", ptr addrspace(5) %"7", align 4 + %"19" = load float, ptr addrspace(3) @"4", align 4 + store float %"19", ptr addrspace(5) %"8", align 4 + %"20" = load i64, ptr addrspace(5) %"6", align 8 + %"21" = load float, ptr addrspace(5) %"7", align 4 + %"35" = inttoptr i64 %"20" to ptr + store float %"21", ptr %"35", align 4 + %"22" = load i64, ptr addrspace(5) %"6", align 8 + %"23" = load float, ptr addrspace(5) %"8", align 4 + %"36" = inttoptr i64 %"22" to ptr + %"41" = getelementptr inbounds i8, ptr %"36", i64 4 + store float %"23", ptr %"41", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_cas.ll b/ptx/src/test/spirv_run/atom_cas.ll index fb83ed4..2e0475a 100644 --- a/ptx/src/test/spirv_run/atom_cas.ll +++ b/ptx/src/test/spirv_run/atom_cas.ll @@ -1,45 +1,43 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 { -"39": +define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 { +"38": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = 
load i64, ptr addrspace(4) %"29", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"30", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"31", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"32" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"32", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"16" = load i32, ptr addrspace(5) %"6", align 4 - %"33" = inttoptr i64 %"15" to ptr - %"41" = getelementptr inbounds i8, ptr %"33", i64 4 - %0 = cmpxchg ptr %"41", i32 %"16", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4 - %"34" = extractvalue { i32, i1 } %0, 0 - store i32 %"34", ptr addrspace(5) %"6", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"31" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"31", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"15" = load i32, ptr addrspace(5) %"6", align 4 + %"32" = inttoptr i64 %"14" to ptr + %"40" = getelementptr inbounds i8, ptr %"32", i64 4 + %0 = cmpxchg ptr %"40", i32 %"15", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4 + %"33" = extractvalue { i32, i1 } %0, 0 + store i32 %"33", ptr addrspace(5) %"6", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"35" = inttoptr i64 %"17" to ptr + %"42" = getelementptr inbounds i8, ptr %"35", i64 4 + %"16" = load i32, ptr %"42", align 4 + store i32 %"16", ptr addrspace(5) %"7", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 %"36" = inttoptr i64 %"18" to ptr - %"43" = getelementptr inbounds i8, ptr %"36", i64 4 - %"17" = load i32, ptr %"43", align 4 - store i32 %"17", ptr 
addrspace(5) %"7", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"37" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"37", align 4 - %"21" = load i64, ptr addrspace(5) %"5", align 8 - %"22" = load i32, ptr addrspace(5) %"7", align 4 - %"38" = inttoptr i64 %"21" to ptr - %"45" = getelementptr inbounds i8, ptr %"38", i64 4 - store i32 %"22", ptr %"45", align 4 + store i32 %"19", ptr %"36", align 4 + %"20" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = load i32, ptr addrspace(5) %"7", align 4 + %"37" = inttoptr i64 %"20" to ptr + %"44" = getelementptr inbounds i8, ptr %"37", i64 4 + store i32 %"21", ptr %"44", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_inc.ll b/ptx/src/test/spirv_run/atom_inc.ll index 26b7b70..6fdc3c7 100644 --- a/ptx/src/test/spirv_run/atom_inc.ll +++ b/ptx/src/test/spirv_run/atom_inc.ll @@ -5,47 +5,45 @@ declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr, i32) #0 declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_global_inc(ptr addrspace(1), i32) #0 -define protected amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #1 { -"39": +define protected amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #1 { +"38": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"31", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"32", align 8 - store i64 
%"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"33" = inttoptr i64 %"14" to ptr - %"13" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr %"33", i32 101) - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"34" = inttoptr i64 %"16" to ptr addrspace(1) - %"15" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_global_inc(ptr addrspace(1) %"34", i32 101) - store i32 %"15", ptr addrspace(5) %"7", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"32" = inttoptr i64 %"13" to ptr + %"12" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr %"32", i32 101) + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"33" = inttoptr i64 %"15" to ptr addrspace(1) + %"14" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_global_inc(ptr addrspace(1) %"33", i32 101) + store i32 %"14", ptr addrspace(5) %"7", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"34" = inttoptr i64 %"17" to ptr + %"16" = load i32, ptr %"34", align 4 + store i32 %"16", ptr addrspace(5) %"8", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 %"35" = inttoptr i64 %"18" to ptr - %"17" = load i32, ptr %"35", align 4 - store i32 %"17", ptr addrspace(5) %"8", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"36" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"36", align 4 - %"21" = load i64, ptr addrspace(5) %"5", align 8 - %"22" = load i32, ptr addrspace(5) %"7", align 4 - %"37" = inttoptr i64 %"21" to ptr - %"49" = getelementptr inbounds i8, ptr %"37", i64 4 - store i32 %"22", ptr %"49", align 4 - %"23" = load i64, ptr addrspace(5) %"5", align 8 - %"24" = load i32, ptr addrspace(5) %"8", align 4 - 
%"38" = inttoptr i64 %"23" to ptr - %"51" = getelementptr inbounds i8, ptr %"38", i64 8 - store i32 %"24", ptr %"51", align 4 + store i32 %"19", ptr %"35", align 4 + %"20" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = load i32, ptr addrspace(5) %"7", align 4 + %"36" = inttoptr i64 %"20" to ptr + %"48" = getelementptr inbounds i8, ptr %"36", i64 4 + store i32 %"21", ptr %"48", align 4 + %"22" = load i64, ptr addrspace(5) %"5", align 8 + %"23" = load i32, ptr addrspace(5) %"8", align 4 + %"37" = inttoptr i64 %"22" to ptr + %"50" = getelementptr inbounds i8, ptr %"37", i64 8 + store i32 %"23", ptr %"50", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_ld_st.ll b/ptx/src/test/spirv_run/atom_ld_st.ll index 31f39c8..3b6488c 100644 --- a/ptx/src/test/spirv_run/atom_ld_st.ll +++ b/ptx/src/test/spirv_run/atom_ld_st.ll @@ -1,27 +1,25 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @atom_ld_st(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 { -"19": +define protected amdgpu_kernel void @atom_ld_st(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { +"18": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"14", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"16", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", 
align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"16" = inttoptr i64 %"11" to ptr + %"10" = load atomic i32, ptr %"16" syncscope("agent-one-as") acquire, align 4 + store i32 %"10", ptr addrspace(5) %"6", align 4 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"13" = load i32, ptr addrspace(5) %"6", align 4 %"17" = inttoptr i64 %"12" to ptr - %"11" = load atomic i32, ptr %"17" syncscope("agent-one-as") acquire, align 4 - store i32 %"11", ptr addrspace(5) %"6", align 4 - %"13" = load i64, ptr addrspace(5) %"5", align 8 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = inttoptr i64 %"13" to ptr - store atomic i32 %"14", ptr %"18" syncscope("agent-one-as") release, align 4 + store atomic i32 %"13", ptr %"17" syncscope("agent-one-as") release, align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_ld_st_vec.ll b/ptx/src/test/spirv_run/atom_ld_st_vec.ll index 95ff710..7ea0fc5 100644 --- a/ptx/src/test/spirv_run/atom_ld_st_vec.ll +++ b/ptx/src/test/spirv_run/atom_ld_st_vec.ll @@ -1,36 +1,34 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @atom_ld_st_vec(ptr addrspace(4) byref(i64) %"20", ptr addrspace(4) byref(i64) %"21") #0 { -"24": +define protected amdgpu_kernel void @atom_ld_st_vec(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { +"23": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"11" = load i64, ptr addrspace(4) %"19", align 8 + store 
i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"12", ptr addrspace(5) %"4", align 8 - %"13" = load i64, ptr addrspace(4) %"21", align 8 - store i64 %"13", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"22" = inttoptr i64 %"14" to ptr - %0 = load atomic i128, ptr %"22" syncscope("agent-one-as") acquire, align 16 + store i64 %"12", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"21" = inttoptr i64 %"13" to ptr + %0 = load atomic i128, ptr %"21" syncscope("agent-one-as") acquire, align 16 %"8" = bitcast i128 %0 to <2 x i64> - %"15" = extractelement <2 x i64> %"8", i32 0 - %"16" = extractelement <2 x i64> %"8", i32 1 - store i64 %"15", ptr addrspace(5) %"6", align 8 - store i64 %"16", ptr addrspace(5) %"7", align 8 - %"17" = load i64, ptr addrspace(5) %"6", align 8 - %"18" = load i64, ptr addrspace(5) %"7", align 8 - %1 = insertelement <2 x i64> undef, i64 %"17", i32 0 - %"9" = insertelement <2 x i64> %1, i64 %"18", i32 1 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"23" = inttoptr i64 %"19" to ptr + %"14" = extractelement <2 x i64> %"8", i32 0 + %"15" = extractelement <2 x i64> %"8", i32 1 + store i64 %"14", ptr addrspace(5) %"6", align 8 + store i64 %"15", ptr addrspace(5) %"7", align 8 + %"16" = load i64, ptr addrspace(5) %"6", align 8 + %"17" = load i64, ptr addrspace(5) %"7", align 8 + %1 = insertelement <2 x i64> undef, i64 %"16", i32 0 + %"9" = insertelement <2 x i64> %1, i64 %"17", i32 1 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"22" = inttoptr i64 %"18" to ptr %2 = bitcast <2 x i64> %"9" to i128 - store atomic i128 %2, ptr %"23" syncscope("agent-one-as") release, align 16 + store atomic i128 %2, ptr %"22" syncscope("agent-one-as") release, align 16 ret void } diff --git a/ptx/src/test/spirv_run/atom_max_u32.ll b/ptx/src/test/spirv_run/atom_max_u32.ll index 7a89a13..64cb430 100644 --- 
a/ptx/src/test/spirv_run/atom_max_u32.ll +++ b/ptx/src/test/spirv_run/atom_max_u32.ll @@ -1,38 +1,36 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @atom_max_u32(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"31": +define protected amdgpu_kernel void @atom_max_u32(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"30": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"24", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"13" = load i64, ptr addrspace(5) %"5", align 8 + %"14" = load i32, ptr addrspace(5) %"6", align 4 %"25" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"25", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %"15" = load i32, ptr addrspace(5) %"6", align 4 - %"26" = inttoptr i64 %"14" to ptr - store i32 %"15", ptr %"26", align 4 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"27" = inttoptr 
i64 %"17" to ptr - %"33" = getelementptr inbounds i8, ptr %"27", i64 4 - %"16" = load i32, ptr %"33", align 4 - store i32 %"16", ptr addrspace(5) %"7", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"7", align 4 - %"29" = inttoptr i64 %"19" to ptr - %"28" = atomicrmw umax ptr %"29", i32 %"20" syncscope("agent-one-as") monotonic, align 4 - store i32 %"28", ptr addrspace(5) %"6", align 4 + store i32 %"14", ptr %"25", align 4 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"26" = inttoptr i64 %"16" to ptr + %"32" = getelementptr inbounds i8, ptr %"26", i64 4 + %"15" = load i32, ptr %"32", align 4 + store i32 %"15", ptr addrspace(5) %"7", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"7", align 4 + %"28" = inttoptr i64 %"18" to ptr + %"27" = atomicrmw umax ptr %"28", i32 %"19" syncscope("agent-one-as") monotonic, align 4 + store i32 %"27", ptr addrspace(5) %"6", align 4 ret void } diff --git a/ptx/src/test/spirv_run/b64tof64.ll b/ptx/src/test/spirv_run/b64tof64.ll index 2c2b674..5cd7a2c 100644 --- a/ptx/src/test/spirv_run/b64tof64.ll +++ b/ptx/src/test/spirv_run/b64tof64.ll @@ -1,34 +1,32 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": +define protected amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"23": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca double, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, 
addrspace(5) %"7" = alloca i64, align 8, addrspace(5) - %"10" = load double, ptr addrspace(4) %"18", align 8 - store double %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"6", align 8 - %"13" = load double, ptr addrspace(5) %"4", align 8 - %"21" = bitcast double %"13" to i64 + %"9" = load double, ptr addrspace(4) %"17", align 8 + store double %"9", ptr addrspace(5) %"4", align 8 + %"10" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"12" = load double, ptr addrspace(5) %"4", align 8 + %"20" = bitcast double %"12" to i64 %0 = alloca i64, align 8, addrspace(5) - store i64 %"21", ptr addrspace(5) %0, align 8 - %"12" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"15" = load i64, ptr addrspace(5) %"5", align 8 + store i64 %"20", ptr addrspace(5) %0, align 8 + %"11" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = inttoptr i64 %"14" to ptr + %"13" = load i64, ptr %"21", align 8 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"6", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 %"22" = inttoptr i64 %"15" to ptr - %"14" = load i64, ptr %"22", align 8 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"6", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"23" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"23", align 8 + store i64 %"16", ptr %"22", align 8 ret void } diff --git a/ptx/src/test/spirv_run/barrier.ll b/ptx/src/test/spirv_run/barrier.ll index c247e32..e2e65f2 100644 --- a/ptx/src/test/spirv_run/barrier.ll +++ b/ptx/src/test/spirv_run/barrier.ll @@ -4,11 +4,9 @@ target triple = "amdgcn-amd-amdhsa" declare void @__zluda_ptx_impl__barrier_sync(i32) #0 define protected amdgpu_kernel 
void @barrier() #1 { -"5": +"4": %"2" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"2", align 1 - %"3" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"3", align 1 call void @__zluda_ptx_impl__barrier_sync(i32 0) ret void } diff --git a/ptx/src/test/spirv_run/bfe.ll b/ptx/src/test/spirv_run/bfe.ll index c67513a..99fd766 100644 --- a/ptx/src/test/spirv_run/bfe.ll +++ b/ptx/src/test/spirv_run/bfe.ll @@ -3,44 +3,42 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__bfe_u32(i32, i32, i32) #0 -define protected amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #1 { -"35": +define protected amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 { +"34": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"28", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"30", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"14" to ptr - %"13" = load i32, ptr %"31", align 4 - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"32" = inttoptr i64 %"16" to ptr - %"42" = getelementptr inbounds i8, ptr %"32", i64 4 - %"15" = load i32, ptr %"42", align 4 - store i32 %"15", ptr addrspace(5) %"7", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"33" = inttoptr i64 %"18" to ptr - %"44" = 
getelementptr inbounds i8, ptr %"33", i64 8 - %"17" = load i32, ptr %"44", align 4 - store i32 %"17", ptr addrspace(5) %"8", align 4 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"21" = load i32, ptr addrspace(5) %"7", align 4 - %"22" = load i32, ptr addrspace(5) %"8", align 4 - %"19" = call i32 @__zluda_ptx_impl__bfe_u32(i32 %"20", i32 %"21", i32 %"22") - store i32 %"19", ptr addrspace(5) %"6", align 4 - %"23" = load i64, ptr addrspace(5) %"5", align 8 - %"24" = load i32, ptr addrspace(5) %"6", align 4 - %"34" = inttoptr i64 %"23" to ptr - store i32 %"24", ptr %"34", align 4 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"13" to ptr + %"12" = load i32, ptr %"30", align 4 + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"31" = inttoptr i64 %"15" to ptr + %"41" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load i32, ptr %"41", align 4 + store i32 %"14", ptr addrspace(5) %"7", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"32" = inttoptr i64 %"17" to ptr + %"43" = getelementptr inbounds i8, ptr %"32", i64 8 + %"16" = load i32, ptr %"43", align 4 + store i32 %"16", ptr addrspace(5) %"8", align 4 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"20" = load i32, ptr addrspace(5) %"7", align 4 + %"21" = load i32, ptr addrspace(5) %"8", align 4 + %"18" = call i32 @__zluda_ptx_impl__bfe_u32(i32 %"19", i32 %"20", i32 %"21") + store i32 %"18", ptr addrspace(5) %"6", align 4 + %"22" = load i64, ptr addrspace(5) %"5", align 8 + %"23" = load i32, ptr addrspace(5) %"6", align 4 + %"33" = inttoptr i64 %"22" to ptr + store i32 %"23", ptr %"33", align 4 ret void } diff --git a/ptx/src/test/spirv_run/bfi.ll b/ptx/src/test/spirv_run/bfi.ll index 2fc4191..bea4ac5 100644 --- a/ptx/src/test/spirv_run/bfi.ll +++ b/ptx/src/test/spirv_run/bfi.ll @@ -3,51 +3,49 @@ target triple = "amdgcn-amd-amdhsa" declare i32 
@__zluda_ptx_impl__bfi_b32(i32, i32, i32, i32) #0 -define protected amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 { -"45": +define protected amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #1 { +"44": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + %"11" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"35", align 8 - store i64 %"12", ptr addrspace(5) %"4", align 8 - %"13" = load i64, ptr addrspace(4) %"36", align 8 - store i64 %"13", ptr addrspace(5) %"5", align 8 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"37" = inttoptr i64 %"15" to ptr - %"14" = load i32, ptr %"37", align 4 - store i32 %"14", ptr addrspace(5) %"6", align 4 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"38" = inttoptr i64 %"17" to ptr - %"53" = getelementptr inbounds i8, ptr %"38", i64 4 - %"16" = load i32, ptr %"53", align 4 - store i32 %"16", ptr addrspace(5) %"7", align 4 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"39" = inttoptr i64 %"19" to ptr - %"55" = getelementptr inbounds i8, ptr %"39", i64 8 - %"18" = load i32, ptr %"55", align 4 - store i32 %"18", ptr addrspace(5) %"8", align 4 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"40" = inttoptr i64 %"21" to ptr - %"57" = getelementptr inbounds i8, ptr %"40", i64 12 - %"20" = load i32, ptr %"57", align 4 - store i32 %"20", ptr addrspace(5) %"9", align 4 - %"23" = load i32, ptr addrspace(5) %"6", align 4 - %"24" = load i32, ptr addrspace(5) %"7", align 4 
- %"25" = load i32, ptr addrspace(5) %"8", align 4 - %"26" = load i32, ptr addrspace(5) %"9", align 4 - %"41" = call i32 @__zluda_ptx_impl__bfi_b32(i32 %"23", i32 %"24", i32 %"25", i32 %"26") - store i32 %"41", ptr addrspace(5) %"6", align 4 - %"27" = load i64, ptr addrspace(5) %"5", align 8 - %"28" = load i32, ptr addrspace(5) %"6", align 4 - %"44" = inttoptr i64 %"27" to ptr - store i32 %"28", ptr %"44", align 4 + store i64 %"12", ptr addrspace(5) %"5", align 8 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"36" = inttoptr i64 %"14" to ptr + %"13" = load i32, ptr %"36", align 4 + store i32 %"13", ptr addrspace(5) %"6", align 4 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"37" = inttoptr i64 %"16" to ptr + %"52" = getelementptr inbounds i8, ptr %"37", i64 4 + %"15" = load i32, ptr %"52", align 4 + store i32 %"15", ptr addrspace(5) %"7", align 4 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"38" = inttoptr i64 %"18" to ptr + %"54" = getelementptr inbounds i8, ptr %"38", i64 8 + %"17" = load i32, ptr %"54", align 4 + store i32 %"17", ptr addrspace(5) %"8", align 4 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"39" = inttoptr i64 %"20" to ptr + %"56" = getelementptr inbounds i8, ptr %"39", i64 12 + %"19" = load i32, ptr %"56", align 4 + store i32 %"19", ptr addrspace(5) %"9", align 4 + %"22" = load i32, ptr addrspace(5) %"6", align 4 + %"23" = load i32, ptr addrspace(5) %"7", align 4 + %"24" = load i32, ptr addrspace(5) %"8", align 4 + %"25" = load i32, ptr addrspace(5) %"9", align 4 + %"40" = call i32 @__zluda_ptx_impl__bfi_b32(i32 %"22", i32 %"23", i32 %"24", i32 %"25") + store i32 %"40", ptr addrspace(5) %"6", align 4 + %"26" = load i64, ptr addrspace(5) %"5", align 8 + %"27" = load i32, ptr addrspace(5) %"6", align 4 + %"43" = inttoptr i64 %"26" to ptr + store i32 %"27", ptr %"43", align 4 ret void } diff --git a/ptx/src/test/spirv_run/bfind.ll b/ptx/src/test/spirv_run/bfind.ll index 4b7dc1b..ebd9fea 100644 --- 
a/ptx/src/test/spirv_run/bfind.ll +++ b/ptx/src/test/spirv_run/bfind.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 { -"53": +define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 { +"52": %"12" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"12", align 1 - %"13" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"13", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -15,56 +13,56 @@ define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"42", pt %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) + %"13" = load i64, ptr addrspace(4) %"41", align 8 + store i64 %"13", ptr addrspace(5) %"4", align 8 %"14" = load i64, ptr addrspace(4) %"42", align 8 - store i64 %"14", ptr addrspace(5) %"4", align 8 - %"15" = load i64, ptr addrspace(4) %"43", align 8 - store i64 %"15", ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"44" = inttoptr i64 %"17" to ptr - %"16" = load i32, ptr %"44", align 4 - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"45" = inttoptr i64 %"19" to ptr - %"55" = getelementptr inbounds i8, ptr %"45", i64 4 - %"18" = load i32, ptr %"55", align 4 - store i32 %"18", ptr addrspace(5) %"7", align 4 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"46" = inttoptr i64 %"21" to ptr - %"57" = getelementptr inbounds i8, ptr %"46", i64 8 - %"20" = load i32, ptr %"57", align 4 
- store i32 %"20", ptr addrspace(5) %"8", align 4 - %"23" = load i32, ptr addrspace(5) %"6", align 4 - %0 = icmp eq i32 %"23", 0 - %1 = call i32 @llvm.ctlz.i32(i32 %"23", i1 true) + store i64 %"14", ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"43" = inttoptr i64 %"16" to ptr + %"15" = load i32, ptr %"43", align 4 + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"44" = inttoptr i64 %"18" to ptr + %"54" = getelementptr inbounds i8, ptr %"44", i64 4 + %"17" = load i32, ptr %"54", align 4 + store i32 %"17", ptr addrspace(5) %"7", align 4 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"45" = inttoptr i64 %"20" to ptr + %"56" = getelementptr inbounds i8, ptr %"45", i64 8 + %"19" = load i32, ptr %"56", align 4 + store i32 %"19", ptr addrspace(5) %"8", align 4 + %"22" = load i32, ptr addrspace(5) %"6", align 4 + %0 = icmp eq i32 %"22", 0 + %1 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true) %2 = sub i32 31, %1 - %"47" = select i1 %0, i32 -1, i32 %2 - store i32 %"47", ptr addrspace(5) %"9", align 4 - %"25" = load i32, ptr addrspace(5) %"7", align 4 - %3 = icmp eq i32 %"25", 0 - %4 = call i32 @llvm.ctlz.i32(i32 %"25", i1 true) + %"46" = select i1 %0, i32 -1, i32 %2 + store i32 %"46", ptr addrspace(5) %"9", align 4 + %"24" = load i32, ptr addrspace(5) %"7", align 4 + %3 = icmp eq i32 %"24", 0 + %4 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true) %5 = sub i32 31, %4 - %"48" = select i1 %3, i32 -1, i32 %5 - store i32 %"48", ptr addrspace(5) %"10", align 4 - %"27" = load i32, ptr addrspace(5) %"8", align 4 - %6 = icmp eq i32 %"27", 0 - %7 = call i32 @llvm.ctlz.i32(i32 %"27", i1 true) + %"47" = select i1 %3, i32 -1, i32 %5 + store i32 %"47", ptr addrspace(5) %"10", align 4 + %"26" = load i32, ptr addrspace(5) %"8", align 4 + %6 = icmp eq i32 %"26", 0 + %7 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true) %8 = sub i32 31, %7 - %"49" = select i1 %6, i32 -1, i32 %8 - store i32 %"49", ptr 
addrspace(5) %"11", align 4 - %"28" = load i64, ptr addrspace(5) %"5", align 8 - %"29" = load i32, ptr addrspace(5) %"9", align 4 - %"50" = inttoptr i64 %"28" to ptr - store i32 %"29", ptr %"50", align 4 - %"30" = load i64, ptr addrspace(5) %"5", align 8 - %"31" = load i32, ptr addrspace(5) %"10", align 4 - %"51" = inttoptr i64 %"30" to ptr - %"59" = getelementptr inbounds i8, ptr %"51", i64 4 - store i32 %"31", ptr %"59", align 4 - %"32" = load i64, ptr addrspace(5) %"5", align 8 - %"33" = load i32, ptr addrspace(5) %"11", align 4 - %"52" = inttoptr i64 %"32" to ptr - %"61" = getelementptr inbounds i8, ptr %"52", i64 8 - store i32 %"33", ptr %"61", align 4 + %"48" = select i1 %6, i32 -1, i32 %8 + store i32 %"48", ptr addrspace(5) %"11", align 4 + %"27" = load i64, ptr addrspace(5) %"5", align 8 + %"28" = load i32, ptr addrspace(5) %"9", align 4 + %"49" = inttoptr i64 %"27" to ptr + store i32 %"28", ptr %"49", align 4 + %"29" = load i64, ptr addrspace(5) %"5", align 8 + %"30" = load i32, ptr addrspace(5) %"10", align 4 + %"50" = inttoptr i64 %"29" to ptr + %"58" = getelementptr inbounds i8, ptr %"50", i64 4 + store i32 %"30", ptr %"58", align 4 + %"31" = load i64, ptr addrspace(5) %"5", align 8 + %"32" = load i32, ptr addrspace(5) %"11", align 4 + %"51" = inttoptr i64 %"31" to ptr + %"60" = getelementptr inbounds i8, ptr %"51", i64 8 + store i32 %"32", ptr %"60", align 4 ret void } diff --git a/ptx/src/test/spirv_run/bfind_shiftamt.ll b/ptx/src/test/spirv_run/bfind_shiftamt.ll index 6a3ca72..fd21514 100644 --- a/ptx/src/test/spirv_run/bfind_shiftamt.ll +++ b/ptx/src/test/spirv_run/bfind_shiftamt.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) 
%"43") #0 { -"53": +define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 { +"52": %"12" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"12", align 1 - %"13" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"13", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -15,53 +13,53 @@ define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) + %"13" = load i64, ptr addrspace(4) %"41", align 8 + store i64 %"13", ptr addrspace(5) %"4", align 8 %"14" = load i64, ptr addrspace(4) %"42", align 8 - store i64 %"14", ptr addrspace(5) %"4", align 8 - %"15" = load i64, ptr addrspace(4) %"43", align 8 - store i64 %"15", ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"44" = inttoptr i64 %"17" to ptr - %"16" = load i32, ptr %"44", align 4 - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"45" = inttoptr i64 %"19" to ptr - %"55" = getelementptr inbounds i8, ptr %"45", i64 4 - %"18" = load i32, ptr %"55", align 4 - store i32 %"18", ptr addrspace(5) %"7", align 4 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"46" = inttoptr i64 %"21" to ptr - %"57" = getelementptr inbounds i8, ptr %"46", i64 8 - %"20" = load i32, ptr %"57", align 4 - store i32 %"20", ptr addrspace(5) %"8", align 4 - %"23" = load i32, ptr addrspace(5) %"6", align 4 - %0 = icmp eq i32 %"23", 0 - %1 = call i32 @llvm.ctlz.i32(i32 %"23", i1 true) - %"47" = select i1 %0, i32 -1, i32 %1 - store i32 %"47", ptr addrspace(5) %"9", align 4 - %"25" = load i32, ptr addrspace(5) %"7", align 4 - %2 = icmp eq i32 %"25", 0 - %3 = call i32 @llvm.ctlz.i32(i32 %"25", i1 true) - %"48" = select i1 %2, 
i32 -1, i32 %3 - store i32 %"48", ptr addrspace(5) %"10", align 4 - %"27" = load i32, ptr addrspace(5) %"8", align 4 - %4 = icmp eq i32 %"27", 0 - %5 = call i32 @llvm.ctlz.i32(i32 %"27", i1 true) - %"49" = select i1 %4, i32 -1, i32 %5 - store i32 %"49", ptr addrspace(5) %"11", align 4 - %"28" = load i64, ptr addrspace(5) %"5", align 8 - %"29" = load i32, ptr addrspace(5) %"9", align 4 - %"50" = inttoptr i64 %"28" to ptr - store i32 %"29", ptr %"50", align 4 - %"30" = load i64, ptr addrspace(5) %"5", align 8 - %"31" = load i32, ptr addrspace(5) %"10", align 4 - %"51" = inttoptr i64 %"30" to ptr - %"59" = getelementptr inbounds i8, ptr %"51", i64 4 - store i32 %"31", ptr %"59", align 4 - %"32" = load i64, ptr addrspace(5) %"5", align 8 - %"33" = load i32, ptr addrspace(5) %"11", align 4 - %"52" = inttoptr i64 %"32" to ptr - %"61" = getelementptr inbounds i8, ptr %"52", i64 8 - store i32 %"33", ptr %"61", align 4 + store i64 %"14", ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"43" = inttoptr i64 %"16" to ptr + %"15" = load i32, ptr %"43", align 4 + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"44" = inttoptr i64 %"18" to ptr + %"54" = getelementptr inbounds i8, ptr %"44", i64 4 + %"17" = load i32, ptr %"54", align 4 + store i32 %"17", ptr addrspace(5) %"7", align 4 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"45" = inttoptr i64 %"20" to ptr + %"56" = getelementptr inbounds i8, ptr %"45", i64 8 + %"19" = load i32, ptr %"56", align 4 + store i32 %"19", ptr addrspace(5) %"8", align 4 + %"22" = load i32, ptr addrspace(5) %"6", align 4 + %0 = icmp eq i32 %"22", 0 + %1 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true) + %"46" = select i1 %0, i32 -1, i32 %1 + store i32 %"46", ptr addrspace(5) %"9", align 4 + %"24" = load i32, ptr addrspace(5) %"7", align 4 + %2 = icmp eq i32 %"24", 0 + %3 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true) + %"47" = select i1 %2, i32 -1, i32 %3 + 
store i32 %"47", ptr addrspace(5) %"10", align 4 + %"26" = load i32, ptr addrspace(5) %"8", align 4 + %4 = icmp eq i32 %"26", 0 + %5 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true) + %"48" = select i1 %4, i32 -1, i32 %5 + store i32 %"48", ptr addrspace(5) %"11", align 4 + %"27" = load i64, ptr addrspace(5) %"5", align 8 + %"28" = load i32, ptr addrspace(5) %"9", align 4 + %"49" = inttoptr i64 %"27" to ptr + store i32 %"28", ptr %"49", align 4 + %"29" = load i64, ptr addrspace(5) %"5", align 8 + %"30" = load i32, ptr addrspace(5) %"10", align 4 + %"50" = inttoptr i64 %"29" to ptr + %"58" = getelementptr inbounds i8, ptr %"50", i64 4 + store i32 %"30", ptr %"58", align 4 + %"31" = load i64, ptr addrspace(5) %"5", align 8 + %"32" = load i32, ptr addrspace(5) %"11", align 4 + %"51" = inttoptr i64 %"31" to ptr + %"60" = getelementptr inbounds i8, ptr %"51", i64 8 + store i32 %"32", ptr %"60", align 4 ret void } diff --git a/ptx/src/test/spirv_run/block.ll b/ptx/src/test/spirv_run/block.ll index 87c9374..87dd227 100644 --- a/ptx/src/test/spirv_run/block.ll +++ b/ptx/src/test/spirv_run/block.ll @@ -1,35 +1,33 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"27": +define protected amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"26": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) + 
%"10" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"14" to ptr - %"13" = load i64, ptr %"25", align 8 - store i64 %"13", ptr addrspace(5) %"6", align 8 - %"16" = load i64, ptr addrspace(5) %"6", align 8 - %"15" = add i64 %"16", 1 - store i64 %"15", ptr addrspace(5) %"7", align 8 - %"18" = load i64, ptr addrspace(5) %"8", align 8 - %"17" = add i64 %"18", 1 - store i64 %"17", ptr addrspace(5) %"8", align 8 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i64, ptr addrspace(5) %"7", align 8 - %"26" = inttoptr i64 %"19" to ptr - store i64 %"20", ptr %"26", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"13" to ptr + %"12" = load i64, ptr %"24", align 8 + store i64 %"12", ptr addrspace(5) %"6", align 8 + %"15" = load i64, ptr addrspace(5) %"6", align 8 + %"14" = add i64 %"15", 1 + store i64 %"14", ptr addrspace(5) %"7", align 8 + %"17" = load i64, ptr addrspace(5) %"8", align 8 + %"16" = add i64 %"17", 1 + store i64 %"16", ptr addrspace(5) %"8", align 8 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i64, ptr addrspace(5) %"7", align 8 + %"25" = inttoptr i64 %"18" to ptr + store i64 %"19", ptr %"25", align 8 ret void } diff --git a/ptx/src/test/spirv_run/bra.ll b/ptx/src/test/spirv_run/bra.ll index 6188dc7..6d62cca 100644 --- a/ptx/src/test/spirv_run/bra.ll +++ b/ptx/src/test/spirv_run/bra.ll @@ -1,43 +1,41 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = 
"amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 { -"29": +define protected amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { +"28": %"11" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"11", align 1 - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) + %"12" = load i64, ptr addrspace(4) %"24", align 8 + store i64 %"12", ptr addrspace(5) %"7", align 8 %"13" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"13", ptr addrspace(5) %"7", align 8 - %"14" = load i64, ptr addrspace(4) %"26", align 8 - store i64 %"14", ptr addrspace(5) %"8", align 8 - %"16" = load i64, ptr addrspace(5) %"7", align 8 - %"27" = inttoptr i64 %"16" to ptr - %"15" = load i64, ptr %"27", align 8 - store i64 %"15", ptr addrspace(5) %"9", align 8 + store i64 %"13", ptr addrspace(5) %"8", align 8 + %"15" = load i64, ptr addrspace(5) %"7", align 8 + %"26" = inttoptr i64 %"15" to ptr + %"14" = load i64, ptr %"26", align 8 + store i64 %"14", ptr addrspace(5) %"9", align 8 br label %"4" -"4": ; preds = %"29" - %"18" = load i64, ptr addrspace(5) %"9", align 8 - %"17" = add i64 %"18", 1 - store i64 %"17", ptr addrspace(5) %"10", align 8 +"4": ; preds = %"28" + %"17" = load i64, ptr addrspace(5) %"9", align 8 + %"16" = add i64 %"17", 1 + store i64 %"16", ptr addrspace(5) %"10", align 8 br label %"6" 0: ; No predecessors! 
- %"20" = load i64, ptr addrspace(5) %"9", align 8 - %"19" = add i64 %"20", 2 - store i64 %"19", ptr addrspace(5) %"10", align 8 + %"19" = load i64, ptr addrspace(5) %"9", align 8 + %"18" = add i64 %"19", 2 + store i64 %"18", ptr addrspace(5) %"10", align 8 br label %"6" "6": ; preds = %0, %"4" - %"21" = load i64, ptr addrspace(5) %"8", align 8 - %"22" = load i64, ptr addrspace(5) %"10", align 8 - %"28" = inttoptr i64 %"21" to ptr - store i64 %"22", ptr %"28", align 8 + %"20" = load i64, ptr addrspace(5) %"8", align 8 + %"21" = load i64, ptr addrspace(5) %"10", align 8 + %"27" = inttoptr i64 %"20" to ptr + store i64 %"21", ptr %"27", align 8 ret void } diff --git a/ptx/src/test/spirv_run/brev.ll b/ptx/src/test/spirv_run/brev.ll index e43d1c6..a519c2b 100644 --- a/ptx/src/test/spirv_run/brev.ll +++ b/ptx/src/test/spirv_run/brev.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - 
%"19" = inttoptr i64 %"12" to ptr - %"11" = load i32, ptr %"19", align 4 - store i32 %"11", ptr addrspace(5) %"6", align 4 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %"13" = call i32 @llvm.bitreverse.i32(i32 %"14") - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load i32, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store i32 %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load i32, ptr %"18", align 4 + store i32 %"10", ptr addrspace(5) %"6", align 4 + %"13" = load i32, ptr addrspace(5) %"6", align 4 + %"12" = call i32 @llvm.bitreverse.i32(i32 %"13") + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load i32, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store i32 %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/call.ll b/ptx/src/test/spirv_run/call.ll index af26549..d89322e 100644 --- a/ptx/src/test/spirv_run/call.ll +++ b/ptx/src/test/spirv_run/call.ll @@ -1,63 +1,59 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define private i64 @incr(i64 %"31") #0 { -"51": +define private i64 @incr(i64 %"29") #0 { +"49": %"18" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) - %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 - %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 - %"44" = alloca i64, align 8, addrspace(5) - %"45" = alloca i64, align 8, addrspace(5) - %"14" = alloca i64, align 8, addrspace(5) - store i64 %"31", ptr addrspace(5) %"18", align 
8 - %"32" = load i64, ptr addrspace(5) %"18", align 8 - store i64 %"32", ptr addrspace(5) %"45", align 8 - %"33" = load i64, ptr addrspace(5) %"45", align 8 - store i64 %"33", ptr addrspace(5) %"14", align 8 - %"35" = load i64, ptr addrspace(5) %"14", align 8 - %"34" = add i64 %"35", 1 - store i64 %"34", ptr addrspace(5) %"14", align 8 - %"36" = load i64, ptr addrspace(5) %"14", align 8 - store i64 %"36", ptr addrspace(5) %"44", align 8 - %"37" = load i64, ptr addrspace(5) %"44", align 8 - store i64 %"37", ptr addrspace(5) %"17", align 8 - %"38" = load i64, ptr addrspace(5) %"17", align 8 - ret i64 %"38" -} - -define protected amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { -"50": - %"19" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"19", align 1 %"20" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"20", align 1 + %"42" = alloca i64, align 8, addrspace(5) + %"43" = alloca i64, align 8, addrspace(5) + %"14" = alloca i64, align 8, addrspace(5) + store i64 %"29", ptr addrspace(5) %"18", align 8 + %"30" = load i64, ptr addrspace(5) %"18", align 8 + store i64 %"30", ptr addrspace(5) %"43", align 8 + %"31" = load i64, ptr addrspace(5) %"43", align 8 + store i64 %"31", ptr addrspace(5) %"14", align 8 + %"33" = load i64, ptr addrspace(5) %"14", align 8 + %"32" = add i64 %"33", 1 + store i64 %"32", ptr addrspace(5) %"14", align 8 + %"34" = load i64, ptr addrspace(5) %"14", align 8 + store i64 %"34", ptr addrspace(5) %"42", align 8 + %"35" = load i64, ptr addrspace(5) %"42", align 8 + store i64 %"35", ptr addrspace(5) %"17", align 8 + %"36" = load i64, ptr addrspace(5) %"17", align 8 + ret i64 %"36" +} + +define protected amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { +"48": + %"19" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"19", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, 
align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) - %"42" = alloca i64, align 8, addrspace(5) - %"43" = alloca i64, align 8, addrspace(5) - %"23" = load i64, ptr addrspace(4) %"40", align 8 - store i64 %"23", ptr addrspace(5) %"7", align 8 - %"24" = load i64, ptr addrspace(4) %"41", align 8 - store i64 %"24", ptr addrspace(5) %"8", align 8 - %"26" = load i64, ptr addrspace(5) %"7", align 8 - %"46" = inttoptr i64 %"26" to ptr addrspace(1) - %"25" = load i64, ptr addrspace(1) %"46", align 8 - store i64 %"25", ptr addrspace(5) %"9", align 8 - %"27" = load i64, ptr addrspace(5) %"9", align 8 - store i64 %"27", ptr addrspace(5) %"42", align 8 - %"15" = load i64, ptr addrspace(5) %"42", align 8 + %"40" = alloca i64, align 8, addrspace(5) + %"41" = alloca i64, align 8, addrspace(5) + %"21" = load i64, ptr addrspace(4) %"38", align 8 + store i64 %"21", ptr addrspace(5) %"7", align 8 + %"22" = load i64, ptr addrspace(4) %"39", align 8 + store i64 %"22", ptr addrspace(5) %"8", align 8 + %"24" = load i64, ptr addrspace(5) %"7", align 8 + %"44" = inttoptr i64 %"24" to ptr addrspace(1) + %"23" = load i64, ptr addrspace(1) %"44", align 8 + store i64 %"23", ptr addrspace(5) %"9", align 8 + %"25" = load i64, ptr addrspace(5) %"9", align 8 + store i64 %"25", ptr addrspace(5) %"40", align 8 + %"15" = load i64, ptr addrspace(5) %"40", align 8 %"16" = call i64 @incr(i64 %"15") - store i64 %"16", ptr addrspace(5) %"43", align 8 - %"28" = load i64, ptr addrspace(5) %"43", align 8 - store i64 %"28", ptr addrspace(5) %"9", align 8 - %"29" = load i64, ptr addrspace(5) %"8", align 8 - %"30" = load i64, ptr addrspace(5) %"9", align 8 - %"49" = inttoptr i64 %"29" to ptr addrspace(1) - store i64 %"30", ptr addrspace(1) %"49", align 8 + store i64 %"16", ptr addrspace(5) %"41", align 8 + %"26" = load i64, ptr addrspace(5) %"41", align 8 + store i64 %"26", ptr addrspace(5) %"9", align 8 + %"27" = load i64, ptr addrspace(5) %"8", align 8 + %"28" = load i64, ptr addrspace(5) %"9", 
align 8 + %"47" = inttoptr i64 %"27" to ptr addrspace(1) + store i64 %"28", ptr addrspace(1) %"47", align 8 ret void } diff --git a/ptx/src/test/spirv_run/call_bug.ll b/ptx/src/test/spirv_run/call_bug.ll index 749b2b6..3ad9146 100644 --- a/ptx/src/test/spirv_run/call_bug.ll +++ b/ptx/src/test/spirv_run/call_bug.ll @@ -1,68 +1,64 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define private [2 x i32] @incr(i64 %"23") #0 { -"58": +define private [2 x i32] @incr(i64 %"21") #0 { +"56": %"16" = alloca i64, align 8, addrspace(5) %"15" = alloca [2 x i32], align 4, addrspace(5) %"19" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"19", align 1 - %"20" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"20", align 1 - %"44" = alloca [2 x i32], align 4, addrspace(5) - %"45" = alloca i64, align 8, addrspace(5) + %"42" = alloca [2 x i32], align 4, addrspace(5) + %"43" = alloca i64, align 8, addrspace(5) %"4" = alloca i64, align 8, addrspace(5) - store i64 %"23", ptr addrspace(5) %"16", align 8 - %"24" = load i64, ptr addrspace(5) %"16", align 8 - store i64 %"24", ptr addrspace(5) %"45", align 8 - %"25" = load i64, ptr addrspace(5) %"45", align 8 - store i64 %"25", ptr addrspace(5) %"4", align 8 - %"27" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = add i64 %"27", 1 - store i64 %"26", ptr addrspace(5) %"4", align 8 - %"28" = load i64, ptr addrspace(5) %"4", align 8 - store i64 %"28", ptr addrspace(5) %"44", align 8 - %"29" = load [2 x i32], ptr addrspace(5) %"44", align 4 - store [2 x i32] %"29", ptr addrspace(5) %"15", align 4 - %"30" = load [2 x i32], ptr addrspace(5) %"15", align 4 - ret [2 x i32] %"30" + store i64 %"21", ptr addrspace(5) %"16", align 8 + %"22" = load i64, ptr addrspace(5) %"16", align 8 + store i64 %"22", ptr 
addrspace(5) %"43", align 8 + %"23" = load i64, ptr addrspace(5) %"43", align 8 + store i64 %"23", ptr addrspace(5) %"4", align 8 + %"25" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = add i64 %"25", 1 + store i64 %"24", ptr addrspace(5) %"4", align 8 + %"26" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"26", ptr addrspace(5) %"42", align 8 + %"27" = load [2 x i32], ptr addrspace(5) %"42", align 4 + store [2 x i32] %"27", ptr addrspace(5) %"15", align 4 + %"28" = load [2 x i32], ptr addrspace(5) %"15", align 4 + ret [2 x i32] %"28" } -define protected amdgpu_kernel void @call_bug(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { -"59": - %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 - %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 +define protected amdgpu_kernel void @call_bug(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 { +"57": + %"20" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"20", align 1 %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) - %"48" = alloca i64, align 8, addrspace(5) - %"49" = alloca [2 x i32], align 4, addrspace(5) - %"31" = load i64, ptr addrspace(4) %"46", align 8 - store i64 %"31", ptr addrspace(5) %"8", align 8 - %"32" = load i64, ptr addrspace(4) %"47", align 8 - store i64 %"32", ptr addrspace(5) %"9", align 8 - %"34" = load i64, ptr addrspace(5) %"8", align 8 - %"52" = inttoptr i64 %"34" to ptr addrspace(1) - %"33" = load i64, ptr addrspace(1) %"52", align 8 - store i64 %"33", ptr addrspace(5) %"10", align 8 - %"35" = load i64, ptr addrspace(5) %"10", align 8 - store i64 %"35", ptr addrspace(5) %"48", align 8 + %"46" = alloca i64, align 8, addrspace(5) + %"47" = alloca [2 x i32], align 4, addrspace(5) + %"29" = load i64, ptr addrspace(4) 
%"44", align 8 + store i64 %"29", ptr addrspace(5) %"8", align 8 + %"30" = load i64, ptr addrspace(4) %"45", align 8 + store i64 %"30", ptr addrspace(5) %"9", align 8 + %"32" = load i64, ptr addrspace(5) %"8", align 8 + %"50" = inttoptr i64 %"32" to ptr addrspace(1) + %"31" = load i64, ptr addrspace(1) %"50", align 8 + store i64 %"31", ptr addrspace(5) %"10", align 8 + %"33" = load i64, ptr addrspace(5) %"10", align 8 + store i64 %"33", ptr addrspace(5) %"46", align 8 store i64 ptrtoint (ptr @incr to i64), ptr addrspace(5) %"11", align 8 - %"17" = load i64, ptr addrspace(5) %"48", align 8 - %"37" = load i64, ptr addrspace(5) %"11", align 8 - %0 = inttoptr i64 %"37" to ptr + %"17" = load i64, ptr addrspace(5) %"46", align 8 + %"35" = load i64, ptr addrspace(5) %"11", align 8 + %0 = inttoptr i64 %"35" to ptr %"18" = call [2 x i32] %0(i64 %"17") - store [2 x i32] %"18", ptr addrspace(5) %"49", align 4 - %"61" = getelementptr inbounds i8, ptr addrspace(5) %"49", i64 0 - %"38" = load i64, ptr addrspace(5) %"61", align 8 - store i64 %"38", ptr addrspace(5) %"10", align 8 - %"39" = load i64, ptr addrspace(5) %"9", align 8 - %"40" = load i64, ptr addrspace(5) %"10", align 8 - %"57" = inttoptr i64 %"39" to ptr addrspace(1) - store i64 %"40", ptr addrspace(1) %"57", align 8 + store [2 x i32] %"18", ptr addrspace(5) %"47", align 4 + %"59" = getelementptr inbounds i8, ptr addrspace(5) %"47", i64 0 + %"36" = load i64, ptr addrspace(5) %"59", align 8 + store i64 %"36", ptr addrspace(5) %"10", align 8 + %"37" = load i64, ptr addrspace(5) %"9", align 8 + %"38" = load i64, ptr addrspace(5) %"10", align 8 + %"55" = inttoptr i64 %"37" to ptr addrspace(1) + store i64 %"38", ptr addrspace(1) %"55", align 8 ret void } diff --git a/ptx/src/test/spirv_run/call_multi_return.ll b/ptx/src/test/spirv_run/call_multi_return.ll index a6cb883..35cc5e0 100644 --- a/ptx/src/test/spirv_run/call_multi_return.ll +++ b/ptx/src/test/spirv_run/call_multi_return.ll @@ -3,43 +3,39 @@ target triple = 
"amdgcn-amd-amdhsa" %struct.i64i32 = type { i64, i32 } -define private %struct.i64i32 @"1"(i32 %"41", i32 %"42") #0 { -"64": +define private %struct.i64i32 @"1"(i32 %"39", i32 %"40") #0 { +"62": %"18" = alloca i32, align 4, addrspace(5) %"19" = alloca i32, align 4, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) %"17" = alloca i32, align 4, addrspace(5) - %"23" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"23", align 1 - %"24" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"24", align 1 + %"22" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"22", align 1 %"20" = alloca i32, align 4, addrspace(5) - store i32 %"41", ptr addrspace(5) %"18", align 4 - store i32 %"42", ptr addrspace(5) %"19", align 4 - %"44" = load i32, ptr addrspace(5) %"18", align 4 - %"45" = load i32, ptr addrspace(5) %"19", align 4 - %"43" = add i32 %"44", %"45" - store i32 %"43", ptr addrspace(5) %"20", align 4 - %"47" = load i32, ptr addrspace(5) %"20", align 4 - %"46" = zext i32 %"47" to i64 - store i64 %"46", ptr addrspace(5) %"16", align 8 - %"49" = load i32, ptr addrspace(5) %"18", align 4 - %"50" = load i32, ptr addrspace(5) %"19", align 4 - %"48" = mul i32 %"49", %"50" - store i32 %"48", ptr addrspace(5) %"17", align 4 - %"51" = load i64, ptr addrspace(5) %"16", align 8 - %"52" = load i32, ptr addrspace(5) %"17", align 4 - %0 = insertvalue %struct.i64i32 undef, i64 %"51", 0 - %1 = insertvalue %struct.i64i32 %0, i32 %"52", 1 + store i32 %"39", ptr addrspace(5) %"18", align 4 + store i32 %"40", ptr addrspace(5) %"19", align 4 + %"42" = load i32, ptr addrspace(5) %"18", align 4 + %"43" = load i32, ptr addrspace(5) %"19", align 4 + %"41" = add i32 %"42", %"43" + store i32 %"41", ptr addrspace(5) %"20", align 4 + %"45" = load i32, ptr addrspace(5) %"20", align 4 + %"44" = zext i32 %"45" to i64 + store i64 %"44", ptr addrspace(5) %"16", align 8 + %"47" = load i32, ptr addrspace(5) %"18", align 4 + %"48" = load i32, 
ptr addrspace(5) %"19", align 4 + %"46" = mul i32 %"47", %"48" + store i32 %"46", ptr addrspace(5) %"17", align 4 + %"49" = load i64, ptr addrspace(5) %"16", align 8 + %"50" = load i32, ptr addrspace(5) %"17", align 4 + %0 = insertvalue %struct.i64i32 undef, i64 %"49", 0 + %1 = insertvalue %struct.i64i32 %0, i32 %"50", 1 ret %struct.i64i32 %1 } -define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #0 { -"63": +define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i64) %"55", ptr addrspace(4) byref(i64) %"56") #0 { +"61": %"21" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"21", align 1 - %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) @@ -47,38 +43,38 @@ define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i6 %"13" = alloca i64, align 8, addrspace(5) %"14" = alloca i64, align 8, addrspace(5) %"15" = alloca i32, align 4, addrspace(5) - %"25" = load i64, ptr addrspace(4) %"57", align 8 - store i64 %"25", ptr addrspace(5) %"9", align 8 - %"26" = load i64, ptr addrspace(4) %"58", align 8 - store i64 %"26", ptr addrspace(5) %"10", align 8 + %"23" = load i64, ptr addrspace(4) %"55", align 8 + store i64 %"23", ptr addrspace(5) %"9", align 8 + %"24" = load i64, ptr addrspace(4) %"56", align 8 + store i64 %"24", ptr addrspace(5) %"10", align 8 + %"26" = load i64, ptr addrspace(5) %"9", align 8 + %"57" = inttoptr i64 %"26" to ptr addrspace(1) + %"25" = load i32, ptr addrspace(1) %"57", align 4 + store i32 %"25", ptr addrspace(5) %"11", align 4 %"28" = load i64, ptr addrspace(5) %"9", align 8 - %"59" = inttoptr i64 %"28" to ptr addrspace(1) - %"27" = load i32, ptr addrspace(1) %"59", align 4 - store i32 %"27", ptr addrspace(5) %"11", align 4 - %"30" = load i64, ptr 
addrspace(5) %"9", align 8 - %"60" = inttoptr i64 %"30" to ptr addrspace(1) - %"66" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 4 - %"29" = load i32, ptr addrspace(1) %"66", align 4 - store i32 %"29", ptr addrspace(5) %"12", align 4 - %"33" = load i32, ptr addrspace(5) %"11", align 4 - %"34" = load i32, ptr addrspace(5) %"12", align 4 - %0 = call %struct.i64i32 @"1"(i32 %"33", i32 %"34") - %"31" = extractvalue %struct.i64i32 %0, 0 - %"32" = extractvalue %struct.i64i32 %0, 1 - store i64 %"31", ptr addrspace(5) %"13", align 8 - store i32 %"32", ptr addrspace(5) %"15", align 4 - %"36" = load i32, ptr addrspace(5) %"15", align 4 - %"35" = zext i32 %"36" to i64 - store i64 %"35", ptr addrspace(5) %"14", align 8 + %"58" = inttoptr i64 %"28" to ptr addrspace(1) + %"64" = getelementptr inbounds i8, ptr addrspace(1) %"58", i64 4 + %"27" = load i32, ptr addrspace(1) %"64", align 4 + store i32 %"27", ptr addrspace(5) %"12", align 4 + %"31" = load i32, ptr addrspace(5) %"11", align 4 + %"32" = load i32, ptr addrspace(5) %"12", align 4 + %0 = call %struct.i64i32 @"1"(i32 %"31", i32 %"32") + %"29" = extractvalue %struct.i64i32 %0, 0 + %"30" = extractvalue %struct.i64i32 %0, 1 + store i64 %"29", ptr addrspace(5) %"13", align 8 + store i32 %"30", ptr addrspace(5) %"15", align 4 + %"34" = load i32, ptr addrspace(5) %"15", align 4 + %"33" = zext i32 %"34" to i64 + store i64 %"33", ptr addrspace(5) %"14", align 8 + %"35" = load i64, ptr addrspace(5) %"10", align 8 + %"36" = load i64, ptr addrspace(5) %"13", align 8 + %"59" = inttoptr i64 %"35" to ptr addrspace(1) + store i64 %"36", ptr addrspace(1) %"59", align 8 %"37" = load i64, ptr addrspace(5) %"10", align 8 - %"38" = load i64, ptr addrspace(5) %"13", align 8 - %"61" = inttoptr i64 %"37" to ptr addrspace(1) - store i64 %"38", ptr addrspace(1) %"61", align 8 - %"39" = load i64, ptr addrspace(5) %"10", align 8 - %"40" = load i64, ptr addrspace(5) %"14", align 8 - %"62" = inttoptr i64 %"39" to ptr addrspace(1) - %"68" = 
getelementptr inbounds i8, ptr addrspace(1) %"62", i64 8 - store i64 %"40", ptr addrspace(1) %"68", align 8 + %"38" = load i64, ptr addrspace(5) %"14", align 8 + %"60" = inttoptr i64 %"37" to ptr addrspace(1) + %"66" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 8 + store i64 %"38", ptr addrspace(1) %"66", align 8 ret void } diff --git a/ptx/src/test/spirv_run/callprototype.ll b/ptx/src/test/spirv_run/callprototype.ll index 84e5987..be431ea 100644 --- a/ptx/src/test/spirv_run/callprototype.ll +++ b/ptx/src/test/spirv_run/callprototype.ll @@ -1,67 +1,63 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define private i64 @incr(i64 %"35") #0 { -"56": +define private i64 @incr(i64 %"33") #0 { +"54": %"20" = alloca i64, align 8, addrspace(5) %"19" = alloca i64, align 8, addrspace(5) - %"23" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"23", align 1 - %"24" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"24", align 1 - %"48" = alloca i64, align 8, addrspace(5) - %"49" = alloca i64, align 8, addrspace(5) - %"16" = alloca i64, align 8, addrspace(5) - store i64 %"35", ptr addrspace(5) %"20", align 8 - %"36" = load i64, ptr addrspace(5) %"20", align 8 - store i64 %"36", ptr addrspace(5) %"49", align 8 - %"37" = load i64, ptr addrspace(5) %"49", align 8 - store i64 %"37", ptr addrspace(5) %"16", align 8 - %"39" = load i64, ptr addrspace(5) %"16", align 8 - %"38" = add i64 %"39", 1 - store i64 %"38", ptr addrspace(5) %"16", align 8 - %"40" = load i64, ptr addrspace(5) %"16", align 8 - store i64 %"40", ptr addrspace(5) %"48", align 8 - %"41" = load i64, ptr addrspace(5) %"48", align 8 - store i64 %"41", ptr addrspace(5) %"19", align 8 - %"42" = load i64, ptr addrspace(5) %"19", align 8 - ret i64 %"42" -} - -define protected 
amdgpu_kernel void @callprototype(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 { -"55": - %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 %"22" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"22", align 1 + %"46" = alloca i64, align 8, addrspace(5) + %"47" = alloca i64, align 8, addrspace(5) + %"16" = alloca i64, align 8, addrspace(5) + store i64 %"33", ptr addrspace(5) %"20", align 8 + %"34" = load i64, ptr addrspace(5) %"20", align 8 + store i64 %"34", ptr addrspace(5) %"47", align 8 + %"35" = load i64, ptr addrspace(5) %"47", align 8 + store i64 %"35", ptr addrspace(5) %"16", align 8 + %"37" = load i64, ptr addrspace(5) %"16", align 8 + %"36" = add i64 %"37", 1 + store i64 %"36", ptr addrspace(5) %"16", align 8 + %"38" = load i64, ptr addrspace(5) %"16", align 8 + store i64 %"38", ptr addrspace(5) %"46", align 8 + %"39" = load i64, ptr addrspace(5) %"46", align 8 + store i64 %"39", ptr addrspace(5) %"19", align 8 + %"40" = load i64, ptr addrspace(5) %"19", align 8 + ret i64 %"40" +} + +define protected amdgpu_kernel void @callprototype(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 { +"53": + %"21" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"21", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) - %"46" = alloca i64, align 8, addrspace(5) - %"47" = alloca i64, align 8, addrspace(5) - %"25" = load i64, ptr addrspace(4) %"44", align 8 - store i64 %"25", ptr addrspace(5) %"7", align 8 - %"26" = load i64, ptr addrspace(4) %"45", align 8 - store i64 %"26", ptr addrspace(5) %"8", align 8 - %"28" = load i64, ptr addrspace(5) %"7", align 8 - %"50" = inttoptr i64 %"28" to ptr addrspace(1) - %"27" = load i64, ptr addrspace(1) %"50", align 8 - store i64 %"27", ptr addrspace(5) %"9", align 8 - %"29" = load 
i64, ptr addrspace(5) %"9", align 8 - store i64 %"29", ptr addrspace(5) %"46", align 8 + %"44" = alloca i64, align 8, addrspace(5) + %"45" = alloca i64, align 8, addrspace(5) + %"23" = load i64, ptr addrspace(4) %"42", align 8 + store i64 %"23", ptr addrspace(5) %"7", align 8 + %"24" = load i64, ptr addrspace(4) %"43", align 8 + store i64 %"24", ptr addrspace(5) %"8", align 8 + %"26" = load i64, ptr addrspace(5) %"7", align 8 + %"48" = inttoptr i64 %"26" to ptr addrspace(1) + %"25" = load i64, ptr addrspace(1) %"48", align 8 + store i64 %"25", ptr addrspace(5) %"9", align 8 + %"27" = load i64, ptr addrspace(5) %"9", align 8 + store i64 %"27", ptr addrspace(5) %"44", align 8 store i64 ptrtoint (ptr @incr to i64), ptr addrspace(5) %"10", align 8 - %"17" = load i64, ptr addrspace(5) %"46", align 8 - %"31" = load i64, ptr addrspace(5) %"10", align 8 - %0 = inttoptr i64 %"31" to ptr + %"17" = load i64, ptr addrspace(5) %"44", align 8 + %"29" = load i64, ptr addrspace(5) %"10", align 8 + %0 = inttoptr i64 %"29" to ptr %"18" = call i64 %0(i64 %"17") - store i64 %"18", ptr addrspace(5) %"47", align 8 - %"32" = load i64, ptr addrspace(5) %"47", align 8 - store i64 %"32", ptr addrspace(5) %"9", align 8 - %"33" = load i64, ptr addrspace(5) %"8", align 8 - %"34" = load i64, ptr addrspace(5) %"9", align 8 - %"54" = inttoptr i64 %"33" to ptr addrspace(1) - store i64 %"34", ptr addrspace(1) %"54", align 8 + store i64 %"18", ptr addrspace(5) %"45", align 8 + %"30" = load i64, ptr addrspace(5) %"45", align 8 + store i64 %"30", ptr addrspace(5) %"9", align 8 + %"31" = load i64, ptr addrspace(5) %"8", align 8 + %"32" = load i64, ptr addrspace(5) %"9", align 8 + %"52" = inttoptr i64 %"31" to ptr addrspace(1) + store i64 %"32", ptr addrspace(1) %"52", align 8 ret void } diff --git a/ptx/src/test/spirv_run/carry_mixed.ll b/ptx/src/test/spirv_run/carry_mixed.ll deleted file mode 100644 index c33cc5e..0000000 --- a/ptx/src/test/spirv_run/carry_mixed.ll +++ /dev/null @@ -1,51 +0,0 @@ 
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" -target triple = "amdgcn-amd-amdhsa" - -define protected amdgpu_kernel void @carry_mixed(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { -"44": - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 - %"4" = alloca i64, align 8, addrspace(5) - %"5" = alloca i64, align 8, addrspace(5) - %"6" = alloca i32, align 4, addrspace(5) - %"7" = alloca i32, align 4, addrspace(5) - %"8" = alloca i32, align 4, addrspace(5) - %"11" = load i64, ptr addrspace(4) %"35", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) - %"36" = extractvalue { i32, i1 } %0, 0 - %"13" = extractvalue { i32, i1 } %0, 1 - store i32 %"36", ptr addrspace(5) %"6", align 4 - store i1 %"13", ptr addrspace(5) %"10", align 1 - %"15" = load i1, ptr addrspace(5) %"10", align 1 - %1 = zext i1 %"15" to i32 - %"37" = sub i32 2, %1 - store i32 %"37", ptr addrspace(5) %"7", align 4 - %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) - %"38" = extractvalue { i32, i1 } %2, 0 - %"17" = extractvalue { i32, i1 } %2, 1 - store i32 %"38", ptr addrspace(5) %"6", align 4 - store i1 %"17", ptr addrspace(5) %"10", align 1 - %"19" = load i1, ptr addrspace(5) %"9", align 1 - %3 = zext i1 %"19" to i32 - %"39" = add i32 1, %3 - store i32 %"39", ptr addrspace(5) %"8", align 4 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load i32, ptr addrspace(5) %"7", align 4 - %"40" = inttoptr i64 %"20" to ptr - store i32 %"21", ptr %"40", align 4 - %"22" = load i64, ptr addrspace(5) %"5", align 8 - %"23" = load i32, ptr addrspace(5) %"8", align 4 - %"42" = inttoptr i64 %"22" to ptr - %"46" = 
getelementptr inbounds i8, ptr %"42", i64 4 - store i32 %"23", ptr %"46", align 4 - ret void -} - -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1 - -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } diff --git a/ptx/src/test/spirv_run/carry_mixed.ptx b/ptx/src/test/spirv_run/carry_mixed.ptx deleted file mode 100644 index b4f2caa..0000000 --- a/ptx/src/test/spirv_run/carry_mixed.ptx +++ /dev/null @@ -1,32 +0,0 @@ -.version 6.5 -.target sm_30 -.address_size 64 - -.visible .entry carry_mixed( - .param .u64 input, - .param .u64 output -) -{ - .reg .u64 in_addr; - .reg .u64 out_addr; - .reg .b32 unused; - - .reg .b32 carry_out_1; - .reg .b32 carry_out_2; - - ld.param.u64 out_addr, [output]; - - // set carry with sub - sub.cc.s32 unused, 0, 1; - // write carry with sub - subc.s32 carry_out_1, 2, 0; - - // set carry with sub - sub.cc.s32 unused, 0, 1; - // fail writing carry with add - addc.s32 carry_out_2, 1, 0; - - st.s32 [out_addr], carry_out_1; - st.s32 [out_addr+4], carry_out_2; - ret; -} diff --git a/ptx/src/test/spirv_run/carry_set_all.ll b/ptx/src/test/spirv_run/carry_set_all.ll new file mode 100644 index 0000000..8b412c1 --- /dev/null +++ b/ptx/src/test/spirv_run/carry_set_all.ll @@ -0,0 +1,257 @@ +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" +target triple = "amdgcn-amd-amdhsa" + +define protected amdgpu_kernel void @carry_set_all(ptr addrspace(4) byref(i64) %"208", ptr addrspace(4) byref(i64) %"209") #0 { +"268": + %"22" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"22", 
align 1 + %"4" = alloca i64, align 8, addrspace(5) + %"5" = alloca i64, align 8, addrspace(5) + %"6" = alloca i32, align 4, addrspace(5) + %"7" = alloca i32, align 4, addrspace(5) + %"8" = alloca i32, align 4, addrspace(5) + %"9" = alloca i32, align 4, addrspace(5) + %"10" = alloca i32, align 4, addrspace(5) + %"11" = alloca i32, align 4, addrspace(5) + %"12" = alloca i32, align 4, addrspace(5) + %"13" = alloca i32, align 4, addrspace(5) + %"14" = alloca i32, align 4, addrspace(5) + %"15" = alloca i32, align 4, addrspace(5) + %"16" = alloca i32, align 4, addrspace(5) + %"17" = alloca i32, align 4, addrspace(5) + %"18" = alloca i32, align 4, addrspace(5) + %"19" = alloca i32, align 4, addrspace(5) + %"20" = alloca i32, align 4, addrspace(5) + %"21" = alloca i32, align 4, addrspace(5) + %"37" = load i64, ptr addrspace(4) %"209", align 8 + store i64 %"37", ptr addrspace(5) %"5", align 8 + %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) + %"210" = extractvalue { i32, i1 } %0, 0 + %"23" = extractvalue { i32, i1 } %0, 1 + store i32 %"210", ptr addrspace(5) %"6", align 4 + %"39" = xor i1 %"23", true + store i1 %"39", ptr addrspace(5) %"22", align 1 + %"41" = load i1, ptr addrspace(5) %"22", align 1 + %1 = zext i1 %"41" to i32 + %"211" = add i32 0, %1 + store i32 %"211", ptr addrspace(5) %"6", align 4 + %"42" = load i1, ptr addrspace(5) %"22", align 1 + %"24" = xor i1 %"42", true + %2 = zext i1 %"24" to i32 + %"212" = sub i32 0, %2 + store i32 %"212", ptr addrspace(5) %"7", align 4 + %3 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) + %"213" = extractvalue { i32, i1 } %3, 0 + %"25" = extractvalue { i32, i1 } %3, 1 + store i32 %"213", ptr addrspace(5) %"8", align 4 + %"45" = xor i1 %"25", true + store i1 %"45", ptr addrspace(5) %"22", align 1 + %"47" = load i1, ptr addrspace(5) %"22", align 1 + %4 = zext i1 %"47" to i32 + %"214" = add i32 0, %4 + store i32 %"214", ptr addrspace(5) %"8", align 4 + %"48" = load i1, ptr addrspace(5) %"22", align 
1 + %"26" = xor i1 %"48", true + %5 = zext i1 %"26" to i32 + %"215" = sub i32 0, %5 + store i32 %"215", ptr addrspace(5) %"9", align 4 + %6 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"216" = extractvalue { i32, i1 } %6, 0 + %"51" = extractvalue { i32, i1 } %6, 1 + store i32 %"216", ptr addrspace(5) %"10", align 4 + store i1 %"51", ptr addrspace(5) %"22", align 1 + %"53" = load i1, ptr addrspace(5) %"22", align 1 + %7 = zext i1 %"53" to i32 + %"217" = add i32 0, %7 + store i32 %"217", ptr addrspace(5) %"10", align 4 + %"54" = load i1, ptr addrspace(5) %"22", align 1 + %"27" = xor i1 %"54", true + %8 = zext i1 %"27" to i32 + %"218" = sub i32 0, %8 + store i32 %"218", ptr addrspace(5) %"11", align 4 + %9 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) + %"219" = extractvalue { i32, i1 } %9, 0 + %"57" = extractvalue { i32, i1 } %9, 1 + store i32 %"219", ptr addrspace(5) %"12", align 4 + store i1 %"57", ptr addrspace(5) %"22", align 1 + %"59" = load i1, ptr addrspace(5) %"22", align 1 + %10 = zext i1 %"59" to i32 + %"220" = add i32 0, %10 + store i32 %"220", ptr addrspace(5) %"12", align 4 + %"60" = load i1, ptr addrspace(5) %"22", align 1 + %"28" = xor i1 %"60", true + %11 = zext i1 %"28" to i32 + %"221" = sub i32 0, %11 + store i32 %"221", ptr addrspace(5) %"13", align 4 + %12 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"222" = extractvalue { i32, i1 } %12, 0 + %"63" = extractvalue { i32, i1 } %12, 1 + store i32 %"222", ptr addrspace(5) %"14", align 4 + store i1 %"63", ptr addrspace(5) %"22", align 1 + %"65" = load i1, ptr addrspace(5) %"22", align 1 + %13 = zext i1 %"65" to i32 + %"223" = add i32 0, %13 + store i32 %"223", ptr addrspace(5) %"14", align 4 + %"66" = load i1, ptr addrspace(5) %"22", align 1 + %"29" = xor i1 %"66", true + %14 = zext i1 %"29" to i32 + %"224" = sub i32 0, %14 + store i32 %"224", ptr addrspace(5) %"15", align 4 + %15 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) + 
%"225" = extractvalue { i32, i1 } %15, 0 + %"69" = extractvalue { i32, i1 } %15, 1 + store i32 %"225", ptr addrspace(5) %"16", align 4 + store i1 %"69", ptr addrspace(5) %"22", align 1 + %"71" = load i1, ptr addrspace(5) %"22", align 1 + %16 = zext i1 %"71" to i32 + %"226" = add i32 0, %16 + store i32 %"226", ptr addrspace(5) %"16", align 4 + %"72" = load i1, ptr addrspace(5) %"22", align 1 + %"30" = xor i1 %"72", true + %17 = zext i1 %"30" to i32 + %"227" = sub i32 0, %17 + store i32 %"227", ptr addrspace(5) %"17", align 4 + %18 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"228" = extractvalue { i32, i1 } %18, 0 + %"75" = extractvalue { i32, i1 } %18, 1 + store i32 %"228", ptr addrspace(5) %"18", align 4 + store i1 %"75", ptr addrspace(5) %"22", align 1 + %"76" = load i1, ptr addrspace(5) %"22", align 1 + %"31" = xor i1 %"76", true + %19 = zext i1 %"31" to i32 + %20 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) + %21 = extractvalue { i32, i1 } %20, 0 + %22 = extractvalue { i32, i1 } %20, 1 + %23 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %21, i32 %19) + %"229" = extractvalue { i32, i1 } %23, 0 + %24 = extractvalue { i32, i1 } %23, 1 + %"32" = xor i1 %22, %24 + store i32 %"229", ptr addrspace(5) %"18", align 4 + %"78" = xor i1 %"32", true + store i1 %"78", ptr addrspace(5) %"22", align 1 + %"80" = load i1, ptr addrspace(5) %"22", align 1 + %25 = zext i1 %"80" to i32 + %"230" = add i32 0, %25 + store i32 %"230", ptr addrspace(5) %"18", align 4 + %"81" = load i1, ptr addrspace(5) %"22", align 1 + %"33" = xor i1 %"81", true + %26 = zext i1 %"33" to i32 + %"231" = sub i32 0, %26 + store i32 %"231", ptr addrspace(5) %"19", align 4 + %27 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"232" = extractvalue { i32, i1 } %27, 0 + %"84" = extractvalue { i32, i1 } %27, 1 + store i32 %"232", ptr addrspace(5) %"20", align 4 + store i1 %"84", ptr addrspace(5) %"22", align 1 + %"85" = load i1, ptr addrspace(5) %"22", 
align 1 + %"34" = xor i1 %"85", true + %28 = zext i1 %"34" to i32 + %29 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) + %30 = extractvalue { i32, i1 } %29, 0 + %31 = extractvalue { i32, i1 } %29, 1 + %32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %30, i32 %28) + %"233" = extractvalue { i32, i1 } %32, 0 + %33 = extractvalue { i32, i1 } %32, 1 + %"35" = xor i1 %31, %33 + store i32 %"233", ptr addrspace(5) %"20", align 4 + %"87" = xor i1 %"35", true + store i1 %"87", ptr addrspace(5) %"22", align 1 + %"89" = load i1, ptr addrspace(5) %"22", align 1 + %34 = zext i1 %"89" to i32 + %"234" = add i32 0, %34 + store i32 %"234", ptr addrspace(5) %"20", align 4 + %"90" = load i1, ptr addrspace(5) %"22", align 1 + %"36" = xor i1 %"90", true + %35 = zext i1 %"36" to i32 + %"235" = sub i32 0, %35 + store i32 %"235", ptr addrspace(5) %"21", align 4 + %"92" = load i64, ptr addrspace(5) %"5", align 8 + %"93" = load i32, ptr addrspace(5) %"6", align 4 + %"236" = inttoptr i64 %"92" to ptr + store i32 %"93", ptr %"236", align 4 + %"94" = load i64, ptr addrspace(5) %"5", align 8 + %"95" = load i32, ptr addrspace(5) %"8", align 4 + %"238" = inttoptr i64 %"94" to ptr + %"270" = getelementptr inbounds i8, ptr %"238", i64 4 + store i32 %"95", ptr %"270", align 4 + %"96" = load i64, ptr addrspace(5) %"5", align 8 + %"97" = load i32, ptr addrspace(5) %"10", align 4 + %"240" = inttoptr i64 %"96" to ptr + %"272" = getelementptr inbounds i8, ptr %"240", i64 8 + store i32 %"97", ptr %"272", align 4 + %"98" = load i64, ptr addrspace(5) %"5", align 8 + %"99" = load i32, ptr addrspace(5) %"12", align 4 + %"242" = inttoptr i64 %"98" to ptr + %"274" = getelementptr inbounds i8, ptr %"242", i64 12 + store i32 %"99", ptr %"274", align 4 + %"100" = load i64, ptr addrspace(5) %"5", align 8 + %"101" = load i32, ptr addrspace(5) %"14", align 4 + %"244" = inttoptr i64 %"100" to ptr + %"276" = getelementptr inbounds i8, ptr %"244", i64 16 + store i32 %"101", ptr %"276", align 4 + 
%"102" = load i64, ptr addrspace(5) %"5", align 8 + %"103" = load i32, ptr addrspace(5) %"16", align 4 + %"246" = inttoptr i64 %"102" to ptr + %"278" = getelementptr inbounds i8, ptr %"246", i64 20 + store i32 %"103", ptr %"278", align 4 + %"104" = load i64, ptr addrspace(5) %"5", align 8 + %"105" = load i32, ptr addrspace(5) %"18", align 4 + %"248" = inttoptr i64 %"104" to ptr + %"280" = getelementptr inbounds i8, ptr %"248", i64 24 + store i32 %"105", ptr %"280", align 4 + %"106" = load i64, ptr addrspace(5) %"5", align 8 + %"107" = load i32, ptr addrspace(5) %"20", align 4 + %"250" = inttoptr i64 %"106" to ptr + %"282" = getelementptr inbounds i8, ptr %"250", i64 28 + store i32 %"107", ptr %"282", align 4 + %"108" = load i64, ptr addrspace(5) %"5", align 8 + %"109" = load i32, ptr addrspace(5) %"7", align 4 + %"252" = inttoptr i64 %"108" to ptr + %"284" = getelementptr inbounds i8, ptr %"252", i64 32 + store i32 %"109", ptr %"284", align 4 + %"110" = load i64, ptr addrspace(5) %"5", align 8 + %"111" = load i32, ptr addrspace(5) %"9", align 4 + %"254" = inttoptr i64 %"110" to ptr + %"286" = getelementptr inbounds i8, ptr %"254", i64 36 + store i32 %"111", ptr %"286", align 4 + %"112" = load i64, ptr addrspace(5) %"5", align 8 + %"113" = load i32, ptr addrspace(5) %"11", align 4 + %"256" = inttoptr i64 %"112" to ptr + %"288" = getelementptr inbounds i8, ptr %"256", i64 40 + store i32 %"113", ptr %"288", align 4 + %"114" = load i64, ptr addrspace(5) %"5", align 8 + %"115" = load i32, ptr addrspace(5) %"13", align 4 + %"258" = inttoptr i64 %"114" to ptr + %"290" = getelementptr inbounds i8, ptr %"258", i64 44 + store i32 %"115", ptr %"290", align 4 + %"116" = load i64, ptr addrspace(5) %"5", align 8 + %"117" = load i32, ptr addrspace(5) %"15", align 4 + %"260" = inttoptr i64 %"116" to ptr + %"292" = getelementptr inbounds i8, ptr %"260", i64 48 + store i32 %"117", ptr %"292", align 4 + %"118" = load i64, ptr addrspace(5) %"5", align 8 + %"119" = load i32, ptr 
addrspace(5) %"17", align 4 + %"262" = inttoptr i64 %"118" to ptr + %"294" = getelementptr inbounds i8, ptr %"262", i64 52 + store i32 %"119", ptr %"294", align 4 + %"120" = load i64, ptr addrspace(5) %"5", align 8 + %"121" = load i32, ptr addrspace(5) %"19", align 4 + %"264" = inttoptr i64 %"120" to ptr + %"296" = getelementptr inbounds i8, ptr %"264", i64 56 + store i32 %"121", ptr %"296", align 4 + %"122" = load i64, ptr addrspace(5) %"5", align 8 + %"123" = load i32, ptr addrspace(5) %"21", align 4 + %"266" = inttoptr i64 %"122" to ptr + %"298" = getelementptr inbounds i8, ptr %"266", i64 60 + store i32 %"123", ptr %"298", align 4 + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1 + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 + +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } diff --git a/ptx/src/test/spirv_run/carry_set_all.ptx b/ptx/src/test/spirv_run/carry_set_all.ptx new file mode 100644 index 0000000..ace6e33 --- /dev/null +++ b/ptx/src/test/spirv_run/carry_set_all.ptx @@ -0,0 +1,84 @@ +.version 6.5 +.target sm_30 +.address_size 64 + +.visible .entry carry_set_all( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + + .reg .b32 carry1_add; + .reg .b32 carry1_sub; + .reg .b32 carry2_add; + .reg .b32 carry2_sub; + .reg .b32 carry3_add; + .reg .b32 carry3_sub; + .reg .b32 carry4_add; + .reg .b32 carry4_sub; + .reg .b32 carry5_add; + .reg .b32 carry5_sub; + .reg .b32 carry6_add; + .reg .b32 carry6_sub; + .reg .b32 carry7_add; + .reg .b32 carry7_sub; + .reg .b32 carry8_add; + .reg .b32 
carry8_sub; + + ld.param.u64 out_addr, [output]; + + sub.cc.u32 carry1_add, 0, 0; + addc.u32 carry1_add, 0, 0; + subc.u32 carry1_sub, 0, 0; + + sub.cc.u32 carry2_add, 0, 1; + addc.u32 carry2_add, 0, 0; + subc.u32 carry2_sub, 0, 0; + + add.cc.u32 carry3_add, 0, 0; + addc.u32 carry3_add, 0, 0; + subc.u32 carry3_sub, 0, 0; + + add.cc.u32 carry4_add, 4294967295, 4294967295; + addc.u32 carry4_add, 0, 0; + subc.u32 carry4_sub, 0, 0; + + mad.lo.cc.u32 carry5_add, 0, 0, 0; + addc.u32 carry5_add, 0, 0; + subc.u32 carry5_sub, 0, 0; + + mad.lo.cc.u32 carry6_add, 1, 4294967295, 4294967295; + addc.u32 carry6_add, 0, 0; + subc.u32 carry6_sub, 0, 0; + + add.cc.u32 carry7_add, 0, 0; + subc.cc.u32 carry7_add, 0, 0; + addc.u32 carry7_add, 0, 0; + subc.u32 carry7_sub, 0, 0; + + add.cc.u32 carry8_add, 0, 0; + subc.cc.u32 carry8_add, 0, 1; + addc.u32 carry8_add, 0, 0; + subc.u32 carry8_sub, 0, 0; + + st.u32 [out_addr], carry1_add; + st.u32 [out_addr+4], carry2_add; + st.u32 [out_addr+8], carry3_add; + st.u32 [out_addr+12], carry4_add; + st.u32 [out_addr+16], carry5_add; + st.u32 [out_addr+20], carry6_add; + st.u32 [out_addr+24], carry7_add; + st.u32 [out_addr+28], carry8_add; + + st.u32 [out_addr+32], carry1_sub; + st.u32 [out_addr+36], carry2_sub; + st.u32 [out_addr+40], carry3_sub; + st.u32 [out_addr+44], carry4_sub; + st.u32 [out_addr+48], carry5_sub; + st.u32 [out_addr+52], carry6_sub; + st.u32 [out_addr+56], carry7_sub; + st.u32 [out_addr+60], carry8_sub; + ret; +} diff --git a/ptx/src/test/spirv_run/clz.ll b/ptx/src/test/spirv_run/clz.ll index 356ee7d..31f408d 100644 --- a/ptx/src/test/spirv_run/clz.ll +++ b/ptx/src/test/spirv_run/clz.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) 
byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load i32, ptr %"19", align 4 - store i32 %"11", ptr addrspace(5) %"6", align 4 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %0 = call i32 @llvm.ctlz.i32(i32 %"14", i1 false) + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load i32, ptr %"18", align 4 + store i32 %"10", ptr addrspace(5) %"6", align 4 + %"13" = load i32, ptr addrspace(5) %"6", align 4 + %0 = call i32 @llvm.ctlz.i32(i32 %"13", i1 false) store i32 %0, ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load i32, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store i32 %"16", ptr %"20", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load i32, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store i32 %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/const.ll b/ptx/src/test/spirv_run/const.ll index 472421d..80fcc07 100644 --- a/ptx/src/test/spirv_run/const.ll +++ b/ptx/src/test/spirv_run/const.ll @@ -3,49 +3,47 @@ target triple = 
"amdgcn-amd-amdhsa" @constparams = protected addrspace(4) externally_initialized global [4 x i16] [i16 10, i16 20, i16 30, i16 40], align 8 -define protected amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 { -"53": +define protected amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { +"52": %"11" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"11", align 1 - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i16, align 2, addrspace(5) %"8" = alloca i16, align 2, addrspace(5) %"9" = alloca i16, align 2, addrspace(5) %"10" = alloca i16, align 2, addrspace(5) + %"12" = load i64, ptr addrspace(4) %"38", align 8 + store i64 %"12", ptr addrspace(5) %"5", align 8 %"13" = load i64, ptr addrspace(4) %"39", align 8 - store i64 %"13", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(4) %"40", align 8 - store i64 %"14", ptr addrspace(5) %"6", align 8 - %"15" = load i16, ptr addrspace(4) @constparams, align 2 - store i16 %"15", ptr addrspace(5) %"7", align 2 - %"16" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 2), align 2 - store i16 %"16", ptr addrspace(5) %"8", align 2 - %"17" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 4), align 2 - store i16 %"17", ptr addrspace(5) %"9", align 2 - %"18" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 6), align 2 - store i16 %"18", ptr addrspace(5) %"10", align 2 - %"19" = load i64, ptr addrspace(5) %"6", align 8 - %"20" = load i16, ptr addrspace(5) %"7", align 2 - %"45" = inttoptr i64 %"19" to ptr - store i16 %"20", ptr %"45", align 2 - %"21" = load i64, ptr addrspace(5) %"6", align 8 - %"22" = load i16, ptr addrspace(5) %"8", align 2 
- %"47" = inttoptr i64 %"21" to ptr - %"61" = getelementptr inbounds i8, ptr %"47", i64 2 - store i16 %"22", ptr %"61", align 2 - %"23" = load i64, ptr addrspace(5) %"6", align 8 - %"24" = load i16, ptr addrspace(5) %"9", align 2 - %"49" = inttoptr i64 %"23" to ptr - %"63" = getelementptr inbounds i8, ptr %"49", i64 4 - store i16 %"24", ptr %"63", align 2 - %"25" = load i64, ptr addrspace(5) %"6", align 8 - %"26" = load i16, ptr addrspace(5) %"10", align 2 - %"51" = inttoptr i64 %"25" to ptr - %"65" = getelementptr inbounds i8, ptr %"51", i64 6 - store i16 %"26", ptr %"65", align 2 + store i64 %"13", ptr addrspace(5) %"6", align 8 + %"14" = load i16, ptr addrspace(4) @constparams, align 2 + store i16 %"14", ptr addrspace(5) %"7", align 2 + %"15" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 2), align 2 + store i16 %"15", ptr addrspace(5) %"8", align 2 + %"16" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 4), align 2 + store i16 %"16", ptr addrspace(5) %"9", align 2 + %"17" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 6), align 2 + store i16 %"17", ptr addrspace(5) %"10", align 2 + %"18" = load i64, ptr addrspace(5) %"6", align 8 + %"19" = load i16, ptr addrspace(5) %"7", align 2 + %"44" = inttoptr i64 %"18" to ptr + store i16 %"19", ptr %"44", align 2 + %"20" = load i64, ptr addrspace(5) %"6", align 8 + %"21" = load i16, ptr addrspace(5) %"8", align 2 + %"46" = inttoptr i64 %"20" to ptr + %"60" = getelementptr inbounds i8, ptr %"46", i64 2 + store i16 %"21", ptr %"60", align 2 + %"22" = load i64, ptr addrspace(5) %"6", align 8 + %"23" = load i16, ptr addrspace(5) %"9", align 2 + %"48" = inttoptr i64 %"22" to ptr + %"62" = getelementptr inbounds i8, ptr %"48", i64 4 + store i16 %"23", ptr %"62", align 2 + %"24" = load i64, ptr addrspace(5) %"6", align 8 + %"25" = load i16, ptr addrspace(5) %"10", align 2 + %"50" = inttoptr i64 
%"24" to ptr + %"64" = getelementptr inbounds i8, ptr %"50", i64 6 + store i16 %"25", ptr %"64", align 2 ret void } diff --git a/ptx/src/test/spirv_run/constant_f32.ll b/ptx/src/test/spirv_run/constant_f32.ll index e918c89..e0309ea 100644 --- a/ptx/src/test/spirv_run/constant_f32.ll +++ b/ptx/src/test/spirv_run/constant_f32.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": +define protected amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"21": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"12" to ptr - %"11" = load float, ptr %"20", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = fmul float %"14", 5.000000e-01 - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"21" = inttoptr i64 %"15" to ptr - store float %"16", ptr %"21", align 4 + store i64 %"9", ptr addrspace(5) %"5", 
align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"11" to ptr + %"10" = load float, ptr %"19", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = fmul float %"13", 5.000000e-01 + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"20" = inttoptr i64 %"14" to ptr + store float %"15", ptr %"20", align 4 ret void } diff --git a/ptx/src/test/spirv_run/constant_negative.ll b/ptx/src/test/spirv_run/constant_negative.ll index 09478b6..337689f 100644 --- a/ptx/src/test/spirv_run/constant_negative.ll +++ b/ptx/src/test/spirv_run/constant_negative.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": +define protected amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"21": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"12" to ptr - %"11" = load i32, ptr 
%"20", align 4 - store i32 %"11", ptr addrspace(5) %"6", align 4 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %"13" = mul i32 %"14", -1 - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load i32, ptr addrspace(5) %"6", align 4 - %"21" = inttoptr i64 %"15" to ptr - store i32 %"16", ptr %"21", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"11" to ptr + %"10" = load i32, ptr %"19", align 4 + store i32 %"10", ptr addrspace(5) %"6", align 4 + %"13" = load i32, ptr addrspace(5) %"6", align 4 + %"12" = mul i32 %"13", -1 + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load i32, ptr addrspace(5) %"6", align 4 + %"20" = inttoptr i64 %"14" to ptr + store i32 %"15", ptr %"20", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cos.ll b/ptx/src/test/spirv_run/cos.ll index 0cf9c30..d385e1f 100644 --- a/ptx/src/test/spirv_run/cos.ll +++ b/ptx/src/test/spirv_run/cos.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = 
load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load float, ptr %"19", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = call afn float @llvm.cos.f32(float %"14") - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store float %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load float, ptr %"18", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = call afn float @llvm.cos.f32(float %"13") + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store float %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_clamp.ll b/ptx/src/test/spirv_run/cvt_clamp.ll index 29de682..f2be477 100644 --- a/ptx/src/test/spirv_run/cvt_clamp.ll +++ b/ptx/src/test/spirv_run/cvt_clamp.ll @@ -3,69 +3,67 @@ target triple = "amdgcn-amd-amdhsa" declare float @__zluda_ptx_impl__cvt_sat_f32_f32(float) #0 -define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #1 { -"57": +define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #1 { +"56": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store 
i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"46", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"47", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"48", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"49" = inttoptr i64 %"12" to ptr addrspace(1) - %"11" = load float, ptr addrspace(1) %"49", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"14") - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"50" = inttoptr i64 %"15" to ptr addrspace(1) - store float %"16", ptr addrspace(1) %"50", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"51" = inttoptr i64 %"18" to ptr addrspace(1) - %"62" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 4 - %"17" = load float, ptr addrspace(1) %"62", align 4 - store float %"17", ptr addrspace(5) %"6", align 4 - %"20" = load float, ptr addrspace(5) %"6", align 4 - %"19" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"20") - store float %"19", ptr addrspace(5) %"6", align 4 - %"21" = load i64, ptr addrspace(5) %"5", align 8 - %"22" = load float, ptr addrspace(5) %"6", align 4 - %"52" = inttoptr i64 %"21" to ptr addrspace(1) - %"64" = getelementptr inbounds i8, ptr addrspace(1) %"52", i64 4 - store float %"22", ptr addrspace(1) %"64", align 4 - %"24" = load i64, ptr addrspace(5) %"4", align 8 - %"53" = inttoptr i64 %"24" to ptr addrspace(1) - %"66" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 8 - %"23" = load float, ptr addrspace(1) %"66", align 4 - 
store float %"23", ptr addrspace(5) %"6", align 4 - %"26" = load float, ptr addrspace(5) %"6", align 4 - %"25" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"26") - store float %"25", ptr addrspace(5) %"6", align 4 - %"27" = load i64, ptr addrspace(5) %"5", align 8 - %"28" = load float, ptr addrspace(5) %"6", align 4 - %"54" = inttoptr i64 %"27" to ptr addrspace(1) - %"68" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 8 - store float %"28", ptr addrspace(1) %"68", align 4 - %"30" = load i64, ptr addrspace(5) %"4", align 8 - %"55" = inttoptr i64 %"30" to ptr addrspace(1) - %"70" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 12 - %"29" = load float, ptr addrspace(1) %"70", align 4 - store float %"29", ptr addrspace(5) %"6", align 4 - %"32" = load float, ptr addrspace(5) %"6", align 4 - %"31" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"32") - store float %"31", ptr addrspace(5) %"6", align 4 - %"33" = load i64, ptr addrspace(5) %"5", align 8 - %"34" = load float, ptr addrspace(5) %"6", align 4 - %"56" = inttoptr i64 %"33" to ptr addrspace(1) - %"72" = getelementptr inbounds i8, ptr addrspace(1) %"56", i64 12 - store float %"34", ptr addrspace(1) %"72", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"48" = inttoptr i64 %"11" to ptr addrspace(1) + %"10" = load float, ptr addrspace(1) %"48", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"13") + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"49" = inttoptr i64 %"14" to ptr addrspace(1) + store float %"15", ptr addrspace(1) %"49", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"50" = inttoptr i64 %"17" to ptr addrspace(1) + %"61" = getelementptr inbounds i8, ptr 
addrspace(1) %"50", i64 4 + %"16" = load float, ptr addrspace(1) %"61", align 4 + store float %"16", ptr addrspace(5) %"6", align 4 + %"19" = load float, ptr addrspace(5) %"6", align 4 + %"18" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"19") + store float %"18", ptr addrspace(5) %"6", align 4 + %"20" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = load float, ptr addrspace(5) %"6", align 4 + %"51" = inttoptr i64 %"20" to ptr addrspace(1) + %"63" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 4 + store float %"21", ptr addrspace(1) %"63", align 4 + %"23" = load i64, ptr addrspace(5) %"4", align 8 + %"52" = inttoptr i64 %"23" to ptr addrspace(1) + %"65" = getelementptr inbounds i8, ptr addrspace(1) %"52", i64 8 + %"22" = load float, ptr addrspace(1) %"65", align 4 + store float %"22", ptr addrspace(5) %"6", align 4 + %"25" = load float, ptr addrspace(5) %"6", align 4 + %"24" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"25") + store float %"24", ptr addrspace(5) %"6", align 4 + %"26" = load i64, ptr addrspace(5) %"5", align 8 + %"27" = load float, ptr addrspace(5) %"6", align 4 + %"53" = inttoptr i64 %"26" to ptr addrspace(1) + %"67" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 8 + store float %"27", ptr addrspace(1) %"67", align 4 + %"29" = load i64, ptr addrspace(5) %"4", align 8 + %"54" = inttoptr i64 %"29" to ptr addrspace(1) + %"69" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 12 + %"28" = load float, ptr addrspace(1) %"69", align 4 + store float %"28", ptr addrspace(5) %"6", align 4 + %"31" = load float, ptr addrspace(5) %"6", align 4 + %"30" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"31") + store float %"30", ptr addrspace(5) %"6", align 4 + %"32" = load i64, ptr addrspace(5) %"5", align 8 + %"33" = load float, ptr addrspace(5) %"6", align 4 + %"55" = inttoptr i64 %"32" to ptr addrspace(1) + %"71" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 12 + store float %"33", ptr 
addrspace(1) %"71", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_f32_f16.ll b/ptx/src/test/spirv_run/cvt_f32_f16.ll index 169eb59..e3acdb6 100644 --- a/ptx/src/test/spirv_run/cvt_f32_f16.ll +++ b/ptx/src/test/spirv_run/cvt_f32_f16.ll @@ -1,32 +1,30 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_f32_f16(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"23": +define protected amdgpu_kernel void @cvt_f32_f16(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca half, align 2, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr addrspace(1) - %"20" = load i16, ptr addrspace(1) %"21", align 2 - %"12" = bitcast i16 %"20" to half - store half %"12", ptr addrspace(5) %"6", align 2 - %"15" = load half, ptr addrspace(5) %"6", align 2 - %"14" = fpext half %"15" to float - store float %"14", ptr addrspace(5) %"7", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load float, ptr addrspace(5) %"7", align 4 - %"22" = inttoptr i64 %"16" to ptr - store float %"17", ptr %"22", align 4 + store i64 %"10", ptr 
addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr addrspace(1) + %"19" = load i16, ptr addrspace(1) %"20", align 2 + %"11" = bitcast i16 %"19" to half + store half %"11", ptr addrspace(5) %"6", align 2 + %"14" = load half, ptr addrspace(5) %"6", align 2 + %"13" = fpext half %"14" to float + store float %"13", ptr addrspace(5) %"7", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load float, ptr addrspace(5) %"7", align 4 + %"21" = inttoptr i64 %"15" to ptr + store float %"16", ptr %"21", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_f32_s32.ll b/ptx/src/test/spirv_run/cvt_f32_s32.ll index 119d052..65b00ce 100644 --- a/ptx/src/test/spirv_run/cvt_f32_s32.ll +++ b/ptx/src/test/spirv_run/cvt_f32_s32.ll @@ -9,80 +9,78 @@ declare float @__zluda_ptx_impl__cvt_rp_f32_s32(i32) #0 declare float @__zluda_ptx_impl__cvt_rz_f32_s32(i32) #0 -define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"50", ptr addrspace(4) byref(i64) %"51") #1 { -"76": +define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"49", ptr addrspace(4) byref(i64) %"50") #1 { +"75": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + %"11" = load i64, ptr addrspace(4) %"49", align 8 + store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"50", align 8 - store i64 %"12", ptr addrspace(5) %"4", align 8 - %"13" = load i64, ptr addrspace(4) %"51", align 8 - store i64 %"13", ptr addrspace(5) %"5", align 8 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"53" = 
inttoptr i64 %"15" to ptr - %"52" = load i32, ptr %"53", align 4 - store i32 %"52", ptr addrspace(5) %"6", align 4 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"54" = inttoptr i64 %"17" to ptr - %"90" = getelementptr inbounds i8, ptr %"54", i64 4 - %"55" = load i32, ptr %"90", align 4 - store i32 %"55", ptr addrspace(5) %"7", align 4 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"56" = inttoptr i64 %"19" to ptr - %"92" = getelementptr inbounds i8, ptr %"56", i64 8 - %"57" = load i32, ptr %"92", align 4 - store i32 %"57", ptr addrspace(5) %"8", align 4 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"58" = inttoptr i64 %"21" to ptr - %"94" = getelementptr inbounds i8, ptr %"58", i64 12 - %"59" = load i32, ptr %"94", align 4 - store i32 %"59", ptr addrspace(5) %"9", align 4 - %"23" = load i32, ptr addrspace(5) %"6", align 4 - %"60" = call float @__zluda_ptx_impl__cvt_rn_f32_s32(i32 %"23") - %"22" = bitcast float %"60" to i32 - store i32 %"22", ptr addrspace(5) %"6", align 4 - %"25" = load i32, ptr addrspace(5) %"7", align 4 - %"62" = call float @__zluda_ptx_impl__cvt_rz_f32_s32(i32 %"25") - %"24" = bitcast float %"62" to i32 - store i32 %"24", ptr addrspace(5) %"7", align 4 - %"27" = load i32, ptr addrspace(5) %"8", align 4 - %"64" = call float @__zluda_ptx_impl__cvt_rm_f32_s32(i32 %"27") - %"26" = bitcast float %"64" to i32 - store i32 %"26", ptr addrspace(5) %"8", align 4 - %"29" = load i32, ptr addrspace(5) %"9", align 4 - %"66" = call float @__zluda_ptx_impl__cvt_rp_f32_s32(i32 %"29") - %"28" = bitcast float %"66" to i32 - store i32 %"28", ptr addrspace(5) %"9", align 4 - %"30" = load i64, ptr addrspace(5) %"5", align 8 - %"31" = load i32, ptr addrspace(5) %"6", align 4 - %"68" = inttoptr i64 %"30" to ptr addrspace(1) - %"69" = bitcast i32 %"31" to float - store float %"69", ptr addrspace(1) %"68", align 4 - %"32" = load i64, ptr addrspace(5) %"5", align 8 - %"33" = load i32, ptr addrspace(5) %"7", align 4 - %"70" = inttoptr i64 %"32" to 
ptr addrspace(1) - %"96" = getelementptr inbounds i8, ptr addrspace(1) %"70", i64 4 - %"71" = bitcast i32 %"33" to float - store float %"71", ptr addrspace(1) %"96", align 4 - %"34" = load i64, ptr addrspace(5) %"5", align 8 - %"35" = load i32, ptr addrspace(5) %"8", align 4 - %"72" = inttoptr i64 %"34" to ptr addrspace(1) - %"98" = getelementptr inbounds i8, ptr addrspace(1) %"72", i64 8 - %"73" = bitcast i32 %"35" to float - store float %"73", ptr addrspace(1) %"98", align 4 - %"36" = load i64, ptr addrspace(5) %"5", align 8 - %"37" = load i32, ptr addrspace(5) %"9", align 4 - %"74" = inttoptr i64 %"36" to ptr addrspace(1) - %"100" = getelementptr inbounds i8, ptr addrspace(1) %"74", i64 12 - %"75" = bitcast i32 %"37" to float - store float %"75", ptr addrspace(1) %"100", align 4 + store i64 %"12", ptr addrspace(5) %"5", align 8 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"52" = inttoptr i64 %"14" to ptr + %"51" = load i32, ptr %"52", align 4 + store i32 %"51", ptr addrspace(5) %"6", align 4 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"53" = inttoptr i64 %"16" to ptr + %"89" = getelementptr inbounds i8, ptr %"53", i64 4 + %"54" = load i32, ptr %"89", align 4 + store i32 %"54", ptr addrspace(5) %"7", align 4 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"55" = inttoptr i64 %"18" to ptr + %"91" = getelementptr inbounds i8, ptr %"55", i64 8 + %"56" = load i32, ptr %"91", align 4 + store i32 %"56", ptr addrspace(5) %"8", align 4 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"57" = inttoptr i64 %"20" to ptr + %"93" = getelementptr inbounds i8, ptr %"57", i64 12 + %"58" = load i32, ptr %"93", align 4 + store i32 %"58", ptr addrspace(5) %"9", align 4 + %"22" = load i32, ptr addrspace(5) %"6", align 4 + %"59" = call float @__zluda_ptx_impl__cvt_rn_f32_s32(i32 %"22") + %"21" = bitcast float %"59" to i32 + store i32 %"21", ptr addrspace(5) %"6", align 4 + %"24" = load i32, ptr addrspace(5) %"7", align 4 + %"61" = call float 
@__zluda_ptx_impl__cvt_rz_f32_s32(i32 %"24") + %"23" = bitcast float %"61" to i32 + store i32 %"23", ptr addrspace(5) %"7", align 4 + %"26" = load i32, ptr addrspace(5) %"8", align 4 + %"63" = call float @__zluda_ptx_impl__cvt_rm_f32_s32(i32 %"26") + %"25" = bitcast float %"63" to i32 + store i32 %"25", ptr addrspace(5) %"8", align 4 + %"28" = load i32, ptr addrspace(5) %"9", align 4 + %"65" = call float @__zluda_ptx_impl__cvt_rp_f32_s32(i32 %"28") + %"27" = bitcast float %"65" to i32 + store i32 %"27", ptr addrspace(5) %"9", align 4 + %"29" = load i64, ptr addrspace(5) %"5", align 8 + %"30" = load i32, ptr addrspace(5) %"6", align 4 + %"67" = inttoptr i64 %"29" to ptr addrspace(1) + %"68" = bitcast i32 %"30" to float + store float %"68", ptr addrspace(1) %"67", align 4 + %"31" = load i64, ptr addrspace(5) %"5", align 8 + %"32" = load i32, ptr addrspace(5) %"7", align 4 + %"69" = inttoptr i64 %"31" to ptr addrspace(1) + %"95" = getelementptr inbounds i8, ptr addrspace(1) %"69", i64 4 + %"70" = bitcast i32 %"32" to float + store float %"70", ptr addrspace(1) %"95", align 4 + %"33" = load i64, ptr addrspace(5) %"5", align 8 + %"34" = load i32, ptr addrspace(5) %"8", align 4 + %"71" = inttoptr i64 %"33" to ptr addrspace(1) + %"97" = getelementptr inbounds i8, ptr addrspace(1) %"71", i64 8 + %"72" = bitcast i32 %"34" to float + store float %"72", ptr addrspace(1) %"97", align 4 + %"35" = load i64, ptr addrspace(5) %"5", align 8 + %"36" = load i32, ptr addrspace(5) %"9", align 4 + %"73" = inttoptr i64 %"35" to ptr addrspace(1) + %"99" = getelementptr inbounds i8, ptr addrspace(1) %"73", i64 12 + %"74" = bitcast i32 %"36" to float + store float %"74", ptr addrspace(1) %"99", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_f64_f32.ll b/ptx/src/test/spirv_run/cvt_f64_f32.ll index f608ed1..96267f4 100644 --- a/ptx/src/test/spirv_run/cvt_f64_f32.ll +++ b/ptx/src/test/spirv_run/cvt_f64_f32.ll @@ -1,31 +1,29 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": +define protected amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"21": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca double, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"13" to ptr addrspace(1) - %"12" = load float, ptr addrspace(1) %"20", align 4 - store float %"12", ptr addrspace(5) %"6", align 4 - %"15" = load float, ptr addrspace(5) %"6", align 4 - %"14" = fpext float %"15" to double - store double %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load double, ptr addrspace(5) %"7", align 8 - %"21" = inttoptr i64 %"16" to ptr - store double %"17", ptr %"21", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"12" to ptr addrspace(1) + %"11" = load float, ptr addrspace(1) %"19", align 4 + store float %"11", ptr addrspace(5) %"6", align 4 + %"14" = load float, ptr addrspace(5) %"6", align 4 + %"13" = fpext float %"14" 
to double + store double %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load double, ptr addrspace(5) %"7", align 8 + %"20" = inttoptr i64 %"15" to ptr + store double %"16", ptr %"20", align 8 ret void } diff --git a/ptx/src/test/spirv_run/cvt_rni.ll b/ptx/src/test/spirv_run/cvt_rni.ll index fa56dfa..5eb6eaa 100644 --- a/ptx/src/test/spirv_run/cvt_rni.ll +++ b/ptx/src/test/spirv_run/cvt_rni.ll @@ -1,44 +1,42 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { -"34": +define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { +"33": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"27", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"30" = inttoptr i64 %"13" to ptr - %"12" = load float, ptr %"30", align 4 - store float %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"15" to ptr - %"36" = getelementptr inbounds i8, ptr %"31", i64 4 - %"14" = load float, ptr %"36", align 4 - store float %"14", ptr 
addrspace(5) %"7", align 4 - %"17" = load float, ptr addrspace(5) %"6", align 4 - %"16" = call float @llvm.rint.f32(float %"17") - store float %"16", ptr addrspace(5) %"6", align 4 - %"19" = load float, ptr addrspace(5) %"7", align 4 - %"18" = call float @llvm.rint.f32(float %"19") - store float %"18", ptr addrspace(5) %"7", align 4 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load float, ptr addrspace(5) %"6", align 4 - %"32" = inttoptr i64 %"20" to ptr - store float %"21", ptr %"32", align 4 - %"22" = load i64, ptr addrspace(5) %"5", align 8 - %"23" = load float, ptr addrspace(5) %"7", align 4 - %"33" = inttoptr i64 %"22" to ptr - %"38" = getelementptr inbounds i8, ptr %"33", i64 4 - store float %"23", ptr %"38", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"29" = inttoptr i64 %"12" to ptr + %"11" = load float, ptr %"29", align 4 + store float %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"14" to ptr + %"35" = getelementptr inbounds i8, ptr %"30", i64 4 + %"13" = load float, ptr %"35", align 4 + store float %"13", ptr addrspace(5) %"7", align 4 + %"16" = load float, ptr addrspace(5) %"6", align 4 + %"15" = call float @llvm.rint.f32(float %"16") + store float %"15", ptr addrspace(5) %"6", align 4 + %"18" = load float, ptr addrspace(5) %"7", align 4 + %"17" = call float @llvm.rint.f32(float %"18") + store float %"17", ptr addrspace(5) %"7", align 4 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = load float, ptr addrspace(5) %"6", align 4 + %"31" = inttoptr i64 %"19" to ptr + store float %"20", ptr %"31", align 4 + %"21" = load i64, ptr addrspace(5) %"5", align 8 + %"22" = load float, ptr addrspace(5) %"7", align 4 + %"32" = inttoptr i64 %"21" to ptr + %"37" = getelementptr inbounds i8, ptr %"32", i64 4 + store float %"22", ptr %"37", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_rzi.ll 
b/ptx/src/test/spirv_run/cvt_rzi.ll index ad4a305..83783d8 100644 --- a/ptx/src/test/spirv_run/cvt_rzi.ll +++ b/ptx/src/test/spirv_run/cvt_rzi.ll @@ -1,44 +1,42 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { -"34": +define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { +"33": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"27", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"30" = inttoptr i64 %"13" to ptr - %"12" = load float, ptr %"30", align 4 - store float %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"15" to ptr - %"36" = getelementptr inbounds i8, ptr %"31", i64 4 - %"14" = load float, ptr %"36", align 4 - store float %"14", ptr addrspace(5) %"7", align 4 - %"17" = load float, ptr addrspace(5) %"6", align 4 - %"16" = call float @llvm.trunc.f32(float %"17") - store float %"16", ptr addrspace(5) %"6", align 4 - %"19" = load float, ptr addrspace(5) %"7", align 4 - %"18" = call float @llvm.trunc.f32(float %"19") - store float %"18", 
ptr addrspace(5) %"7", align 4 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load float, ptr addrspace(5) %"6", align 4 - %"32" = inttoptr i64 %"20" to ptr - store float %"21", ptr %"32", align 4 - %"22" = load i64, ptr addrspace(5) %"5", align 8 - %"23" = load float, ptr addrspace(5) %"7", align 4 - %"33" = inttoptr i64 %"22" to ptr - %"38" = getelementptr inbounds i8, ptr %"33", i64 4 - store float %"23", ptr %"38", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"29" = inttoptr i64 %"12" to ptr + %"11" = load float, ptr %"29", align 4 + store float %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"14" to ptr + %"35" = getelementptr inbounds i8, ptr %"30", i64 4 + %"13" = load float, ptr %"35", align 4 + store float %"13", ptr addrspace(5) %"7", align 4 + %"16" = load float, ptr addrspace(5) %"6", align 4 + %"15" = call float @llvm.trunc.f32(float %"16") + store float %"15", ptr addrspace(5) %"6", align 4 + %"18" = load float, ptr addrspace(5) %"7", align 4 + %"17" = call float @llvm.trunc.f32(float %"18") + store float %"17", ptr addrspace(5) %"7", align 4 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = load float, ptr addrspace(5) %"6", align 4 + %"31" = inttoptr i64 %"19" to ptr + store float %"20", ptr %"31", align 4 + %"21" = load i64, ptr addrspace(5) %"5", align 8 + %"22" = load float, ptr addrspace(5) %"7", align 4 + %"32" = inttoptr i64 %"21" to ptr + %"37" = getelementptr inbounds i8, ptr %"32", i64 4 + store float %"22", ptr %"37", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_s16_s8.ll b/ptx/src/test/spirv_run/cvt_s16_s8.ll index dcf4555..841178e 100644 --- a/ptx/src/test/spirv_run/cvt_s16_s8.ll +++ b/ptx/src/test/spirv_run/cvt_s16_s8.ll @@ -1,33 +1,31 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": +define protected amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"23": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"13" to ptr addrspace(1) - %"12" = load i32, ptr addrspace(1) %"20", align 4 - store i32 %"12", ptr addrspace(5) %"7", align 4 - %"15" = load i32, ptr addrspace(5) %"7", align 4 - %"26" = trunc i32 %"15" to i8 - %"21" = sext i8 %"26" to i16 - %"14" = sext i16 %"21" to i32 - store i32 %"14", ptr addrspace(5) %"6", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"23" = inttoptr i64 %"16" to ptr - store i32 %"17", ptr %"23", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"12" to ptr addrspace(1) + %"11" = load i32, ptr addrspace(1) %"19", align 4 + store i32 %"11", ptr addrspace(5) %"7", align 4 + %"14" = load i32, ptr addrspace(5) %"7", align 4 
+ %"25" = trunc i32 %"14" to i8 + %"20" = sext i8 %"25" to i16 + %"13" = sext i16 %"20" to i32 + store i32 %"13", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"22" = inttoptr i64 %"15" to ptr + store i32 %"16", ptr %"22", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_s32_f32.ll b/ptx/src/test/spirv_run/cvt_s32_f32.ll index b8f8b2b..bd1b9e3 100644 --- a/ptx/src/test/spirv_run/cvt_s32_f32.ll +++ b/ptx/src/test/spirv_run/cvt_s32_f32.ll @@ -3,48 +3,46 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float) #0 -define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 { -"42": +define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #1 { +"41": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"27", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"13" to ptr - %"30" = load float, ptr %"31", align 4 - %"12" = bitcast float %"30" to i32 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"32" = inttoptr i64 %"15" to ptr - %"47" = getelementptr inbounds i8, ptr %"32", i64 4 - %"33" = load float, ptr %"47", align 4 - %"14" = bitcast float %"33" to i32 - 
store i32 %"14", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"35" = bitcast i32 %"17" to float - %"34" = call i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float %"35") - store i32 %"34", ptr addrspace(5) %"6", align 4 - %"19" = load i32, ptr addrspace(5) %"7", align 4 - %"37" = bitcast i32 %"19" to float - %"36" = call i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float %"37") - store i32 %"36", ptr addrspace(5) %"7", align 4 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load i32, ptr addrspace(5) %"6", align 4 - %"38" = inttoptr i64 %"20" to ptr addrspace(1) - store i32 %"21", ptr addrspace(1) %"38", align 4 - %"22" = load i64, ptr addrspace(5) %"5", align 8 - %"23" = load i32, ptr addrspace(5) %"7", align 4 - %"40" = inttoptr i64 %"22" to ptr addrspace(1) - %"49" = getelementptr inbounds i8, ptr addrspace(1) %"40", i64 4 - store i32 %"23", ptr addrspace(1) %"49", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"12" to ptr + %"29" = load float, ptr %"30", align 4 + %"11" = bitcast float %"29" to i32 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"31" = inttoptr i64 %"14" to ptr + %"46" = getelementptr inbounds i8, ptr %"31", i64 4 + %"32" = load float, ptr %"46", align 4 + %"13" = bitcast float %"32" to i32 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"34" = bitcast i32 %"16" to float + %"33" = call i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float %"34") + store i32 %"33", ptr addrspace(5) %"6", align 4 + %"18" = load i32, ptr addrspace(5) %"7", align 4 + %"36" = bitcast i32 %"18" to float + %"35" = call i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float %"36") + store i32 %"35", ptr addrspace(5) %"7", align 4 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = load i32, ptr addrspace(5) %"6", align 4 + %"37" = inttoptr i64 %"19" 
to ptr addrspace(1) + store i32 %"20", ptr addrspace(1) %"37", align 4 + %"21" = load i64, ptr addrspace(5) %"5", align 8 + %"22" = load i32, ptr addrspace(5) %"7", align 4 + %"39" = inttoptr i64 %"21" to ptr addrspace(1) + %"48" = getelementptr inbounds i8, ptr addrspace(1) %"39", i64 4 + store i32 %"22", ptr addrspace(1) %"48", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_s64_s32.ll b/ptx/src/test/spirv_run/cvt_s64_s32.ll index a272a4c..4958266 100644 --- a/ptx/src/test/spirv_run/cvt_s64_s32.ll +++ b/ptx/src/test/spirv_run/cvt_s64_s32.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": +define protected amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"23": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"20" = load i32, ptr %"21", align 4 - store i32 %"20", ptr addrspace(5) %"6", align 4 - %"15" = load i32, ptr addrspace(5) %"6", align 4 - %"14" = sext i32 %"15" to i64 - store i64 %"14", 
ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"22", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"19" = load i32, ptr %"20", align 4 + store i32 %"19", ptr addrspace(5) %"6", align 4 + %"14" = load i32, ptr addrspace(5) %"6", align 4 + %"13" = sext i32 %"14" to i64 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/cvt_sat_s_u.ll b/ptx/src/test/spirv_run/cvt_sat_s_u.ll index 946ece1..3af6ef5 100644 --- a/ptx/src/test/spirv_run/cvt_sat_s_u.ll +++ b/ptx/src/test/spirv_run/cvt_sat_s_u.ll @@ -1,50 +1,48 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { -"35": +define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 { +"34": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"26", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr 
addrspace(4) %"27", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"29" = inttoptr i64 %"14" to ptr - %"13" = load i32, ptr %"29", align 4 - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"16" = load i32, ptr addrspace(5) %"6", align 4 - %0 = call i32 @llvm.smax.i32(i32 %"16", i32 0) + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"28" = inttoptr i64 %"13" to ptr + %"12" = load i32, ptr %"28", align 4 + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i32, ptr addrspace(5) %"6", align 4 + %0 = call i32 @llvm.smax.i32(i32 %"15", i32 0) %1 = alloca i32, align 4, addrspace(5) store i32 %0, ptr addrspace(5) %1, align 4 - %"15" = load i32, ptr addrspace(5) %1, align 4 - store i32 %"15", ptr addrspace(5) %"7", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 + %"14" = load i32, ptr addrspace(5) %1, align 4 + store i32 %"14", ptr addrspace(5) %"7", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 %2 = alloca i32, align 4, addrspace(5) - store i32 %"18", ptr addrspace(5) %2, align 4 - %"30" = load i32, ptr addrspace(5) %2, align 4 - store i32 %"30", ptr addrspace(5) %"7", align 4 - %"20" = load i32, ptr addrspace(5) %"6", align 4 + store i32 %"17", ptr addrspace(5) %2, align 4 + %"29" = load i32, ptr addrspace(5) %2, align 4 + store i32 %"29", ptr addrspace(5) %"7", align 4 + %"19" = load i32, ptr addrspace(5) %"6", align 4 %3 = alloca i32, align 4, addrspace(5) - store i32 %"20", ptr addrspace(5) %3, align 4 - %"31" = load i32, ptr addrspace(5) %3, align 4 - store i32 %"31", ptr addrspace(5) %"8", align 4 - %"21" = load i64, ptr addrspace(5) %"5", align 8 - %"22" = load i32, ptr addrspace(5) %"7", align 4 - %"32" = inttoptr i64 %"21" to ptr - store i32 %"22", ptr %"32", align 4 - %"23" = load i64, ptr addrspace(5) 
%"5", align 8 - %"24" = load i32, ptr addrspace(5) %"8", align 4 - %"34" = inttoptr i64 %"23" to ptr - %"37" = getelementptr inbounds i8, ptr %"34", i64 4 - store i32 %"24", ptr %"37", align 4 + store i32 %"19", ptr addrspace(5) %3, align 4 + %"30" = load i32, ptr addrspace(5) %3, align 4 + store i32 %"30", ptr addrspace(5) %"8", align 4 + %"20" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = load i32, ptr addrspace(5) %"7", align 4 + %"31" = inttoptr i64 %"20" to ptr + store i32 %"21", ptr %"31", align 4 + %"22" = load i64, ptr addrspace(5) %"5", align 8 + %"23" = load i32, ptr addrspace(5) %"8", align 4 + %"33" = inttoptr i64 %"22" to ptr + %"36" = getelementptr inbounds i8, ptr %"33", i64 4 + store i32 %"23", ptr %"36", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_u32_s16.ll b/ptx/src/test/spirv_run/cvt_u32_s16.ll index 7ab8366..141f83f 100644 --- a/ptx/src/test/spirv_run/cvt_u32_s16.ll +++ b/ptx/src/test/spirv_run/cvt_u32_s16.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_u32_s16(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": +define protected amdgpu_kernel void @cvt_u32_s16(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"23": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i16, align 2, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr 
addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"13" to ptr addrspace(1) - %"12" = load i16, ptr addrspace(1) %"20", align 2 - store i16 %"12", ptr addrspace(5) %"6", align 2 - %"15" = load i16, ptr addrspace(5) %"6", align 2 - %"21" = sext i16 %"15" to i32 - store i32 %"21", ptr addrspace(5) %"7", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i32, ptr addrspace(5) %"7", align 4 - %"23" = inttoptr i64 %"16" to ptr - store i32 %"17", ptr %"23", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"12" to ptr addrspace(1) + %"11" = load i16, ptr addrspace(1) %"19", align 2 + store i16 %"11", ptr addrspace(5) %"6", align 2 + %"14" = load i16, ptr addrspace(5) %"6", align 2 + %"20" = sext i16 %"14" to i32 + store i32 %"20", ptr addrspace(5) %"7", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i32, ptr addrspace(5) %"7", align 4 + %"22" = inttoptr i64 %"15" to ptr + store i32 %"16", ptr %"22", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvta.ll b/ptx/src/test/spirv_run/cvta.ll index 8cba990..d5c0f73 100644 --- a/ptx/src/test/spirv_run/cvta.ll +++ b/ptx/src/test/spirv_run/cvta.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"27": +define protected amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"26": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca 
i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %0 = inttoptr i64 %"12" to ptr + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %0 = inttoptr i64 %"11" to ptr %1 = addrspacecast ptr %0 to ptr addrspace(1) - %"21" = ptrtoint ptr addrspace(1) %1 to i64 - store i64 %"21", ptr addrspace(5) %"4", align 8 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %2 = inttoptr i64 %"14" to ptr + %"20" = ptrtoint ptr addrspace(1) %1 to i64 + store i64 %"20", ptr addrspace(5) %"4", align 8 + %"13" = load i64, ptr addrspace(5) %"5", align 8 + %2 = inttoptr i64 %"13" to ptr %3 = addrspacecast ptr %2 to ptr addrspace(1) - %"23" = ptrtoint ptr addrspace(1) %3 to i64 - store i64 %"23", ptr addrspace(5) %"5", align 8 - %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"22" = ptrtoint ptr addrspace(1) %3 to i64 + store i64 %"22", ptr addrspace(5) %"5", align 8 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"15" to ptr addrspace(1) + %"14" = load float, ptr addrspace(1) %"24", align 4 + store float %"14", ptr addrspace(5) %"6", align 4 + %"16" = load i64, ptr addrspace(5) %"5", align 8 + %"17" = load float, ptr addrspace(5) %"6", align 4 %"25" = inttoptr i64 %"16" to ptr addrspace(1) - %"15" = load float, ptr addrspace(1) %"25", align 4 - store float %"15", ptr addrspace(5) %"6", align 4 - %"17" = load i64, ptr addrspace(5) %"5", align 8 - %"18" = load float, ptr addrspace(5) %"6", align 4 - %"26" = inttoptr 
i64 %"17" to ptr addrspace(1) - store float %"18", ptr addrspace(1) %"26", align 4 + store float %"17", ptr addrspace(1) %"25", align 4 ret void } diff --git a/ptx/src/test/spirv_run/div_approx.ll b/ptx/src/test/spirv_run/div_approx.ll index 91b3fb7..833065e 100644 --- a/ptx/src/test/spirv_run/div_approx.ll +++ b/ptx/src/test/spirv_run/div_approx.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": +define protected amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"27": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load float, ptr %"25", align 4 - store float %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load float, ptr %"30", align 4 - store float %"14", ptr addrspace(5) %"7", align 4 - %"17" = load float, ptr addrspace(5) %"6", align 4 - %"18" = load 
float, ptr addrspace(5) %"7", align 4 - %"16" = fdiv arcp afn float %"17", %"18" - store float %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load float, ptr addrspace(5) %"6", align 4 - %"27" = inttoptr i64 %"19" to ptr - store float %"20", ptr %"27", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load float, ptr %"24", align 4 + store float %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"29" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load float, ptr %"29", align 4 + store float %"13", ptr addrspace(5) %"7", align 4 + %"16" = load float, ptr addrspace(5) %"6", align 4 + %"17" = load float, ptr addrspace(5) %"7", align 4 + %"15" = fdiv arcp afn float %"16", %"17" + store float %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load float, ptr addrspace(5) %"6", align 4 + %"26" = inttoptr i64 %"18" to ptr + store float %"19", ptr %"26", align 4 ret void } diff --git a/ptx/src/test/spirv_run/dp4a.ll b/ptx/src/test/spirv_run/dp4a.ll index f55aa62..2ada6cb 100644 --- a/ptx/src/test/spirv_run/dp4a.ll +++ b/ptx/src/test/spirv_run/dp4a.ll @@ -3,44 +3,42 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__dp4a_s32_s32(i32, i32, i32) #0 -define protected amdgpu_kernel void @dp4a(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #1 { -"39": +define protected amdgpu_kernel void @dp4a(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 { +"38": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, 
align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"28", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"30", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"14" to ptr - %"13" = load i32, ptr %"31", align 4 - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"32" = inttoptr i64 %"16" to ptr - %"46" = getelementptr inbounds i8, ptr %"32", i64 4 - %"15" = load i32, ptr %"46", align 4 - store i32 %"15", ptr addrspace(5) %"7", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"33" = inttoptr i64 %"18" to ptr - %"48" = getelementptr inbounds i8, ptr %"33", i64 8 - %"17" = load i32, ptr %"48", align 4 - store i32 %"17", ptr addrspace(5) %"8", align 4 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"21" = load i32, ptr addrspace(5) %"7", align 4 - %"22" = load i32, ptr addrspace(5) %"8", align 4 - %"34" = call i32 @__zluda_ptx_impl__dp4a_s32_s32(i32 %"20", i32 %"21", i32 %"22") - store i32 %"34", ptr addrspace(5) %"6", align 4 - %"23" = load i64, ptr addrspace(5) %"5", align 8 - %"24" = load i32, ptr addrspace(5) %"6", align 4 - %"38" = inttoptr i64 %"23" to ptr - store i32 %"24", ptr %"38", align 4 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"13" to ptr + %"12" = load i32, ptr %"30", align 4 + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"31" = inttoptr i64 %"15" to ptr + %"45" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load i32, ptr %"45", align 4 + store i32 %"14", ptr addrspace(5) %"7", align 4 + %"17" = load i64, ptr addrspace(5) 
%"4", align 8 + %"32" = inttoptr i64 %"17" to ptr + %"47" = getelementptr inbounds i8, ptr %"32", i64 8 + %"16" = load i32, ptr %"47", align 4 + store i32 %"16", ptr addrspace(5) %"8", align 4 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"20" = load i32, ptr addrspace(5) %"7", align 4 + %"21" = load i32, ptr addrspace(5) %"8", align 4 + %"33" = call i32 @__zluda_ptx_impl__dp4a_s32_s32(i32 %"19", i32 %"20", i32 %"21") + store i32 %"33", ptr addrspace(5) %"6", align 4 + %"22" = load i64, ptr addrspace(5) %"5", align 8 + %"23" = load i32, ptr addrspace(5) %"6", align 4 + %"37" = inttoptr i64 %"22" to ptr + store i32 %"23", ptr %"37", align 4 ret void } diff --git a/ptx/src/test/spirv_run/ex2.ll b/ptx/src/test/spirv_run/ex2.ll index 8e13d43..b5e671e 100644 --- a/ptx/src/test/spirv_run/ex2.ll +++ b/ptx/src/test/spirv_run/ex2.ll @@ -1,69 +1,67 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #0 { -"57": +define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { +"56": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"46", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"47", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"48", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 
- %"49" = inttoptr i64 %"12" to ptr - %"11" = load float, ptr %"49", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = call afn float @llvm.exp2.f32(float %"14") - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"50" = inttoptr i64 %"15" to ptr - store float %"16", ptr %"50", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"51" = inttoptr i64 %"18" to ptr - %"59" = getelementptr inbounds i8, ptr %"51", i64 4 - %"17" = load float, ptr %"59", align 4 - store float %"17", ptr addrspace(5) %"6", align 4 - %"20" = load float, ptr addrspace(5) %"6", align 4 - %"19" = call afn float @llvm.exp2.f32(float %"20") - store float %"19", ptr addrspace(5) %"6", align 4 - %"21" = load i64, ptr addrspace(5) %"5", align 8 - %"22" = load float, ptr addrspace(5) %"6", align 4 - %"52" = inttoptr i64 %"21" to ptr - %"61" = getelementptr inbounds i8, ptr %"52", i64 4 - store float %"22", ptr %"61", align 4 - %"24" = load i64, ptr addrspace(5) %"4", align 8 - %"53" = inttoptr i64 %"24" to ptr - %"63" = getelementptr inbounds i8, ptr %"53", i64 8 - %"23" = load float, ptr %"63", align 4 - store float %"23", ptr addrspace(5) %"6", align 4 - %"26" = load float, ptr addrspace(5) %"6", align 4 - %"25" = call afn float @llvm.exp2.f32(float %"26") - store float %"25", ptr addrspace(5) %"6", align 4 - %"27" = load i64, ptr addrspace(5) %"5", align 8 - %"28" = load float, ptr addrspace(5) %"6", align 4 - %"54" = inttoptr i64 %"27" to ptr - %"65" = getelementptr inbounds i8, ptr %"54", i64 8 - store float %"28", ptr %"65", align 4 - %"30" = load i64, ptr addrspace(5) %"4", align 8 - %"55" = inttoptr i64 %"30" to ptr - %"67" = getelementptr inbounds i8, ptr %"55", i64 12 - %"29" = load float, ptr %"67", align 4 - store float %"29", ptr addrspace(5) %"6", align 4 - %"32" = load float, ptr addrspace(5) %"6", 
align 4 - %"31" = call afn float @llvm.exp2.f32(float %"32") - store float %"31", ptr addrspace(5) %"6", align 4 - %"33" = load i64, ptr addrspace(5) %"5", align 8 - %"34" = load float, ptr addrspace(5) %"6", align 4 - %"56" = inttoptr i64 %"33" to ptr - %"69" = getelementptr inbounds i8, ptr %"56", i64 12 - store float %"34", ptr %"69", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"48" = inttoptr i64 %"11" to ptr + %"10" = load float, ptr %"48", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = call afn float @llvm.exp2.f32(float %"13") + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"49" = inttoptr i64 %"14" to ptr + store float %"15", ptr %"49", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"50" = inttoptr i64 %"17" to ptr + %"58" = getelementptr inbounds i8, ptr %"50", i64 4 + %"16" = load float, ptr %"58", align 4 + store float %"16", ptr addrspace(5) %"6", align 4 + %"19" = load float, ptr addrspace(5) %"6", align 4 + %"18" = call afn float @llvm.exp2.f32(float %"19") + store float %"18", ptr addrspace(5) %"6", align 4 + %"20" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = load float, ptr addrspace(5) %"6", align 4 + %"51" = inttoptr i64 %"20" to ptr + %"60" = getelementptr inbounds i8, ptr %"51", i64 4 + store float %"21", ptr %"60", align 4 + %"23" = load i64, ptr addrspace(5) %"4", align 8 + %"52" = inttoptr i64 %"23" to ptr + %"62" = getelementptr inbounds i8, ptr %"52", i64 8 + %"22" = load float, ptr %"62", align 4 + store float %"22", ptr addrspace(5) %"6", align 4 + %"25" = load float, ptr addrspace(5) %"6", align 4 + %"24" = call afn float @llvm.exp2.f32(float %"25") + store float %"24", ptr addrspace(5) %"6", align 4 + %"26" = load i64, ptr addrspace(5) %"5", align 8 + %"27" = load 
float, ptr addrspace(5) %"6", align 4 + %"53" = inttoptr i64 %"26" to ptr + %"64" = getelementptr inbounds i8, ptr %"53", i64 8 + store float %"27", ptr %"64", align 4 + %"29" = load i64, ptr addrspace(5) %"4", align 8 + %"54" = inttoptr i64 %"29" to ptr + %"66" = getelementptr inbounds i8, ptr %"54", i64 12 + %"28" = load float, ptr %"66", align 4 + store float %"28", ptr addrspace(5) %"6", align 4 + %"31" = load float, ptr addrspace(5) %"6", align 4 + %"30" = call afn float @llvm.exp2.f32(float %"31") + store float %"30", ptr addrspace(5) %"6", align 4 + %"32" = load i64, ptr addrspace(5) %"5", align 8 + %"33" = load float, ptr addrspace(5) %"6", align 4 + %"55" = inttoptr i64 %"32" to ptr + %"68" = getelementptr inbounds i8, ptr %"55", i64 12 + store float %"33", ptr %"68", align 4 ret void } diff --git a/ptx/src/test/spirv_run/extern_shared.ll b/ptx/src/test/spirv_run/extern_shared.ll index 34f1d33..eeb0d50 100644 --- a/ptx/src/test/spirv_run/extern_shared.ll +++ b/ptx/src/test/spirv_run/extern_shared.ll @@ -3,31 +3,29 @@ target triple = "amdgcn-amd-amdhsa" @shared_mem = external hidden addrspace(3) global [0 x i32] -define protected amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": +define protected amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"23": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) 
%"6", align 8 - %"13" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = inttoptr i64 %"13" to ptr addrspace(1) - %"12" = load i64, ptr addrspace(1) %"20", align 8 - store i64 %"12", ptr addrspace(5) %"7", align 8 - %"14" = load i64, ptr addrspace(5) %"7", align 8 - store i64 %"14", ptr addrspace(3) @shared_mem, align 8 - %"15" = load i64, ptr addrspace(3) @shared_mem, align 8 - store i64 %"15", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"6", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"23" = inttoptr i64 %"16" to ptr addrspace(1) - store i64 %"17", ptr addrspace(1) %"23", align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = inttoptr i64 %"12" to ptr addrspace(1) + %"11" = load i64, ptr addrspace(1) %"19", align 8 + store i64 %"11", ptr addrspace(5) %"7", align 8 + %"13" = load i64, ptr addrspace(5) %"7", align 8 + store i64 %"13", ptr addrspace(3) @shared_mem, align 8 + %"14" = load i64, ptr addrspace(3) @shared_mem, align 8 + store i64 %"14", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"6", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"22" = inttoptr i64 %"15" to ptr addrspace(1) + store i64 %"16", ptr addrspace(1) %"22", align 8 ret void } diff --git a/ptx/src/test/spirv_run/extern_shared_call.ll b/ptx/src/test/spirv_run/extern_shared_call.ll index 241053f..cdd37be 100644 --- a/ptx/src/test/spirv_run/extern_shared_call.ll +++ b/ptx/src/test/spirv_run/extern_shared_call.ll @@ -3,49 +3,45 @@ target triple = "amdgcn-amd-amdhsa" @shared_mem = external hidden addrspace(3) global [0 x i32], align 4 -define private void @"2"(ptr addrspace(3) %"37") #0 { -"35": +define private void @"2"(ptr addrspace(3) %"35") #0 { +"33": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"3" = alloca i64, 
align 8, addrspace(5) - %"14" = load i64, ptr addrspace(3) %"37", align 8 - store i64 %"14", ptr addrspace(5) %"3", align 8 - %"16" = load i64, ptr addrspace(5) %"3", align 8 - %"15" = add i64 %"16", 2 - store i64 %"15", ptr addrspace(5) %"3", align 8 - %"17" = load i64, ptr addrspace(5) %"3", align 8 - store i64 %"17", ptr addrspace(3) %"37", align 8 + %"12" = load i64, ptr addrspace(3) %"35", align 8 + store i64 %"12", ptr addrspace(5) %"3", align 8 + %"14" = load i64, ptr addrspace(5) %"3", align 8 + %"13" = add i64 %"14", 2 + store i64 %"13", ptr addrspace(5) %"3", align 8 + %"15" = load i64, ptr addrspace(5) %"3", align 8 + store i64 %"15", ptr addrspace(3) %"35", align 8 ret void } -define protected amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { -"36": - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 - %"13" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"13", align 1 +define protected amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 { +"34": + %"11" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"11", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) - %"18" = load i64, ptr addrspace(4) %"27", align 8 - store i64 %"18", ptr addrspace(5) %"7", align 8 - %"19" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"19", ptr addrspace(5) %"8", align 8 - %"21" = load i64, ptr addrspace(5) %"7", align 8 - %"31" = inttoptr i64 %"21" to ptr addrspace(1) - %"20" = load i64, ptr addrspace(1) %"31", align 8 - store i64 %"20", ptr addrspace(5) %"9", align 8 - %"22" = load i64, ptr addrspace(5) %"9", align 8 - store i64 %"22", ptr addrspace(3) @shared_mem, align 8 + %"16" = load i64, ptr addrspace(4) %"25", align 8 + store i64 %"16", ptr addrspace(5) %"7", align 8 + 
%"17" = load i64, ptr addrspace(4) %"26", align 8 + store i64 %"17", ptr addrspace(5) %"8", align 8 + %"19" = load i64, ptr addrspace(5) %"7", align 8 + %"29" = inttoptr i64 %"19" to ptr addrspace(1) + %"18" = load i64, ptr addrspace(1) %"29", align 8 + store i64 %"18", ptr addrspace(5) %"9", align 8 + %"20" = load i64, ptr addrspace(5) %"9", align 8 + store i64 %"20", ptr addrspace(3) @shared_mem, align 8 call void @"2"(ptr addrspace(3) @shared_mem) - %"23" = load i64, ptr addrspace(3) @shared_mem, align 8 - store i64 %"23", ptr addrspace(5) %"9", align 8 - %"24" = load i64, ptr addrspace(5) %"8", align 8 - %"25" = load i64, ptr addrspace(5) %"9", align 8 - %"34" = inttoptr i64 %"24" to ptr addrspace(1) - store i64 %"25", ptr addrspace(1) %"34", align 8 + %"21" = load i64, ptr addrspace(3) @shared_mem, align 8 + store i64 %"21", ptr addrspace(5) %"9", align 8 + %"22" = load i64, ptr addrspace(5) %"8", align 8 + %"23" = load i64, ptr addrspace(5) %"9", align 8 + %"32" = inttoptr i64 %"22" to ptr addrspace(1) + store i64 %"23", ptr addrspace(1) %"32", align 8 ret void } diff --git a/ptx/src/test/spirv_run/fma.ll b/ptx/src/test/spirv_run/fma.ll index d518432..1dff2b8 100644 --- a/ptx/src/test/spirv_run/fma.ll +++ b/ptx/src/test/spirv_run/fma.ll @@ -1,44 +1,42 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 { -"35": +define protected amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { +"34": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca 
i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"28", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"30", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"14" to ptr - %"13" = load float, ptr %"31", align 4 - store float %"13", ptr addrspace(5) %"6", align 4 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"32" = inttoptr i64 %"16" to ptr - %"37" = getelementptr inbounds i8, ptr %"32", i64 4 - %"15" = load float, ptr %"37", align 4 - store float %"15", ptr addrspace(5) %"7", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"33" = inttoptr i64 %"18" to ptr - %"39" = getelementptr inbounds i8, ptr %"33", i64 8 - %"17" = load float, ptr %"39", align 4 - store float %"17", ptr addrspace(5) %"8", align 4 - %"20" = load float, ptr addrspace(5) %"6", align 4 - %"21" = load float, ptr addrspace(5) %"7", align 4 - %"22" = load float, ptr addrspace(5) %"8", align 4 - %"19" = call float @llvm.fma.f32(float %"20", float %"21", float %"22") - store float %"19", ptr addrspace(5) %"6", align 4 - %"23" = load i64, ptr addrspace(5) %"5", align 8 - %"24" = load float, ptr addrspace(5) %"6", align 4 - %"34" = inttoptr i64 %"23" to ptr - store float %"24", ptr %"34", align 4 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"13" to ptr + %"12" = load float, ptr %"30", align 4 + store float %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"31" = inttoptr i64 %"15" to ptr + %"36" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load float, ptr %"36", align 4 + store float 
%"14", ptr addrspace(5) %"7", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"32" = inttoptr i64 %"17" to ptr + %"38" = getelementptr inbounds i8, ptr %"32", i64 8 + %"16" = load float, ptr %"38", align 4 + store float %"16", ptr addrspace(5) %"8", align 4 + %"19" = load float, ptr addrspace(5) %"6", align 4 + %"20" = load float, ptr addrspace(5) %"7", align 4 + %"21" = load float, ptr addrspace(5) %"8", align 4 + %"18" = call float @llvm.fma.f32(float %"19", float %"20", float %"21") + store float %"18", ptr addrspace(5) %"6", align 4 + %"22" = load i64, ptr addrspace(5) %"5", align 8 + %"23" = load float, ptr addrspace(5) %"6", align 4 + %"33" = inttoptr i64 %"22" to ptr + store float %"23", ptr %"33", align 4 ret void } diff --git a/ptx/src/test/spirv_run/func_ptr.ll b/ptx/src/test/spirv_run/func_ptr.ll index b7c0603..1160a76 100644 --- a/ptx/src/test/spirv_run/func_ptr.ll +++ b/ptx/src/test/spirv_run/func_ptr.ll @@ -1,56 +1,52 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define private float @"1"(float %"17", float %"18") #0 { -"40": +define private float @"1"(float %"15", float %"16") #0 { +"38": %"3" = alloca float, align 4, addrspace(5) %"4" = alloca float, align 4, addrspace(5) %"2" = alloca float, align 4, addrspace(5) %"13" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"13", align 1 - %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 - store float %"17", ptr addrspace(5) %"3", align 4 - store float %"18", ptr addrspace(5) %"4", align 4 - %"20" = load float, ptr addrspace(5) %"3", align 4 - %"21" = load float, ptr addrspace(5) %"4", align 4 - %"19" = fadd float %"20", %"21" - store float %"19", ptr addrspace(5) %"2", align 4 - %"22" = load float, ptr addrspace(5) %"2", align 4 - ret 
float %"22" + store float %"15", ptr addrspace(5) %"3", align 4 + store float %"16", ptr addrspace(5) %"4", align 4 + %"18" = load float, ptr addrspace(5) %"3", align 4 + %"19" = load float, ptr addrspace(5) %"4", align 4 + %"17" = fadd float %"18", %"19" + store float %"17", ptr addrspace(5) %"2", align 4 + %"20" = load float, ptr addrspace(5) %"2", align 4 + ret float %"20" } -define protected amdgpu_kernel void @func_ptr(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { -"41": - %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 - %"16" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"16", align 1 +define protected amdgpu_kernel void @func_ptr(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { +"39": + %"14" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"14", align 1 %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) %"12" = alloca i64, align 8, addrspace(5) - %"23" = load i64, ptr addrspace(4) %"36", align 8 - store i64 %"23", ptr addrspace(5) %"8", align 8 - %"24" = load i64, ptr addrspace(4) %"37", align 8 - store i64 %"24", ptr addrspace(5) %"9", align 8 - %"26" = load i64, ptr addrspace(5) %"8", align 8 - %"38" = inttoptr i64 %"26" to ptr - %"25" = load i64, ptr %"38", align 8 - store i64 %"25", ptr addrspace(5) %"10", align 8 - %"28" = load i64, ptr addrspace(5) %"10", align 8 - %"27" = add i64 %"28", 1 - store i64 %"27", ptr addrspace(5) %"11", align 8 + %"21" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"21", ptr addrspace(5) %"8", align 8 + %"22" = load i64, ptr addrspace(4) %"35", align 8 + store i64 %"22", ptr addrspace(5) %"9", align 8 + %"24" = load i64, ptr addrspace(5) %"8", align 8 + %"36" = inttoptr i64 %"24" to ptr + %"23" = load i64, ptr %"36", align 8 + store i64 %"23", ptr 
addrspace(5) %"10", align 8 + %"26" = load i64, ptr addrspace(5) %"10", align 8 + %"25" = add i64 %"26", 1 + store i64 %"25", ptr addrspace(5) %"11", align 8 store i64 ptrtoint (ptr @"1" to i64), ptr addrspace(5) %"12", align 8 - %"31" = load i64, ptr addrspace(5) %"11", align 8 - %"32" = load i64, ptr addrspace(5) %"12", align 8 - %"30" = add i64 %"31", %"32" - store i64 %"30", ptr addrspace(5) %"11", align 8 - %"33" = load i64, ptr addrspace(5) %"9", align 8 - %"34" = load i64, ptr addrspace(5) %"11", align 8 - %"39" = inttoptr i64 %"33" to ptr - store i64 %"34", ptr %"39", align 8 + %"29" = load i64, ptr addrspace(5) %"11", align 8 + %"30" = load i64, ptr addrspace(5) %"12", align 8 + %"28" = add i64 %"29", %"30" + store i64 %"28", ptr addrspace(5) %"11", align 8 + %"31" = load i64, ptr addrspace(5) %"9", align 8 + %"32" = load i64, ptr addrspace(5) %"11", align 8 + %"37" = inttoptr i64 %"31" to ptr + store i64 %"32", ptr %"37", align 8 ret void } diff --git a/ptx/src/test/spirv_run/generic.ll b/ptx/src/test/spirv_run/generic.ll index d746a22..312a7cd 100644 --- a/ptx/src/test/spirv_run/generic.ll +++ b/ptx/src/test/spirv_run/generic.ll @@ -4,66 +4,64 @@ target triple = "amdgcn-amd-amdhsa" @foo = protected addrspace(1) externally_initialized global [4 x i32] [i32 2, i32 3, i32 5, i32 7] @bar = protected addrspace(1) externally_initialized global [4 x i64] [i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 4), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 8), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 12)] -define protected amdgpu_kernel void @generic(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #0 { -"58": +define protected amdgpu_kernel void @generic(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { +"57": %"10" = alloca i1, 
align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) - %"12" = load i64, ptr addrspace(4) %"48", align 8 - store i64 %"12", ptr addrspace(5) %"7", align 8 + %"11" = load i64, ptr addrspace(4) %"47", align 8 + store i64 %"11", ptr addrspace(5) %"7", align 8 %0 = alloca i32, align 4, addrspace(5) store i32 1, ptr addrspace(5) %0, align 4 - %"13" = load i32, ptr addrspace(5) %0, align 4 - store i32 %"13", ptr addrspace(5) %"8", align 4 - %"14" = load i64, ptr addrspace(1) @bar, align 8 - store i64 %"14", ptr addrspace(5) %"6", align 8 - %"16" = load i64, ptr addrspace(5) %"6", align 8 - %"50" = inttoptr i64 %"16" to ptr - %"15" = load i32, ptr %"50", align 4 - store i32 %"15", ptr addrspace(5) %"9", align 4 - %"18" = load i32, ptr addrspace(5) %"8", align 4 - %"19" = load i32, ptr addrspace(5) %"9", align 4 - %"17" = mul i32 %"18", %"19" - store i32 %"17", ptr addrspace(5) %"8", align 4 - %"20" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 8), align 8 - store i64 %"20", ptr addrspace(5) %"6", align 8 - %"22" = load i64, ptr addrspace(5) %"6", align 8 - %"52" = inttoptr i64 %"22" to ptr - %"21" = load i32, ptr %"52", align 4 - store i32 %"21", ptr addrspace(5) %"9", align 4 - %"24" = load i32, ptr addrspace(5) %"8", align 4 - %"25" = load i32, ptr addrspace(5) %"9", align 4 - %"23" = mul i32 %"24", %"25" - store i32 %"23", ptr addrspace(5) %"8", align 4 - %"26" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 16), align 8 - store i64 %"26", ptr addrspace(5) %"6", align 8 - %"28" = load i64, ptr addrspace(5) %"6", align 8 - %"54" = inttoptr i64 %"28" to ptr - %"27" = load i32, ptr %"54", align 4 - store i32 %"27", ptr 
addrspace(5) %"9", align 4 - %"30" = load i32, ptr addrspace(5) %"8", align 4 - %"31" = load i32, ptr addrspace(5) %"9", align 4 - %"29" = mul i32 %"30", %"31" - store i32 %"29", ptr addrspace(5) %"8", align 4 - %"32" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 24), align 8 - store i64 %"32", ptr addrspace(5) %"6", align 8 - %"34" = load i64, ptr addrspace(5) %"6", align 8 - %"56" = inttoptr i64 %"34" to ptr - %"33" = load i32, ptr %"56", align 4 - store i32 %"33", ptr addrspace(5) %"9", align 4 - %"36" = load i32, ptr addrspace(5) %"8", align 4 - %"37" = load i32, ptr addrspace(5) %"9", align 4 - %"35" = mul i32 %"36", %"37" - store i32 %"35", ptr addrspace(5) %"8", align 4 - %"38" = load i64, ptr addrspace(5) %"7", align 8 - %"39" = load i32, ptr addrspace(5) %"8", align 4 - %"57" = inttoptr i64 %"38" to ptr - store i32 %"39", ptr %"57", align 4 + %"12" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"12", ptr addrspace(5) %"8", align 4 + %"13" = load i64, ptr addrspace(1) @bar, align 8 + store i64 %"13", ptr addrspace(5) %"6", align 8 + %"15" = load i64, ptr addrspace(5) %"6", align 8 + %"49" = inttoptr i64 %"15" to ptr + %"14" = load i32, ptr %"49", align 4 + store i32 %"14", ptr addrspace(5) %"9", align 4 + %"17" = load i32, ptr addrspace(5) %"8", align 4 + %"18" = load i32, ptr addrspace(5) %"9", align 4 + %"16" = mul i32 %"17", %"18" + store i32 %"16", ptr addrspace(5) %"8", align 4 + %"19" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 8), align 8 + store i64 %"19", ptr addrspace(5) %"6", align 8 + %"21" = load i64, ptr addrspace(5) %"6", align 8 + %"51" = inttoptr i64 %"21" to ptr + %"20" = load i32, ptr %"51", align 4 + store i32 %"20", ptr addrspace(5) %"9", align 4 + %"23" = load i32, ptr addrspace(5) %"8", align 4 + %"24" = load i32, ptr addrspace(5) %"9", align 4 + %"22" = mul i32 %"23", %"24" + store i32 %"22", ptr addrspace(5) %"8", align 4 + %"25" = load i64, ptr 
addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 16), align 8 + store i64 %"25", ptr addrspace(5) %"6", align 8 + %"27" = load i64, ptr addrspace(5) %"6", align 8 + %"53" = inttoptr i64 %"27" to ptr + %"26" = load i32, ptr %"53", align 4 + store i32 %"26", ptr addrspace(5) %"9", align 4 + %"29" = load i32, ptr addrspace(5) %"8", align 4 + %"30" = load i32, ptr addrspace(5) %"9", align 4 + %"28" = mul i32 %"29", %"30" + store i32 %"28", ptr addrspace(5) %"8", align 4 + %"31" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 24), align 8 + store i64 %"31", ptr addrspace(5) %"6", align 8 + %"33" = load i64, ptr addrspace(5) %"6", align 8 + %"55" = inttoptr i64 %"33" to ptr + %"32" = load i32, ptr %"55", align 4 + store i32 %"32", ptr addrspace(5) %"9", align 4 + %"35" = load i32, ptr addrspace(5) %"8", align 4 + %"36" = load i32, ptr addrspace(5) %"9", align 4 + %"34" = mul i32 %"35", %"36" + store i32 %"34", ptr addrspace(5) %"8", align 4 + %"37" = load i64, ptr addrspace(5) %"7", align 8 + %"38" = load i32, ptr addrspace(5) %"8", align 4 + %"56" = inttoptr i64 %"37" to ptr + store i32 %"38", ptr %"56", align 4 ret void } diff --git a/ptx/src/test/spirv_run/global_array.ll b/ptx/src/test/spirv_run/global_array.ll index 3a8da01..e2ad2f2 100644 --- a/ptx/src/test/spirv_run/global_array.ll +++ b/ptx/src/test/spirv_run/global_array.ll @@ -4,29 +4,27 @@ target triple = "amdgcn-amd-amdhsa" @asdas = protected addrspace(1) externally_initialized global [4 x [2 x i32]] [[2 x i32] [i32 -1, i32 2], [2 x i32] [i32 3, i32 0], [2 x i32] zeroinitializer, [2 x i32] zeroinitializer] @foobar = protected addrspace(1) externally_initialized global [4 x [2 x i64]] [[2 x i64] [i64 -1, i64 2], [2 x i64] [i64 3, i64 0], [2 x i64] [i64 ptrtoint (ptr addrspace(1) @asdas to i64), i64 0], [2 x i64] zeroinitializer] -define protected amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { 
-"22": +define protected amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"21": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %0 = alloca i64, align 8, addrspace(5) store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %0, align 8 - %"11" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"11", ptr addrspace(5) %"6", align 8 - %"12" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"12", ptr addrspace(5) %"7", align 8 - %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"10" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"11" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"11", ptr addrspace(5) %"7", align 8 + %"13" = load i64, ptr addrspace(5) %"6", align 8 + %"19" = inttoptr i64 %"13" to ptr addrspace(1) + %"12" = load i32, ptr addrspace(1) %"19", align 4 + store i32 %"12", ptr addrspace(5) %"8", align 4 + %"14" = load i64, ptr addrspace(5) %"7", align 8 + %"15" = load i32, ptr addrspace(5) %"8", align 4 %"20" = inttoptr i64 %"14" to ptr addrspace(1) - %"13" = load i32, ptr addrspace(1) %"20", align 4 - store i32 %"13", ptr addrspace(5) %"8", align 4 - %"15" = load i64, ptr addrspace(5) %"7", align 8 - %"16" = load i32, ptr addrspace(5) %"8", align 4 - %"21" = inttoptr i64 %"15" to ptr addrspace(1) - store i32 %"16", ptr addrspace(1) %"21", align 4 + store i32 %"15", ptr addrspace(1) %"20", align 4 ret void } diff --git a/ptx/src/test/spirv_run/lanemask_lt.ll b/ptx/src/test/spirv_run/lanemask_lt.ll index d36d4a2..efa1746 100644 --- a/ptx/src/test/spirv_run/lanemask_lt.ll +++ b/ptx/src/test/spirv_run/lanemask_lt.ll @@ -3,41 +3,39 @@ target triple = "amdgcn-amd-amdhsa" 
declare i32 @__zluda_ptx_impl__sreg_lanemask_lt() #0 -define protected amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 { -"40": +define protected amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #1 { +"39": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"14" = load i64, ptr addrspace(4) %"27", align 8 + store i64 %"14", ptr addrspace(5) %"4", align 8 %"15" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"15", ptr addrspace(5) %"4", align 8 - %"16" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"16", ptr addrspace(5) %"5", align 8 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"18" to ptr - %"30" = load i32, ptr %"31", align 4 - store i32 %"30", ptr addrspace(5) %"6", align 4 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"32" = add i32 %"20", 1 - store i32 %"32", ptr addrspace(5) %"7", align 4 - %"12" = call i32 @__zluda_ptx_impl__sreg_lanemask_lt() + store i64 %"15", ptr addrspace(5) %"5", align 8 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"17" to ptr + %"29" = load i32, ptr %"30", align 4 + store i32 %"29", ptr addrspace(5) %"6", align 4 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"31" = add i32 %"19", 1 + store i32 %"31", ptr addrspace(5) %"7", align 4 + %"11" = call i32 @__zluda_ptx_impl__sreg_lanemask_lt() %0 = alloca i32, align 4, addrspace(5) - store i32 %"12", ptr addrspace(5) %0, align 4 - %"34" = load i32, ptr addrspace(5) %0, align 4 - store i32 %"34", ptr addrspace(5) %"8", align 4 - %"23" = load i32, ptr 
addrspace(5) %"7", align 4 - %"24" = load i32, ptr addrspace(5) %"8", align 4 - %"35" = add i32 %"23", %"24" - store i32 %"35", ptr addrspace(5) %"7", align 4 - %"25" = load i64, ptr addrspace(5) %"5", align 8 - %"26" = load i32, ptr addrspace(5) %"7", align 4 - %"38" = inttoptr i64 %"25" to ptr - store i32 %"26", ptr %"38", align 4 + store i32 %"11", ptr addrspace(5) %0, align 4 + %"33" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"33", ptr addrspace(5) %"8", align 4 + %"22" = load i32, ptr addrspace(5) %"7", align 4 + %"23" = load i32, ptr addrspace(5) %"8", align 4 + %"34" = add i32 %"22", %"23" + store i32 %"34", ptr addrspace(5) %"7", align 4 + %"24" = load i64, ptr addrspace(5) %"5", align 8 + %"25" = load i32, ptr addrspace(5) %"7", align 4 + %"37" = inttoptr i64 %"24" to ptr + store i32 %"25", ptr %"37", align 4 ret void } diff --git a/ptx/src/test/spirv_run/ld_st.ll b/ptx/src/test/spirv_run/ld_st.ll index c8d6eb1..0fe06f2 100644 --- a/ptx/src/test/spirv_run/ld_st.ll +++ b/ptx/src/test/spirv_run/ld_st.ll @@ -1,27 +1,25 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 { -"19": +define protected amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { +"18": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"14", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 - 
store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"16", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"16" = inttoptr i64 %"11" to ptr + %"10" = load i64, ptr %"16", align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"6", align 8 %"17" = inttoptr i64 %"12" to ptr - %"11" = load i64, ptr %"17", align 8 - store i64 %"11", ptr addrspace(5) %"6", align 8 - %"13" = load i64, ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"6", align 8 - %"18" = inttoptr i64 %"13" to ptr - store i64 %"14", ptr %"18", align 8 + store i64 %"13", ptr %"17", align 8 ret void } diff --git a/ptx/src/test/spirv_run/ld_st_implicit.ll b/ptx/src/test/spirv_run/ld_st_implicit.ll index da47ad8..3ec1474 100644 --- a/ptx/src/test/spirv_run/ld_st_implicit.ll +++ b/ptx/src/test/spirv_run/ld_st_implicit.ll @@ -1,35 +1,33 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": +define protected amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"22": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr 
addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 %0 = alloca i64, align 8, addrspace(5) store i64 81985529216486895, ptr addrspace(5) %0, align 8 - %"11" = load i64, ptr addrspace(5) %0, align 8 + %"10" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"12" to ptr addrspace(1) + %"18" = load float, ptr addrspace(1) %"19", align 4 + %"23" = bitcast float %"18" to i32 + %"11" = zext i32 %"23" to i64 store i64 %"11", ptr addrspace(5) %"6", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"13" = load i64, ptr addrspace(5) %"5", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 %"20" = inttoptr i64 %"13" to ptr addrspace(1) - %"19" = load float, ptr addrspace(1) %"20", align 4 - %"24" = bitcast float %"19" to i32 - %"12" = zext i32 %"24" to i64 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"21" = inttoptr i64 %"14" to ptr addrspace(1) - %"26" = trunc i64 %"15" to i32 - %"22" = bitcast i32 %"26" to float - store float %"22", ptr addrspace(1) %"21", align 4 + %"25" = trunc i64 %"14" to i32 + %"21" = bitcast i32 %"25" to float + store float %"21", ptr addrspace(1) %"20", align 4 ret void } diff --git a/ptx/src/test/spirv_run/ld_st_offset.ll b/ptx/src/test/spirv_run/ld_st_offset.ll index 1b020cb..ee8bde6 100644 --- a/ptx/src/test/spirv_run/ld_st_offset.ll +++ b/ptx/src/test/spirv_run/ld_st_offset.ll @@ -1,38 +1,36 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"30": +define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { +"29": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"23", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"26", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"25", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"26" = inttoptr i64 %"14" to ptr + %"31" = getelementptr inbounds i8, ptr %"26", i64 4 + %"13" = load i32, ptr %"31", align 4 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i32, ptr addrspace(5) %"7", align 4 %"27" = inttoptr i64 %"15" to ptr - %"32" = getelementptr inbounds i8, ptr %"27", i64 4 
- %"14" = load i32, ptr %"32", align 4 - store i32 %"14", ptr addrspace(5) %"7", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i32, ptr addrspace(5) %"7", align 4 - %"28" = inttoptr i64 %"16" to ptr - store i32 %"17", ptr %"28", align 4 - %"18" = load i64, ptr addrspace(5) %"5", align 8 - %"19" = load i32, ptr addrspace(5) %"6", align 4 - %"29" = inttoptr i64 %"18" to ptr - %"34" = getelementptr inbounds i8, ptr %"29", i64 4 - store i32 %"19", ptr %"34", align 4 + store i32 %"16", ptr %"27", align 4 + %"17" = load i64, ptr addrspace(5) %"5", align 8 + %"18" = load i32, ptr addrspace(5) %"6", align 4 + %"28" = inttoptr i64 %"17" to ptr + %"33" = getelementptr inbounds i8, ptr %"28", i64 4 + store i32 %"18", ptr %"33", align 4 ret void } diff --git a/ptx/src/test/spirv_run/lg2.ll b/ptx/src/test/spirv_run/lg2.ll index 5e29fe2..7dd63d6 100644 --- a/ptx/src/test/spirv_run/lg2.ll +++ b/ptx/src/test/spirv_run/lg2.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", 
align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load float, ptr %"19", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = call afn float @llvm.log2.f32(float %"14") - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store float %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load float, ptr %"18", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = call afn float @llvm.log2.f32(float %"13") + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store float %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/local_align.ll b/ptx/src/test/spirv_run/local_align.ll index 035d1f7..13fbe4b 100644 --- a/ptx/src/test/spirv_run/local_align.ll +++ b/ptx/src/test/spirv_run/local_align.ll @@ -1,28 +1,26 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": +define protected amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 { +"19": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, 
addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca [8 x i8], align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"15", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"16", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"11" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"11", ptr addrspace(5) %"6", align 8 - %"13" = load i64, ptr addrspace(5) %"5", align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"17" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"17", align 8 + store i64 %"11", ptr addrspace(5) %"7", align 8 + %"13" = load i64, ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"7", align 8 %"18" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"18", align 8 - store i64 %"12", ptr addrspace(5) %"7", align 8 - %"14" = load i64, ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"7", align 8 - %"19" = inttoptr i64 %"14" to ptr - store i64 %"15", ptr %"19", align 8 + store i64 %"14", ptr %"18", align 8 ret void } diff --git a/ptx/src/test/spirv_run/mad_hi_cc.ll b/ptx/src/test/spirv_run/mad_hi_cc.ll index a5b1595..6c86dbc 100644 --- a/ptx/src/test/spirv_run/mad_hi_cc.ll +++ b/ptx/src/test/spirv_run/mad_hi_cc.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"61", ptr addrspace(4) byref(i64) %"62") #0 { -"78": +define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60", ptr addrspace(4) byref(i64) %"61") #0 { +"77": %"14" = alloca i1, align 
1, addrspace(5) store i1 false, ptr addrspace(5) %"14", align 1 - %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -17,69 +15,69 @@ define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"61" %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i32, align 4, addrspace(5) %"13" = alloca i32, align 4, addrspace(5) + %"15" = load i64, ptr addrspace(4) %"60", align 8 + store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"61", align 8 - store i64 %"16", ptr addrspace(5) %"4", align 8 - %"17" = load i64, ptr addrspace(4) %"62", align 8 - store i64 %"17", ptr addrspace(5) %"5", align 8 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"64" = inttoptr i64 %"19" to ptr - %"63" = load i32, ptr %"64", align 4 - store i32 %"63", ptr addrspace(5) %"8", align 4 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"65" = inttoptr i64 %"21" to ptr - %"80" = getelementptr inbounds i8, ptr %"65", i64 4 - %"66" = load i32, ptr %"80", align 4 - store i32 %"66", ptr addrspace(5) %"9", align 4 - %"23" = load i64, ptr addrspace(5) %"4", align 8 - %"67" = inttoptr i64 %"23" to ptr - %"82" = getelementptr inbounds i8, ptr %"67", i64 8 - %"22" = load i32, ptr %"82", align 4 - store i32 %"22", ptr addrspace(5) %"10", align 4 - %"26" = load i32, ptr addrspace(5) %"8", align 4 - %"27" = load i32, ptr addrspace(5) %"9", align 4 - %"28" = load i32, ptr addrspace(5) %"10", align 4 - %0 = sext i32 %"26" to i64 - %1 = sext i32 %"27" to i64 + store i64 %"16", ptr addrspace(5) %"5", align 8 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"63" = inttoptr i64 %"18" to ptr + %"62" = load i32, ptr %"63", align 4 + store i32 %"62", ptr addrspace(5) %"8", align 4 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"64" = inttoptr i64 %"20" to ptr + %"79" = getelementptr 
inbounds i8, ptr %"64", i64 4 + %"65" = load i32, ptr %"79", align 4 + store i32 %"65", ptr addrspace(5) %"9", align 4 + %"22" = load i64, ptr addrspace(5) %"4", align 8 + %"66" = inttoptr i64 %"22" to ptr + %"81" = getelementptr inbounds i8, ptr %"66", i64 8 + %"21" = load i32, ptr %"81", align 4 + store i32 %"21", ptr addrspace(5) %"10", align 4 + %"25" = load i32, ptr addrspace(5) %"8", align 4 + %"26" = load i32, ptr addrspace(5) %"9", align 4 + %"27" = load i32, ptr addrspace(5) %"10", align 4 + %0 = sext i32 %"25" to i64 + %1 = sext i32 %"26" to i64 %2 = mul nsw i64 %0, %1 %3 = lshr i64 %2, 32 %4 = trunc i64 %3 to i32 - %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %4, i32 %"28") - %"24" = extractvalue { i32, i1 } %5, 0 - %"25" = extractvalue { i32, i1 } %5, 1 - store i32 %"24", ptr addrspace(5) %"7", align 4 - store i1 %"25", ptr addrspace(5) %"14", align 1 + %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %4, i32 %"27") + %"23" = extractvalue { i32, i1 } %5, 0 + %"24" = extractvalue { i32, i1 } %5, 1 + store i32 %"23", ptr addrspace(5) %"7", align 4 + store i1 %"24", ptr addrspace(5) %"14", align 1 %6 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -2) - %"29" = extractvalue { i32, i1 } %6, 0 - %"30" = extractvalue { i32, i1 } %6, 1 - store i32 %"29", ptr addrspace(5) %"6", align 4 - store i1 %"30", ptr addrspace(5) %"14", align 1 - %"32" = load i1, ptr addrspace(5) %"14", align 1 - %7 = zext i1 %"32" to i32 - %"71" = add i32 0, %7 - store i32 %"71", ptr addrspace(5) %"12", align 4 + %"28" = extractvalue { i32, i1 } %6, 0 + %"29" = extractvalue { i32, i1 } %6, 1 + store i32 %"28", ptr addrspace(5) %"6", align 4 + store i1 %"29", ptr addrspace(5) %"14", align 1 + %"31" = load i1, ptr addrspace(5) %"14", align 1 + %7 = zext i1 %"31" to i32 + %"70" = add i32 0, %7 + store i32 %"70", ptr addrspace(5) %"12", align 4 %8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -1) - %"33" = extractvalue { i32, i1 } %8, 0 - %"34" = 
extractvalue { i32, i1 } %8, 1 - store i32 %"33", ptr addrspace(5) %"6", align 4 - store i1 %"34", ptr addrspace(5) %"14", align 1 - %"36" = load i1, ptr addrspace(5) %"14", align 1 - %9 = zext i1 %"36" to i32 - %"72" = add i32 0, %9 - store i32 %"72", ptr addrspace(5) %"13", align 4 - %"37" = load i64, ptr addrspace(5) %"5", align 8 - %"38" = load i32, ptr addrspace(5) %"7", align 4 - %"73" = inttoptr i64 %"37" to ptr - store i32 %"38", ptr %"73", align 4 - %"39" = load i64, ptr addrspace(5) %"5", align 8 - %"40" = load i32, ptr addrspace(5) %"12", align 4 - %"74" = inttoptr i64 %"39" to ptr - %"84" = getelementptr inbounds i8, ptr %"74", i64 4 - store i32 %"40", ptr %"84", align 4 - %"41" = load i64, ptr addrspace(5) %"5", align 8 - %"42" = load i32, ptr addrspace(5) %"13", align 4 - %"76" = inttoptr i64 %"41" to ptr - %"86" = getelementptr inbounds i8, ptr %"76", i64 8 - store i32 %"42", ptr %"86", align 4 + %"32" = extractvalue { i32, i1 } %8, 0 + %"33" = extractvalue { i32, i1 } %8, 1 + store i32 %"32", ptr addrspace(5) %"6", align 4 + store i1 %"33", ptr addrspace(5) %"14", align 1 + %"35" = load i1, ptr addrspace(5) %"14", align 1 + %9 = zext i1 %"35" to i32 + %"71" = add i32 0, %9 + store i32 %"71", ptr addrspace(5) %"13", align 4 + %"36" = load i64, ptr addrspace(5) %"5", align 8 + %"37" = load i32, ptr addrspace(5) %"7", align 4 + %"72" = inttoptr i64 %"36" to ptr + store i32 %"37", ptr %"72", align 4 + %"38" = load i64, ptr addrspace(5) %"5", align 8 + %"39" = load i32, ptr addrspace(5) %"12", align 4 + %"73" = inttoptr i64 %"38" to ptr + %"83" = getelementptr inbounds i8, ptr %"73", i64 4 + store i32 %"39", ptr %"83", align 4 + %"40" = load i64, ptr addrspace(5) %"5", align 8 + %"41" = load i32, ptr addrspace(5) %"13", align 4 + %"75" = inttoptr i64 %"40" to ptr + %"85" = getelementptr inbounds i8, ptr %"75", i64 8 + store i32 %"41", ptr %"85", align 4 ret void } diff --git a/ptx/src/test/spirv_run/mad_s32.ll b/ptx/src/test/spirv_run/mad_s32.ll index 
75a204a..5ab86ad 100644 --- a/ptx/src/test/spirv_run/mad_s32.ll +++ b/ptx/src/test/spirv_run/mad_s32.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"53", ptr addrspace(4) byref(i64) %"54") #0 { -"76": +define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52", ptr addrspace(4) byref(i64) %"53") #0 { +"75": %"13" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"13", align 1 - %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -16,67 +14,67 @@ define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"53", %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i64, align 8, addrspace(5) + %"14" = load i64, ptr addrspace(4) %"52", align 8 + store i64 %"14", ptr addrspace(5) %"4", align 8 %"15" = load i64, ptr addrspace(4) %"53", align 8 - store i64 %"15", ptr addrspace(5) %"4", align 8 - %"16" = load i64, ptr addrspace(4) %"54", align 8 - store i64 %"16", ptr addrspace(5) %"5", align 8 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"56" = inttoptr i64 %"18" to ptr - %"55" = load i32, ptr %"56", align 4 - store i32 %"55", ptr addrspace(5) %"9", align 4 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"57" = inttoptr i64 %"20" to ptr - %"78" = getelementptr inbounds i8, ptr %"57", i64 4 - %"58" = load i32, ptr %"78", align 4 - store i32 %"58", ptr addrspace(5) %"10", align 4 - %"22" = load i64, ptr addrspace(5) %"4", align 8 - %"59" = inttoptr i64 %"22" to ptr - %"80" = getelementptr inbounds i8, ptr %"59", i64 8 - 
%"21" = load i64, ptr %"80", align 8 - store i64 %"21", ptr addrspace(5) %"12", align 8 - %"24" = load i64, ptr addrspace(5) %"4", align 8 - %"60" = inttoptr i64 %"24" to ptr - %"82" = getelementptr inbounds i8, ptr %"60", i64 16 - %"61" = load i32, ptr %"82", align 4 - store i32 %"61", ptr addrspace(5) %"11", align 4 - %"26" = load i32, ptr addrspace(5) %"9", align 4 - %"27" = load i32, ptr addrspace(5) %"10", align 4 - %"28" = load i32, ptr addrspace(5) %"11", align 4 - %0 = mul i32 %"26", %"27" - %"25" = add i32 %0, %"28" - store i32 %"25", ptr addrspace(5) %"6", align 4 - %"30" = load i32, ptr addrspace(5) %"9", align 4 - %"31" = load i32, ptr addrspace(5) %"10", align 4 - %"32" = load i32, ptr addrspace(5) %"11", align 4 - %1 = sext i32 %"30" to i64 - %2 = sext i32 %"31" to i64 + store i64 %"15", ptr addrspace(5) %"5", align 8 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"55" = inttoptr i64 %"17" to ptr + %"54" = load i32, ptr %"55", align 4 + store i32 %"54", ptr addrspace(5) %"9", align 4 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"56" = inttoptr i64 %"19" to ptr + %"77" = getelementptr inbounds i8, ptr %"56", i64 4 + %"57" = load i32, ptr %"77", align 4 + store i32 %"57", ptr addrspace(5) %"10", align 4 + %"21" = load i64, ptr addrspace(5) %"4", align 8 + %"58" = inttoptr i64 %"21" to ptr + %"79" = getelementptr inbounds i8, ptr %"58", i64 8 + %"20" = load i64, ptr %"79", align 8 + store i64 %"20", ptr addrspace(5) %"12", align 8 + %"23" = load i64, ptr addrspace(5) %"4", align 8 + %"59" = inttoptr i64 %"23" to ptr + %"81" = getelementptr inbounds i8, ptr %"59", i64 16 + %"60" = load i32, ptr %"81", align 4 + store i32 %"60", ptr addrspace(5) %"11", align 4 + %"25" = load i32, ptr addrspace(5) %"9", align 4 + %"26" = load i32, ptr addrspace(5) %"10", align 4 + %"27" = load i32, ptr addrspace(5) %"11", align 4 + %0 = mul i32 %"25", %"26" + %"24" = add i32 %0, %"27" + store i32 %"24", ptr addrspace(5) %"6", align 4 + %"29" = load i32, ptr 
addrspace(5) %"9", align 4 + %"30" = load i32, ptr addrspace(5) %"10", align 4 + %"31" = load i32, ptr addrspace(5) %"11", align 4 + %1 = sext i32 %"29" to i64 + %2 = sext i32 %"30" to i64 %3 = mul nsw i64 %1, %2 %4 = lshr i64 %3, 32 %5 = trunc i64 %4 to i32 - %"29" = add i32 %5, %"32" - store i32 %"29", ptr addrspace(5) %"7", align 4 - %"34" = load i32, ptr addrspace(5) %"9", align 4 - %"35" = load i32, ptr addrspace(5) %"10", align 4 - %"36" = load i64, ptr addrspace(5) %"12", align 8 - %6 = sext i32 %"34" to i64 - %7 = sext i32 %"35" to i64 + %"28" = add i32 %5, %"31" + store i32 %"28", ptr addrspace(5) %"7", align 4 + %"33" = load i32, ptr addrspace(5) %"9", align 4 + %"34" = load i32, ptr addrspace(5) %"10", align 4 + %"35" = load i64, ptr addrspace(5) %"12", align 8 + %6 = sext i32 %"33" to i64 + %7 = sext i32 %"34" to i64 %8 = mul nsw i64 %6, %7 - %"68" = add i64 %8, %"36" - store i64 %"68", ptr addrspace(5) %"8", align 8 - %"37" = load i64, ptr addrspace(5) %"5", align 8 - %"38" = load i32, ptr addrspace(5) %"6", align 4 - %"72" = inttoptr i64 %"37" to ptr - store i32 %"38", ptr %"72", align 4 - %"39" = load i64, ptr addrspace(5) %"5", align 8 - %"40" = load i32, ptr addrspace(5) %"7", align 4 - %"73" = inttoptr i64 %"39" to ptr - %"84" = getelementptr inbounds i8, ptr %"73", i64 8 - store i32 %"40", ptr %"84", align 4 - %"41" = load i64, ptr addrspace(5) %"5", align 8 - %"42" = load i64, ptr addrspace(5) %"8", align 8 - %"74" = inttoptr i64 %"41" to ptr - %"86" = getelementptr inbounds i8, ptr %"74", i64 16 - store i64 %"42", ptr %"86", align 8 + %"67" = add i64 %8, %"35" + store i64 %"67", ptr addrspace(5) %"8", align 8 + %"36" = load i64, ptr addrspace(5) %"5", align 8 + %"37" = load i32, ptr addrspace(5) %"6", align 4 + %"71" = inttoptr i64 %"36" to ptr + store i32 %"37", ptr %"71", align 4 + %"38" = load i64, ptr addrspace(5) %"5", align 8 + %"39" = load i32, ptr addrspace(5) %"7", align 4 + %"72" = inttoptr i64 %"38" to ptr + %"83" = getelementptr 
inbounds i8, ptr %"72", i64 8 + store i32 %"39", ptr %"83", align 4 + %"40" = load i64, ptr addrspace(5) %"5", align 8 + %"41" = load i64, ptr addrspace(5) %"8", align 8 + %"73" = inttoptr i64 %"40" to ptr + %"85" = getelementptr inbounds i8, ptr %"73", i64 16 + store i64 %"41", ptr %"85", align 8 ret void } diff --git a/ptx/src/test/spirv_run/madc_cc.ll b/ptx/src/test/spirv_run/madc_cc.ll index 626149c..136f320 100644 --- a/ptx/src/test/spirv_run/madc_cc.ll +++ b/ptx/src/test/spirv_run/madc_cc.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 { -"55": +define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { +"54": %"11" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"11", align 1 - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -14,54 +12,54 @@ define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"41", %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) + %"12" = load i64, ptr addrspace(4) %"40", align 8 + store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"41", align 8 - store i64 %"13", ptr addrspace(5) %"4", align 8 - %"14" = load i64, ptr addrspace(4) %"42", align 8 - store i64 %"14", ptr addrspace(5) %"5", align 8 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"44" = inttoptr i64 %"16" to ptr - %"43" = load i32, ptr %"44", align 4 - store i32 %"43", ptr 
addrspace(5) %"8", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"45" = inttoptr i64 %"18" to ptr - %"57" = getelementptr inbounds i8, ptr %"45", i64 4 - %"46" = load i32, ptr %"57", align 4 - store i32 %"46", ptr addrspace(5) %"9", align 4 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"47" = inttoptr i64 %"20" to ptr - %"59" = getelementptr inbounds i8, ptr %"47", i64 8 - %"19" = load i32, ptr %"59", align 4 - store i32 %"19", ptr addrspace(5) %"10", align 4 - %"23" = load i32, ptr addrspace(5) %"8", align 4 - %"24" = load i32, ptr addrspace(5) %"9", align 4 - %"25" = load i32, ptr addrspace(5) %"10", align 4 - %0 = mul i32 %"23", %"24" - %1 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %0, i32 %"25") - %"21" = extractvalue { i32, i1 } %1, 0 - %"22" = extractvalue { i32, i1 } %1, 1 - store i32 %"21", ptr addrspace(5) %"6", align 4 - store i1 %"22", ptr addrspace(5) %"11", align 1 - %"27" = load i1, ptr addrspace(5) %"11", align 1 - %"28" = load i32, ptr addrspace(5) %"8", align 4 - %"29" = load i32, ptr addrspace(5) %"9", align 4 - %2 = sext i32 %"28" to i64 - %3 = sext i32 %"29" to i64 + store i64 %"13", ptr addrspace(5) %"5", align 8 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"43" = inttoptr i64 %"15" to ptr + %"42" = load i32, ptr %"43", align 4 + store i32 %"42", ptr addrspace(5) %"8", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"44" = inttoptr i64 %"17" to ptr + %"56" = getelementptr inbounds i8, ptr %"44", i64 4 + %"45" = load i32, ptr %"56", align 4 + store i32 %"45", ptr addrspace(5) %"9", align 4 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"46" = inttoptr i64 %"19" to ptr + %"58" = getelementptr inbounds i8, ptr %"46", i64 8 + %"18" = load i32, ptr %"58", align 4 + store i32 %"18", ptr addrspace(5) %"10", align 4 + %"22" = load i32, ptr addrspace(5) %"8", align 4 + %"23" = load i32, ptr addrspace(5) %"9", align 4 + %"24" = load i32, ptr addrspace(5) %"10", align 4 + %0 = mul i32 %"22", 
%"23" + %1 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %0, i32 %"24") + %"20" = extractvalue { i32, i1 } %1, 0 + %"21" = extractvalue { i32, i1 } %1, 1 + store i32 %"20", ptr addrspace(5) %"6", align 4 + store i1 %"21", ptr addrspace(5) %"11", align 1 + %"26" = load i1, ptr addrspace(5) %"11", align 1 + %"27" = load i32, ptr addrspace(5) %"8", align 4 + %"28" = load i32, ptr addrspace(5) %"9", align 4 + %2 = sext i32 %"27" to i64 + %3 = sext i32 %"28" to i64 %4 = mul nsw i64 %2, %3 %5 = lshr i64 %4, 32 %6 = trunc i64 %5 to i32 - %7 = zext i1 %"27" to i32 + %7 = zext i1 %"26" to i32 %8 = add i32 %6, 3 - %"26" = add i32 %8, %7 - store i32 %"26", ptr addrspace(5) %"7", align 4 - %"30" = load i64, ptr addrspace(5) %"5", align 8 - %"31" = load i32, ptr addrspace(5) %"6", align 4 - %"53" = inttoptr i64 %"30" to ptr - store i32 %"31", ptr %"53", align 4 - %"32" = load i64, ptr addrspace(5) %"5", align 8 - %"33" = load i32, ptr addrspace(5) %"7", align 4 - %"54" = inttoptr i64 %"32" to ptr - %"61" = getelementptr inbounds i8, ptr %"54", i64 4 - store i32 %"33", ptr %"61", align 4 + %"25" = add i32 %8, %7 + store i32 %"25", ptr addrspace(5) %"7", align 4 + %"29" = load i64, ptr addrspace(5) %"5", align 8 + %"30" = load i32, ptr addrspace(5) %"6", align 4 + %"52" = inttoptr i64 %"29" to ptr + store i32 %"30", ptr %"52", align 4 + %"31" = load i64, ptr addrspace(5) %"5", align 8 + %"32" = load i32, ptr addrspace(5) %"7", align 4 + %"53" = inttoptr i64 %"31" to ptr + %"60" = getelementptr inbounds i8, ptr %"53", i64 4 + store i32 %"32", ptr %"60", align 4 ret void } diff --git a/ptx/src/test/spirv_run/madc_cc2.ll b/ptx/src/test/spirv_run/madc_cc2.ll deleted file mode 100644 index bea7193..0000000 --- a/ptx/src/test/spirv_run/madc_cc2.ll +++ /dev/null @@ -1,73 +0,0 @@ -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" 
-target triple = "amdgcn-amd-amdhsa" - -define protected amdgpu_kernel void @madc_cc2(ptr addrspace(4) byref(i64) %"52", ptr addrspace(4) byref(i64) %"53") #0 { -"66": - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 - %"4" = alloca i64, align 8, addrspace(5) - %"5" = alloca i64, align 8, addrspace(5) - %"6" = alloca i32, align 4, addrspace(5) - %"7" = alloca i32, align 4, addrspace(5) - %"8" = alloca i32, align 4, addrspace(5) - %"9" = alloca i32, align 4, addrspace(5) - %"10" = alloca i32, align 4, addrspace(5) - %"13" = load i64, ptr addrspace(4) %"53", align 8 - store i64 %"13", ptr addrspace(5) %"5", align 8 - %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 -1) - %"14" = extractvalue { i32, i1 } %0, 0 - %"15" = extractvalue { i32, i1 } %0, 1 - store i32 %"14", ptr addrspace(5) %"6", align 4 - store i1 %"15", ptr addrspace(5) %"11", align 1 - %"18" = load i1, ptr addrspace(5) %"11", align 1 - %1 = zext i1 %"18" to i32 - %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -1) - %3 = extractvalue { i32, i1 } %2, 0 - %4 = extractvalue { i32, i1 } %2, 1 - %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1) - %"54" = extractvalue { i32, i1 } %5, 0 - %6 = extractvalue { i32, i1 } %5, 1 - %"17" = xor i1 %4, %6 - store i32 %"54", ptr addrspace(5) %"7", align 4 - store i1 %"17", ptr addrspace(5) %"11", align 1 - %"20" = load i1, ptr addrspace(5) %"11", align 1 - %7 = zext i1 %"20" to i32 - %"55" = add i32 0, %7 - store i32 %"55", ptr addrspace(5) %"8", align 4 - %"22" = load i1, ptr addrspace(5) %"11", align 1 - %8 = zext i1 %"22" to i32 - %"56" = add i32 0, %8 - store i32 %"56", ptr addrspace(5) %"9", align 4 - %"24" = load i1, ptr addrspace(5) %"12", align 1 - %9 = zext i1 %"24" to i32 - %"57" = sub i32 2, %9 - store i32 %"57", ptr addrspace(5) %"10", align 4 - %"25" = load i64, ptr addrspace(5) 
%"5", align 8 - %"26" = load i32, ptr addrspace(5) %"7", align 4 - %"58" = inttoptr i64 %"25" to ptr - store i32 %"26", ptr %"58", align 4 - %"27" = load i64, ptr addrspace(5) %"5", align 8 - %"28" = load i32, ptr addrspace(5) %"8", align 4 - %"60" = inttoptr i64 %"27" to ptr - %"68" = getelementptr inbounds i8, ptr %"60", i64 4 - store i32 %"28", ptr %"68", align 4 - %"29" = load i64, ptr addrspace(5) %"5", align 8 - %"30" = load i32, ptr addrspace(5) %"9", align 4 - %"62" = inttoptr i64 %"29" to ptr - %"70" = getelementptr inbounds i8, ptr %"62", i64 8 - store i32 %"30", ptr %"70", align 4 - %"31" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = load i32, ptr addrspace(5) %"10", align 4 - %"64" = inttoptr i64 %"31" to ptr - %"72" = getelementptr inbounds i8, ptr %"64", i64 12 - store i32 %"32", ptr %"72", align 4 - ret void -} - -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 - -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } diff --git a/ptx/src/test/spirv_run/madc_cc2.ptx b/ptx/src/test/spirv_run/madc_cc2.ptx deleted file mode 100644 index 163c39b..0000000 --- a/ptx/src/test/spirv_run/madc_cc2.ptx +++ /dev/null @@ -1,38 +0,0 @@ -.version 6.5 -.target sm_30 -.address_size 64 - -.visible .entry madc_cc2( - .param .u64 input, - .param .u64 output -) -{ - .reg .u64 in_addr; - .reg .u64 out_addr; - .reg .u32 unused; - - .reg .b32 result_1; - .reg .b32 carry_out_1_1; - .reg .b32 carry_out_1_2; - .reg .b32 carry_out_1_3; - - ld.param.u64 out_addr, [output]; - - // set carry=1 - mad.lo.cc.u32 unused, 0, 0, 4294967295; - // overflow addition - madc.lo.cc.u32 result_1, 1, 1, 4294967295; - // write carry - madc.lo.u32 carry_out_1_1, 0, 0, 0; - // 
overflow is also detected by addc - addc.u32 carry_out_1_2, 0, 0; - // but not subc - subc.u32 carry_out_1_3, 2, 0; - - st.s32 [out_addr], result_1; - st.s32 [out_addr+4], carry_out_1_1; - st.s32 [out_addr+8], carry_out_1_2; - st.s32 [out_addr+12], carry_out_1_3; - - ret; -} diff --git a/ptx/src/test/spirv_run/max.ll b/ptx/src/test/spirv_run/max.ll index 79b6f48..6dcc74d 100644 --- a/ptx/src/test/spirv_run/max.ll +++ b/ptx/src/test/spirv_run/max.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": +define protected amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"27": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"25", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load i32, ptr %"30", align 4 - store i32 %"14", ptr addrspace(5) %"7", 
align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 - %"16" = call i32 @llvm.smax.i32(i32 %"17", i32 %"18") - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"27" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"27", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"24", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"29" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"29", align 4 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 + %"15" = call i32 @llvm.smax.i32(i32 %"16", i32 %"17") + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"26" = inttoptr i64 %"18" to ptr + store i32 %"19", ptr %"26", align 4 ret void } diff --git a/ptx/src/test/spirv_run/membar.ll b/ptx/src/test/spirv_run/membar.ll index c9ec8b9..78f60c8 100644 --- a/ptx/src/test/spirv_run/membar.ll +++ b/ptx/src/test/spirv_run/membar.ll @@ -1,28 +1,26 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 { -"20": +define protected amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { +"19": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr 
addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"14", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"16", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"18" = inttoptr i64 %"12" to ptr - %"17" = load i32, ptr %"18", align 4 - store i32 %"17", ptr addrspace(5) %"6", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"17" = inttoptr i64 %"11" to ptr + %"16" = load i32, ptr %"17", align 4 + store i32 %"16", ptr addrspace(5) %"6", align 4 fence seq_cst - %"13" = load i64, ptr addrspace(5) %"5", align 8 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %"19" = inttoptr i64 %"13" to ptr - store i32 %"14", ptr %"19", align 4 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"13" = load i32, ptr addrspace(5) %"6", align 4 + %"18" = inttoptr i64 %"12" to ptr + store i32 %"13", ptr %"18", align 4 ret void } diff --git a/ptx/src/test/spirv_run/min.ll b/ptx/src/test/spirv_run/min.ll index 0828070..58cb36a 100644 --- a/ptx/src/test/spirv_run/min.ll +++ b/ptx/src/test/spirv_run/min.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": +define protected amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"27": 
%"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"25", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load i32, ptr %"30", align 4 - store i32 %"14", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 - %"16" = call i32 @llvm.smin.i32(i32 %"17", i32 %"18") - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"27" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"27", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"24", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"29" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"29", align 4 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 + %"15" 
= call i32 @llvm.smin.i32(i32 %"16", i32 %"17") + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"26" = inttoptr i64 %"18" to ptr + store i32 %"19", ptr %"26", align 4 ret void } diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index 8f229c9..1ec030b 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -271,7 +271,11 @@ test_ptx!(const, [0u16], [10u16, 20, 30, 40]); test_ptx!(cvt_s16_s8, [0x139231C2u32], [0xFFFFFFC2u32]); test_ptx!(cvt_f64_f32, [0.125f32], [0.125f64]); test_ptx!(cvt_f32_f16, [0xa1u16], [0x37210000u32]); -test_ptx!(prmt, [0x70c507d6u32, 0x6fbd4b5cu32], [0x6fbdd65cu32, 0x6FFFD600]); +test_ptx!( + prmt, + [0x70c507d6u32, 0x6fbd4b5cu32], + [0x6fbdd65cu32, 0x6FFFD600] +); test_ptx!( prmt_non_immediate, [0x70c507d6u32, 0x6fbd4b5cu32], @@ -289,8 +293,11 @@ test_ptx!( [65521u32, 2147549199, 0x1000], [2147487519u32, 4294934539] ); -test_ptx!(madc_cc2, [0xDEADu32], [0u32, 1, 1, 2]); -test_ptx!(mad_hi_cc, [0x26223377u32, 0x70777766u32, 0x60666633u32], [0x71272866u32, 0u32, 1u32]); // Multi-tap :) +test_ptx!( + mad_hi_cc, + [0x26223377u32, 0x70777766u32, 0x60666633u32], + [0x71272866u32, 0u32, 1u32] +); // Multi-tap :) test_ptx!(mov_vector_cast, [0x200000001u64], [2u32, 1u32]); test_ptx!( cvt_clamp, @@ -323,11 +330,13 @@ test_ptx!( ], [4294967295u32, 0, 2] ); -test_ptx!(carry_mixed, [0xDEADu32], [1u32, 1u32]); test_ptx!( - subc_cc2, + carry_set_all, [0xDEADu32], - [0u32, 1, 0, 4294967295, 1, 4294967295, 1] + [ + 1u32, 0, 0, 1, 0, 1, 0, 0, 0u32, 4294967295, 4294967295, 0, 4294967295, 0, 4294967295, + 4294967295 + ] ); test_ptx!(vshr, [0x6f3650f4u32, 22, 0xc62d4586], [0xC62D4742u32]); test_ptx!(bfind, [0u32, 1u32, 0x64eb0414], [u32::MAX, 0, 30]); @@ -337,7 +346,11 @@ test_ptx!( [f16::from_f32(2.0), f16::from_f32(3.0)], [f16::from_f32(2.0), f16::from_f32(5.0)] ); -test_ptx!(set_f16x2, [0xc1690e6eu32, 
0x13739444u32, 0x424834CC, 0x4248B4CC], [0xffffu32, 0x3C000000]); +test_ptx!( + set_f16x2, + [0xc1690e6eu32, 0x13739444u32, 0x424834CC, 0x4248B4CC], + [0xffffu32, 0x3C000000] +); test_ptx!( dp4a, [0xde3032f5u32, 0x2474fe15, 0xf51d8d6c], diff --git a/ptx/src/test/spirv_run/mov.ll b/ptx/src/test/spirv_run/mov.ll index e876ced..e24446a 100644 --- a/ptx/src/test/spirv_run/mov.ll +++ b/ptx/src/test/spirv_run/mov.ll @@ -1,33 +1,31 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": +define protected amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"21": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"20", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"19", align 8 + store i64 %"11", ptr addrspace(5) 
%"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 %0 = alloca i64, align 8, addrspace(5) - store i64 %"15", ptr addrspace(5) %0, align 8 - %"14" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"21" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"21", align 8 + store i64 %"14", ptr addrspace(5) %0, align 8 + %"13" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"20" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"20", align 8 ret void } diff --git a/ptx/src/test/spirv_run/mov_address.ll b/ptx/src/test/spirv_run/mov_address.ll index b9f3a8a..656410c 100644 --- a/ptx/src/test/spirv_run/mov_address.ll +++ b/ptx/src/test/spirv_run/mov_address.ll @@ -1,19 +1,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"9", ptr addrspace(4) byref(i64) %"10") #0 { -"12": +define protected amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"8", ptr addrspace(4) byref(i64) %"9") #0 { +"11": %"6" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"6", align 1 - %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca [8 x i8], align 1, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) - %"11" = ptrtoint ptr addrspace(5) %"4" to i64 + %"10" = ptrtoint ptr addrspace(5) %"4" to i64 %0 = alloca i64, align 8, addrspace(5) - store i64 %"11", ptr addrspace(5) %0, align 8 - %"8" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"8", 
ptr addrspace(5) %"5", align 8 + store i64 %"10", ptr addrspace(5) %0, align 8 + %"7" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"7", ptr addrspace(5) %"5", align 8 ret void } diff --git a/ptx/src/test/spirv_run/mov_vector_cast.ll b/ptx/src/test/spirv_run/mov_vector_cast.ll index 1f52a3b..e65ad94 100644 --- a/ptx/src/test/spirv_run/mov_vector_cast.ll +++ b/ptx/src/test/spirv_run/mov_vector_cast.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { -"50": +define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { +"49": %"15" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"15", align 1 - %"16" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"16", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) @@ -16,51 +14,51 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"10" = alloca half, align 2, addrspace(5) %"11" = alloca half, align 2, addrspace(5) %"12" = alloca half, align 2, addrspace(5) + %"16" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"16", ptr addrspace(5) %"4", align 8 %"17" = load i64, ptr addrspace(4) %"35", align 8 - store i64 %"17", ptr addrspace(5) %"4", align 8 - %"18" = load i64, ptr addrspace(4) %"36", align 8 - store i64 %"18", ptr addrspace(5) %"5", align 8 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"37" = inttoptr i64 %"20" to ptr - %"19" = load i64, ptr %"37", align 8 - store i64 %"19", ptr addrspace(5) %"6", align 8 - %"21" = load i64, ptr addrspace(5) %"6", 
align 8 + store i64 %"17", ptr addrspace(5) %"5", align 8 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"36" = inttoptr i64 %"19" to ptr + %"18" = load i64, ptr %"36", align 8 + store i64 %"18", ptr addrspace(5) %"6", align 8 + %"20" = load i64, ptr addrspace(5) %"6", align 8 %0 = alloca i64, align 8, addrspace(5) - store i64 %"21", ptr addrspace(5) %0, align 8 + store i64 %"20", ptr addrspace(5) %0, align 8 %"13" = load i64, ptr addrspace(5) %0, align 8 - %"39" = bitcast i64 %"13" to <2 x i32> - %"40" = extractelement <2 x i32> %"39", i32 0 - %"41" = extractelement <2 x i32> %"39", i32 1 + %"38" = bitcast i64 %"13" to <2 x i32> + %"39" = extractelement <2 x i32> %"38", i32 0 + %"40" = extractelement <2 x i32> %"38", i32 1 + %"21" = bitcast i32 %"39" to float %"22" = bitcast i32 %"40" to float - %"23" = bitcast i32 %"41" to float - store float %"22", ptr addrspace(5) %"7", align 4 - store float %"23", ptr addrspace(5) %"8", align 4 - %"24" = load i64, ptr addrspace(5) %"6", align 8 + store float %"21", ptr addrspace(5) %"7", align 4 + store float %"22", ptr addrspace(5) %"8", align 4 + %"23" = load i64, ptr addrspace(5) %"6", align 8 %1 = alloca i64, align 8, addrspace(5) - store i64 %"24", ptr addrspace(5) %1, align 8 + store i64 %"23", ptr addrspace(5) %1, align 8 %"14" = load i64, ptr addrspace(5) %1, align 8 - %"43" = bitcast i64 %"14" to <4 x i16> - %"44" = extractelement <4 x i16> %"43", i32 0 - %"45" = extractelement <4 x i16> %"43", i32 1 - %"46" = extractelement <4 x i16> %"43", i32 2 - %"47" = extractelement <4 x i16> %"43", i32 3 + %"42" = bitcast i64 %"14" to <4 x i16> + %"43" = extractelement <4 x i16> %"42", i32 0 + %"44" = extractelement <4 x i16> %"42", i32 1 + %"45" = extractelement <4 x i16> %"42", i32 2 + %"46" = extractelement <4 x i16> %"42", i32 3 + %"24" = bitcast i16 %"43" to half %"25" = bitcast i16 %"44" to half %"26" = bitcast i16 %"45" to half %"27" = bitcast i16 %"46" to half - %"28" = bitcast i16 %"47" to half - store half 
%"25", ptr addrspace(5) %"9", align 2 - store half %"26", ptr addrspace(5) %"10", align 2 - store half %"27", ptr addrspace(5) %"11", align 2 - store half %"28", ptr addrspace(5) %"12", align 2 - %"29" = load i64, ptr addrspace(5) %"5", align 8 - %"30" = load float, ptr addrspace(5) %"8", align 4 - %"48" = inttoptr i64 %"29" to ptr - store float %"30", ptr %"48", align 4 - %"31" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = load float, ptr addrspace(5) %"7", align 4 - %"49" = inttoptr i64 %"31" to ptr - %"52" = getelementptr inbounds i8, ptr %"49", i64 4 - store float %"32", ptr %"52", align 4 + store half %"24", ptr addrspace(5) %"9", align 2 + store half %"25", ptr addrspace(5) %"10", align 2 + store half %"26", ptr addrspace(5) %"11", align 2 + store half %"27", ptr addrspace(5) %"12", align 2 + %"28" = load i64, ptr addrspace(5) %"5", align 8 + %"29" = load float, ptr addrspace(5) %"8", align 4 + %"47" = inttoptr i64 %"28" to ptr + store float %"29", ptr %"47", align 4 + %"30" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = load float, ptr addrspace(5) %"7", align 4 + %"48" = inttoptr i64 %"30" to ptr + %"51" = getelementptr inbounds i8, ptr %"48", i64 4 + store float %"31", ptr %"51", align 4 ret void } diff --git a/ptx/src/test/spirv_run/mul_ftz.ll b/ptx/src/test/spirv_run/mul_ftz.ll index 04de6f2..3c32e73 100644 --- a/ptx/src/test/spirv_run/mul_ftz.ll +++ b/ptx/src/test/spirv_run/mul_ftz.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": +define protected amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"27": %"8" = alloca i1, align 1, addrspace(5) store i1 false, 
ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load float, ptr %"25", align 4 - store float %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load float, ptr %"30", align 4 - store float %"14", ptr addrspace(5) %"7", align 4 - %"17" = load float, ptr addrspace(5) %"6", align 4 - %"18" = load float, ptr addrspace(5) %"7", align 4 - %"16" = fmul float %"17", %"18" - store float %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load float, ptr addrspace(5) %"6", align 4 - %"27" = inttoptr i64 %"19" to ptr - store float %"20", ptr %"27", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load float, ptr %"24", align 4 + store float %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"29" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load float, ptr %"29", align 4 + store float %"13", ptr addrspace(5) %"7", align 4 + %"16" = load float, ptr addrspace(5) %"6", align 4 + %"17" = load float, ptr addrspace(5) %"7", align 4 + %"15" = fmul float %"16", %"17" + store float 
%"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load float, ptr addrspace(5) %"6", align 4 + %"26" = inttoptr i64 %"18" to ptr + store float %"19", ptr %"26", align 4 ret void } diff --git a/ptx/src/test/spirv_run/mul_hi.ll b/ptx/src/test/spirv_run/mul_hi.ll index e57141b..7d8ffa9 100644 --- a/ptx/src/test/spirv_run/mul_hi.ll +++ b/ptx/src/test/spirv_run/mul_hi.ll @@ -3,31 +3,29 @@ target triple = "amdgcn-amd-amdhsa" declare i64 @__zluda_ptx_impl__mul_hi_u64(i64, i64) #0 -define protected amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #1 { -"23": +define protected amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #1 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"21", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"14" = call i64 @__zluda_ptx_impl__mul_hi_u64(i64 %"15", i64 2) - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"22", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + 
%"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"20", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"13" = call i64 @__zluda_ptx_impl__mul_hi_u64(i64 %"14", i64 2) + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/mul_lo.ll b/ptx/src/test/spirv_run/mul_lo.ll index 1a915fa..57a767d 100644 --- a/ptx/src/test/spirv_run/mul_lo.ll +++ b/ptx/src/test/spirv_run/mul_lo.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"23": +define protected amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"21", align 8 - 
store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"14" = mul i64 %"15", 2 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"22", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"20", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"13" = mul i64 %"14", 2 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/mul_non_ftz.ll b/ptx/src/test/spirv_run/mul_non_ftz.ll index d0d2bcd..e6a3cc4 100644 --- a/ptx/src/test/spirv_run/mul_non_ftz.ll +++ b/ptx/src/test/spirv_run/mul_non_ftz.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": +define protected amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"27": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + %"9" = load i64, ptr 
addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load float, ptr %"25", align 4 - store float %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load float, ptr %"30", align 4 - store float %"14", ptr addrspace(5) %"7", align 4 - %"17" = load float, ptr addrspace(5) %"6", align 4 - %"18" = load float, ptr addrspace(5) %"7", align 4 - %"16" = fmul float %"17", %"18" - store float %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load float, ptr addrspace(5) %"6", align 4 - %"27" = inttoptr i64 %"19" to ptr - store float %"20", ptr %"27", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load float, ptr %"24", align 4 + store float %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"29" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load float, ptr %"29", align 4 + store float %"13", ptr addrspace(5) %"7", align 4 + %"16" = load float, ptr addrspace(5) %"6", align 4 + %"17" = load float, ptr addrspace(5) %"7", align 4 + %"15" = fmul float %"16", %"17" + store float %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load float, ptr addrspace(5) %"6", align 4 + %"26" = inttoptr i64 %"18" to ptr + store float %"19", ptr %"26", align 4 ret void } diff --git a/ptx/src/test/spirv_run/mul_wide.ll b/ptx/src/test/spirv_run/mul_wide.ll 
index b1dec22..e25a61d 100644 --- a/ptx/src/test/spirv_run/mul_wide.ll +++ b/ptx/src/test/spirv_run/mul_wide.ll @@ -1,40 +1,38 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"30": +define protected amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { +"29": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"23", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"14" to ptr addrspace(1) - %"13" = load i32, ptr addrspace(1) %"26", align 4 - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"27" = inttoptr i64 %"16" to ptr addrspace(1) - %"32" = getelementptr inbounds i8, ptr addrspace(1) %"27", i64 4 - %"15" = load i32, ptr addrspace(1) %"32", align 4 - store i32 %"15", ptr addrspace(5) %"7", align 4 - %"18" = load i32, ptr addrspace(5) %"6", align 4 - %"19" = load i32, ptr addrspace(5) %"7", align 4 - %0 = sext i32 %"18" to i64 - %1 = sext i32 %"19" to i64 - %"17" = mul nsw i64 %0, %1 - store i64 %"17", ptr 
addrspace(5) %"8", align 8 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load i64, ptr addrspace(5) %"8", align 8 - %"28" = inttoptr i64 %"20" to ptr - store i64 %"21", ptr %"28", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"13" to ptr addrspace(1) + %"12" = load i32, ptr addrspace(1) %"25", align 4 + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"26" = inttoptr i64 %"15" to ptr addrspace(1) + %"31" = getelementptr inbounds i8, ptr addrspace(1) %"26", i64 4 + %"14" = load i32, ptr addrspace(1) %"31", align 4 + store i32 %"14", ptr addrspace(5) %"7", align 4 + %"17" = load i32, ptr addrspace(5) %"6", align 4 + %"18" = load i32, ptr addrspace(5) %"7", align 4 + %0 = sext i32 %"17" to i64 + %1 = sext i32 %"18" to i64 + %"16" = mul nsw i64 %0, %1 + store i64 %"16", ptr addrspace(5) %"8", align 8 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = load i64, ptr addrspace(5) %"8", align 8 + %"27" = inttoptr i64 %"19" to ptr + store i64 %"20", ptr %"27", align 8 ret void } diff --git a/ptx/src/test/spirv_run/multireg.ll b/ptx/src/test/spirv_run/multireg.ll index 3826c19..657d61f 100644 --- a/ptx/src/test/spirv_run/multireg.ll +++ b/ptx/src/test/spirv_run/multireg.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @multireg(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"23": +define protected amdgpu_kernel void @multireg(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 
false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"21", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"14" = add i64 %"15", 1 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"22", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"20", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"13" = add i64 %"14", 1 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/neg.ll b/ptx/src/test/spirv_run/neg.ll index c1087b4..1e94ed1 100644 --- a/ptx/src/test/spirv_run/neg.ll +++ b/ptx/src/test/spirv_run/neg.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void 
@neg(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load i32, ptr %"19", align 4 - store i32 %"11", ptr addrspace(5) %"6", align 4 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %"13" = sub i32 0, %"14" - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load i32, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store i32 %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load i32, ptr %"18", align 4 + store i32 %"10", ptr addrspace(5) %"6", align 4 + %"13" = load i32, ptr addrspace(5) %"6", align 4 + %"12" = sub i32 0, %"13" + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load i32, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store i32 %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll b/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll index 718a512..69ea8d2 100644 --- 
a/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll +++ b/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll @@ -1,36 +1,34 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"27": +define protected amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"26": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr addrspace(1) - %"29" = getelementptr inbounds i8, ptr addrspace(1) %"25", i64 8 - %"8" = load <2 x i32>, ptr addrspace(1) %"29", align 8 - %"14" = extractelement <2 x i32> %"8", i32 0 - %"15" = extractelement <2 x i32> %"8", i32 1 - store i32 %"14", ptr addrspace(5) %"6", align 4 - store i32 %"15", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 - %"16" = add i32 %"17", %"18" - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - 
%"26" = inttoptr i64 %"19" to ptr addrspace(1) - store i32 %"20", ptr addrspace(1) %"26", align 4 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr addrspace(1) + %"28" = getelementptr inbounds i8, ptr addrspace(1) %"24", i64 8 + %"8" = load <2 x i32>, ptr addrspace(1) %"28", align 8 + %"13" = extractelement <2 x i32> %"8", i32 0 + %"14" = extractelement <2 x i32> %"8", i32 1 + store i32 %"13", ptr addrspace(5) %"6", align 4 + store i32 %"14", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 + %"15" = add i32 %"16", %"17" + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"25" = inttoptr i64 %"18" to ptr addrspace(1) + store i32 %"19", ptr addrspace(1) %"25", align 4 ret void } diff --git a/ptx/src/test/spirv_run/not.ll b/ptx/src/test/spirv_run/not.ll index 10dd56c..5e86545 100644 --- a/ptx/src/test/spirv_run/not.ll +++ b/ptx/src/test/spirv_run/not.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": +define protected amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"23": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load 
i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"20", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"21" = xor i64 %"15", -1 - store i64 %"21", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"23" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"23", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"19", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"20" = xor i64 %"14", -1 + store i64 %"20", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"22" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"22", align 8 ret void } diff --git a/ptx/src/test/spirv_run/ntid.ll b/ptx/src/test/spirv_run/ntid.ll index 93c95bf..53216ce 100644 --- a/ptx/src/test/spirv_run/ntid.ll +++ b/ptx/src/test/spirv_run/ntid.ll @@ -3,37 +3,35 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__sreg_ntid(i8) #0 -define protected amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #1 { -"30": +define protected amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #1 { +"29": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr 
addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"15" = load i64, ptr addrspace(4) %"25", align 8 + store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"26", align 8 - store i64 %"16", ptr addrspace(5) %"4", align 8 - %"17" = load i64, ptr addrspace(4) %"27", align 8 - store i64 %"17", ptr addrspace(5) %"5", align 8 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"28" = inttoptr i64 %"19" to ptr - %"18" = load i32, ptr %"28", align 4 - store i32 %"18", ptr addrspace(5) %"6", align 4 - %"12" = call i32 @__zluda_ptx_impl__sreg_ntid(i8 0) + store i64 %"16", ptr addrspace(5) %"5", align 8 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"27" = inttoptr i64 %"18" to ptr + %"17" = load i32, ptr %"27", align 4 + store i32 %"17", ptr addrspace(5) %"6", align 4 + %"11" = call i32 @__zluda_ptx_impl__sreg_ntid(i8 0) %0 = alloca i32, align 4, addrspace(5) - store i32 %"12", ptr addrspace(5) %0, align 4 - %"20" = load i32, ptr addrspace(5) %0, align 4 - store i32 %"20", ptr addrspace(5) %"7", align 4 - %"22" = load i32, ptr addrspace(5) %"6", align 4 - %"23" = load i32, ptr addrspace(5) %"7", align 4 - %"21" = add i32 %"22", %"23" - store i32 %"21", ptr addrspace(5) %"6", align 4 - %"24" = load i64, ptr addrspace(5) %"5", align 8 - %"25" = load i32, ptr addrspace(5) %"6", align 4 - %"29" = inttoptr i64 %"24" to ptr - store i32 %"25", ptr %"29", align 4 + store i32 %"11", ptr addrspace(5) %0, align 4 + %"19" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"19", ptr addrspace(5) %"7", align 4 + %"21" = load i32, ptr addrspace(5) %"6", align 4 + %"22" = load i32, ptr addrspace(5) %"7", align 4 + %"20" = add i32 %"21", %"22" + store i32 %"20", ptr addrspace(5) %"6", align 4 + %"23" = load i64, ptr addrspace(5) %"5", align 8 + %"24" = load i32, ptr addrspace(5) %"6", align 4 + %"28" = 
inttoptr i64 %"23" to ptr + store i32 %"24", ptr %"28", align 4 ret void } diff --git a/ptx/src/test/spirv_run/or.ll b/ptx/src/test/spirv_run/or.ll index 13e844b..7b4bd7f 100644 --- a/ptx/src/test/spirv_run/or.ll +++ b/ptx/src/test/spirv_run/or.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"31": +define protected amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"30": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"25", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"33" = getelementptr inbounds i8, ptr %"26", i64 8 - %"14" = load i64, ptr %"33", align 8 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"17" = load i64, ptr addrspace(5) %"6", align 8 - %"18" = load i64, ptr addrspace(5) %"7", align 8 - %"27" = or i64 %"17", %"18" - store i64 %"27", ptr addrspace(5) %"6", align 8 - %"19" = load i64, 
ptr addrspace(5) %"5", align 8 - %"20" = load i64, ptr addrspace(5) %"6", align 8 - %"30" = inttoptr i64 %"19" to ptr - store i64 %"20", ptr %"30", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"24", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"32" = getelementptr inbounds i8, ptr %"25", i64 8 + %"13" = load i64, ptr %"32", align 8 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"16" = load i64, ptr addrspace(5) %"6", align 8 + %"17" = load i64, ptr addrspace(5) %"7", align 8 + %"26" = or i64 %"16", %"17" + store i64 %"26", ptr addrspace(5) %"6", align 8 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i64, ptr addrspace(5) %"6", align 8 + %"29" = inttoptr i64 %"18" to ptr + store i64 %"19", ptr %"29", align 8 ret void } diff --git a/ptx/src/test/spirv_run/param_ptr.ll b/ptx/src/test/spirv_run/param_ptr.ll index 3634669..cea098c 100644 --- a/ptx/src/test/spirv_run/param_ptr.ll +++ b/ptx/src/test/spirv_run/param_ptr.ll @@ -1,39 +1,37 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @param_ptr(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"29": +define protected amdgpu_kernel void @param_ptr(ptr addrspace(4) byref(i64) %"21", ptr addrspace(4) byref(i64) %"22") #0 { +"28": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) 
%"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) - %"25" = ptrtoint ptr addrspace(4) %"22" to i64 + %"24" = ptrtoint ptr addrspace(4) %"21" to i64 %0 = alloca i64, align 8, addrspace(5) - store i64 %"25", ptr addrspace(5) %0, align 8 - %"24" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"24", ptr addrspace(5) %"4", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"13" to ptr addrspace(4) - %"12" = load i64, ptr addrspace(4) %"26", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"14", ptr addrspace(5) %"6", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"27" = inttoptr i64 %"16" to ptr - %"15" = load i64, ptr %"27", align 8 - store i64 %"15", ptr addrspace(5) %"7", align 8 - %"18" = load i64, ptr addrspace(5) %"7", align 8 - %"17" = add i64 %"18", 1 - store i64 %"17", ptr addrspace(5) %"8", align 8 - %"19" = load i64, ptr addrspace(5) %"6", align 8 - %"20" = load i64, ptr addrspace(5) %"8", align 8 - %"28" = inttoptr i64 %"19" to ptr - store i64 %"20", ptr %"28", align 8 + store i64 %"24", ptr addrspace(5) %0, align 8 + %"23" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"23", ptr addrspace(5) %"4", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"12" to ptr addrspace(4) + %"11" = load i64, ptr addrspace(4) %"25", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"13", ptr addrspace(5) %"6", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"26" = inttoptr i64 %"15" to ptr + %"14" = load i64, ptr %"26", align 8 + store i64 %"14", ptr addrspace(5) %"7", align 8 + %"17" = load i64, ptr addrspace(5) %"7", align 8 + %"16" = add i64 %"17", 1 + store i64 %"16", ptr addrspace(5) %"8", align 8 + %"18" = load i64, ptr addrspace(5) %"6", align 8 + %"19" = load i64, ptr addrspace(5) %"8", 
align 8 + %"27" = inttoptr i64 %"18" to ptr + store i64 %"19", ptr %"27", align 8 ret void } diff --git a/ptx/src/test/spirv_run/popc.ll b/ptx/src/test/spirv_run/popc.ll index e93f8ad..be9c625 100644 --- a/ptx/src/test/spirv_run/popc.ll +++ b/ptx/src/test/spirv_run/popc.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load i32, ptr %"19", align 4 - store i32 %"11", ptr addrspace(5) %"6", align 4 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %"13" = call i32 @llvm.ctpop.i32(i32 %"14") - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load i32, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store i32 %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr 
i64 %"11" to ptr + %"10" = load i32, ptr %"18", align 4 + store i32 %"10", ptr addrspace(5) %"6", align 4 + %"13" = load i32, ptr addrspace(5) %"6", align 4 + %"12" = call i32 @llvm.ctpop.i32(i32 %"13") + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load i32, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store i32 %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/pred_not.ll b/ptx/src/test/spirv_run/pred_not.ll index 047f94a..69f7646 100644 --- a/ptx/src/test/spirv_run/pred_not.ll +++ b/ptx/src/test/spirv_run/pred_not.ll @@ -1,64 +1,62 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { -"42": +define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { +"41": %"14" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"14", align 1 - %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) + %"15" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"37", align 8 - store i64 %"16", ptr addrspace(5) %"4", align 8 - %"17" = load i64, ptr addrspace(4) %"38", align 8 - store i64 %"17", ptr addrspace(5) %"5", align 8 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"39" = inttoptr i64 %"19" to ptr - %"18" = load i64, ptr %"39", 
align 8 - store i64 %"18", ptr addrspace(5) %"6", align 8 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"40" = inttoptr i64 %"21" to ptr - %"44" = getelementptr inbounds i8, ptr %"40", i64 8 - %"20" = load i64, ptr %"44", align 8 - store i64 %"20", ptr addrspace(5) %"7", align 8 - %"23" = load i64, ptr addrspace(5) %"6", align 8 - %"24" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = icmp ult i64 %"23", %"24" - store i1 %"22", ptr addrspace(5) %"9", align 1 + store i64 %"16", ptr addrspace(5) %"5", align 8 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"38" = inttoptr i64 %"18" to ptr + %"17" = load i64, ptr %"38", align 8 + store i64 %"17", ptr addrspace(5) %"6", align 8 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"39" = inttoptr i64 %"20" to ptr + %"43" = getelementptr inbounds i8, ptr %"39", i64 8 + %"19" = load i64, ptr %"43", align 8 + store i64 %"19", ptr addrspace(5) %"7", align 8 + %"22" = load i64, ptr addrspace(5) %"6", align 8 + %"23" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = icmp ult i64 %"22", %"23" + store i1 %"21", ptr addrspace(5) %"9", align 1 + %"25" = load i1, ptr addrspace(5) %"9", align 1 + %"24" = xor i1 %"25", true + store i1 %"24", ptr addrspace(5) %"9", align 1 %"26" = load i1, ptr addrspace(5) %"9", align 1 - %"25" = xor i1 %"26", true - store i1 %"25", ptr addrspace(5) %"9", align 1 - %"27" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"27", label %"10", label %"11" + br i1 %"26", label %"10", label %"11" -"10": ; preds = %"42" +"10": ; preds = %"41" %0 = alloca i64, align 8, addrspace(5) store i64 1, ptr addrspace(5) %0, align 8 - %"28" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"28", ptr addrspace(5) %"8", align 8 + %"27" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"27", ptr addrspace(5) %"8", align 8 br label %"11" -"11": ; preds = %"10", %"42" - %"29" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"29", label %"13", label %"12" +"11": ; preds = %"10", %"41" + %"28" 
= load i1, ptr addrspace(5) %"9", align 1 + br i1 %"28", label %"13", label %"12" "12": ; preds = %"11" %1 = alloca i64, align 8, addrspace(5) store i64 2, ptr addrspace(5) %1, align 8 - %"30" = load i64, ptr addrspace(5) %1, align 8 - store i64 %"30", ptr addrspace(5) %"8", align 8 + %"29" = load i64, ptr addrspace(5) %1, align 8 + store i64 %"29", ptr addrspace(5) %"8", align 8 br label %"13" "13": ; preds = %"12", %"11" - %"31" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = load i64, ptr addrspace(5) %"8", align 8 - %"41" = inttoptr i64 %"31" to ptr - store i64 %"32", ptr %"41", align 8 + %"30" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = load i64, ptr addrspace(5) %"8", align 8 + %"40" = inttoptr i64 %"30" to ptr + store i64 %"31", ptr %"40", align 8 ret void } diff --git a/ptx/src/test/spirv_run/prmt.ll b/ptx/src/test/spirv_run/prmt.ll index 87313c6..bdcb12d 100644 --- a/ptx/src/test/spirv_run/prmt.ll +++ b/ptx/src/test/spirv_run/prmt.ll @@ -1,42 +1,40 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 { -"44": +define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 { +"43": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + %"11" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"11", ptr addrspace(5) %"4", 
align 8 %"12" = load i64, ptr addrspace(4) %"32", align 8 - store i64 %"12", ptr addrspace(5) %"4", align 8 - %"13" = load i64, ptr addrspace(4) %"33", align 8 - store i64 %"13", ptr addrspace(5) %"5", align 8 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"34" = inttoptr i64 %"15" to ptr - %"14" = load i32, ptr %"34", align 4 - store i32 %"14", ptr addrspace(5) %"6", align 4 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"35" = inttoptr i64 %"17" to ptr - %"46" = getelementptr inbounds i8, ptr %"35", i64 4 - %"16" = load i32, ptr %"46", align 4 - store i32 %"16", ptr addrspace(5) %"7", align 4 - %"19" = load i32, ptr addrspace(5) %"6", align 4 - %"20" = load i32, ptr addrspace(5) %"7", align 4 - %0 = bitcast i32 %"19" to <4 x i8> - %1 = bitcast i32 %"20" to <4 x i8> + store i64 %"12", ptr addrspace(5) %"5", align 8 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"33" = inttoptr i64 %"14" to ptr + %"13" = load i32, ptr %"33", align 4 + store i32 %"13", ptr addrspace(5) %"6", align 4 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"34" = inttoptr i64 %"16" to ptr + %"45" = getelementptr inbounds i8, ptr %"34", i64 4 + %"15" = load i32, ptr %"45", align 4 + store i32 %"15", ptr addrspace(5) %"7", align 4 + %"18" = load i32, ptr addrspace(5) %"6", align 4 + %"19" = load i32, ptr addrspace(5) %"7", align 4 + %0 = bitcast i32 %"18" to <4 x i8> + %1 = bitcast i32 %"19" to <4 x i8> %2 = shufflevector <4 x i8> %0, <4 x i8> %1, <4 x i32> - %"36" = bitcast <4 x i8> %2 to i32 - store i32 %"36", ptr addrspace(5) %"8", align 4 - %"22" = load i32, ptr addrspace(5) %"6", align 4 - %"23" = load i32, ptr addrspace(5) %"7", align 4 - %3 = bitcast i32 %"22" to <4 x i8> - %4 = bitcast i32 %"23" to <4 x i8> + %"35" = bitcast <4 x i8> %2 to i32 + store i32 %"35", ptr addrspace(5) %"8", align 4 + %"21" = load i32, ptr addrspace(5) %"6", align 4 + %"22" = load i32, ptr addrspace(5) %"7", align 4 + %3 = bitcast i32 %"21" to <4 x i8> + %4 = bitcast i32 %"22" to <4 
x i8> %5 = shufflevector <4 x i8> %3, <4 x i8> %4, <4 x i32> %6 = extractelement <4 x i8> %5, i32 0 %7 = ashr i8 %6, 7 @@ -44,17 +42,17 @@ define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"32", ptr %9 = extractelement <4 x i8> %8, i32 2 %10 = ashr i8 %9, 7 %11 = insertelement <4 x i8> %8, i8 %10, i32 2 - %"39" = bitcast <4 x i8> %11 to i32 - store i32 %"39", ptr addrspace(5) %"9", align 4 - %"24" = load i64, ptr addrspace(5) %"5", align 8 - %"25" = load i32, ptr addrspace(5) %"8", align 4 - %"42" = inttoptr i64 %"24" to ptr - store i32 %"25", ptr %"42", align 4 - %"26" = load i64, ptr addrspace(5) %"5", align 8 - %"27" = load i32, ptr addrspace(5) %"9", align 4 - %"43" = inttoptr i64 %"26" to ptr - %"48" = getelementptr inbounds i8, ptr %"43", i64 4 - store i32 %"27", ptr %"48", align 4 + %"38" = bitcast <4 x i8> %11 to i32 + store i32 %"38", ptr addrspace(5) %"9", align 4 + %"23" = load i64, ptr addrspace(5) %"5", align 8 + %"24" = load i32, ptr addrspace(5) %"8", align 4 + %"41" = inttoptr i64 %"23" to ptr + store i32 %"24", ptr %"41", align 4 + %"25" = load i64, ptr addrspace(5) %"5", align 8 + %"26" = load i32, ptr addrspace(5) %"9", align 4 + %"42" = inttoptr i64 %"25" to ptr + %"47" = getelementptr inbounds i8, ptr %"42", i64 4 + store i32 %"26", ptr %"47", align 4 ret void } diff --git a/ptx/src/test/spirv_run/prmt_non_immediate.ll b/ptx/src/test/spirv_run/prmt_non_immediate.ll index c1a1b9d..d503917 100644 --- a/ptx/src/test/spirv_run/prmt_non_immediate.ll +++ b/ptx/src/test/spirv_run/prmt_non_immediate.ll @@ -1,45 +1,43 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @prmt_non_immediate(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 { -"34": +define protected amdgpu_kernel void 
@prmt_non_immediate(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 { +"33": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"25", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"26", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"27", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"28" = inttoptr i64 %"14" to ptr - %"13" = load i32, ptr %"28", align 4 - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"29" = inttoptr i64 %"16" to ptr - %"36" = getelementptr inbounds i8, ptr %"29", i64 4 - %"15" = load i32, ptr %"36", align 4 - store i32 %"15", ptr addrspace(5) %"7", align 4 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"27" = inttoptr i64 %"13" to ptr + %"12" = load i32, ptr %"27", align 4 + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"28" = inttoptr i64 %"15" to ptr + %"35" = getelementptr inbounds i8, ptr %"28", i64 4 + %"14" = load i32, ptr %"35", align 4 + store i32 %"14", ptr addrspace(5) %"7", align 4 %0 = alloca i32, align 4, addrspace(5) store i32 64, ptr addrspace(5) %0, align 4 - %"17" = load i32, ptr addrspace(5) %0, align 4 - store i32 %"17", ptr addrspace(5) %"8", align 4 - %"19" = load i32, ptr addrspace(5) %"6", align 4 - %"20" = load i32, ptr addrspace(5) %"7", align 4 - %1 = bitcast i32 %"19" to <4 x i8> - %2 = bitcast i32 %"20" 
to <4 x i8> + %"16" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"16", ptr addrspace(5) %"8", align 4 + %"18" = load i32, ptr addrspace(5) %"6", align 4 + %"19" = load i32, ptr addrspace(5) %"7", align 4 + %1 = bitcast i32 %"18" to <4 x i8> + %2 = bitcast i32 %"19" to <4 x i8> %3 = shufflevector <4 x i8> %1, <4 x i8> %2, <4 x i32> - %"30" = bitcast <4 x i8> %3 to i32 - store i32 %"30", ptr addrspace(5) %"7", align 4 - %"21" = load i64, ptr addrspace(5) %"5", align 8 - %"22" = load i32, ptr addrspace(5) %"7", align 4 - %"33" = inttoptr i64 %"21" to ptr - store i32 %"22", ptr %"33", align 4 + %"29" = bitcast <4 x i8> %3 to i32 + store i32 %"29", ptr addrspace(5) %"7", align 4 + %"20" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = load i32, ptr addrspace(5) %"7", align 4 + %"32" = inttoptr i64 %"20" to ptr + store i32 %"21", ptr %"32", align 4 ret void } diff --git a/ptx/src/test/spirv_run/rcp.ll b/ptx/src/test/spirv_run/rcp.ll index cb55c6a..116687b 100644 --- a/ptx/src/test/spirv_run/rcp.ll +++ b/ptx/src/test/spirv_run/rcp.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) 
%"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load float, ptr %"19", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = fdiv arcp afn float 1.000000e+00, %"14" - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store float %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load float, ptr %"18", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = fdiv arcp afn float 1.000000e+00, %"13" + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store float %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/reg_local.ll b/ptx/src/test/spirv_run/reg_local.ll index c01a5e0..48c881d 100644 --- a/ptx/src/test/spirv_run/reg_local.ll +++ b/ptx/src/test/spirv_run/reg_local.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"34": +define protected amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { +"33": %"8" = alloca i1, align 1, 
addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca [8 x i8], align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"23", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"11" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"11", ptr addrspace(5) %"6", align 8 - %"13" = load i64, ptr addrspace(5) %"5", align 8 - %"27" = inttoptr i64 %"13" to ptr addrspace(1) - %"26" = load i64, ptr addrspace(1) %"27", align 8 - store i64 %"26", ptr addrspace(5) %"7", align 8 - %"14" = load i64, ptr addrspace(5) %"7", align 8 - %"19" = add i64 %"14", 1 - %"28" = addrspacecast ptr addrspace(5) %"4" to ptr - store i64 %"19", ptr %"28", align 8 - %"30" = addrspacecast ptr addrspace(5) %"4" to ptr - %"38" = getelementptr inbounds i8, ptr %"30", i64 0 - %"31" = load i64, ptr %"38", align 8 - store i64 %"31", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"6", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"32" = inttoptr i64 %"16" to ptr addrspace(1) - %"40" = getelementptr inbounds i8, ptr addrspace(1) %"32", i64 0 - store i64 %"17", ptr addrspace(1) %"40", align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"26" = inttoptr i64 %"12" to ptr addrspace(1) + %"25" = load i64, ptr addrspace(1) %"26", align 8 + store i64 %"25", ptr addrspace(5) %"7", align 8 + %"13" = load i64, ptr addrspace(5) %"7", align 8 + %"18" = add i64 %"13", 1 + %"27" = addrspacecast ptr addrspace(5) %"4" to ptr + store i64 %"18", ptr %"27", align 8 + %"29" = addrspacecast ptr addrspace(5) %"4" to ptr + %"37" = getelementptr inbounds i8, ptr %"29", i64 0 + %"30" = 
load i64, ptr %"37", align 8 + store i64 %"30", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"6", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"31" = inttoptr i64 %"15" to ptr addrspace(1) + %"39" = getelementptr inbounds i8, ptr addrspace(1) %"31", i64 0 + store i64 %"16", ptr addrspace(1) %"39", align 8 ret void } diff --git a/ptx/src/test/spirv_run/rem.ll b/ptx/src/test/spirv_run/rem.ll index 3a1e26c..4535f49 100644 --- a/ptx/src/test/spirv_run/rem.ll +++ b/ptx/src/test/spirv_run/rem.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": +define protected amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"27": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"25", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 4 
- %"14" = load i32, ptr %"30", align 4 - store i32 %"14", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 - %"16" = srem i32 %"17", %"18" - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"27" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"27", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"24", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"29" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"29", align 4 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 + %"15" = srem i32 %"16", %"17" + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"26" = inttoptr i64 %"18" to ptr + store i32 %"19", ptr %"26", align 4 ret void } diff --git a/ptx/src/test/spirv_run/rsqrt.ll b/ptx/src/test/spirv_run/rsqrt.ll index ffdd662..7797260 100644 --- a/ptx/src/test/spirv_run/rsqrt.ll +++ b/ptx/src/test/spirv_run/rsqrt.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, 
addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca double, align 8, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load double, ptr %"19", align 8 - store double %"11", ptr addrspace(5) %"6", align 8 - %"14" = load double, ptr addrspace(5) %"6", align 8 - %0 = call afn double @llvm.sqrt.f64(double %"14") - %"13" = fdiv arcp afn double 1.000000e+00, %0 - store double %"13", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load double, ptr addrspace(5) %"6", align 8 - %"20" = inttoptr i64 %"15" to ptr - store double %"16", ptr %"20", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load double, ptr %"18", align 8 + store double %"10", ptr addrspace(5) %"6", align 8 + %"13" = load double, ptr addrspace(5) %"6", align 8 + %0 = call afn double @llvm.sqrt.f64(double %"13") + %"12" = fdiv arcp afn double 1.000000e+00, %0 + store double %"12", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load double, ptr addrspace(5) %"6", align 8 + %"19" = inttoptr i64 %"14" to ptr + store double %"15", ptr %"19", align 8 ret void } diff --git a/ptx/src/test/spirv_run/s64_min.ll b/ptx/src/test/spirv_run/s64_min.ll index 3f741e7..98eee04 100644 --- a/ptx/src/test/spirv_run/s64_min.ll +++ b/ptx/src/test/spirv_run/s64_min.ll @@ -1,24 +1,22 @@ target 
datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @s64_min(ptr addrspace(4) byref(i64) %"13", ptr addrspace(4) byref(i64) %"14") #0 { -"16": +define protected amdgpu_kernel void @s64_min(ptr addrspace(4) byref(i64) %"12", ptr addrspace(4) byref(i64) %"13") #0 { +"15": %"6" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"6", align 1 - %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) - %"8" = load i64, ptr addrspace(4) %"14", align 8 - store i64 %"8", ptr addrspace(5) %"4", align 8 + %"7" = load i64, ptr addrspace(4) %"13", align 8 + store i64 %"7", ptr addrspace(5) %"4", align 8 %0 = alloca i64, align 8, addrspace(5) store i64 -9223372036854775808, ptr addrspace(5) %0, align 8 - %"9" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"9", ptr addrspace(5) %"5", align 8 - %"10" = load i64, ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(5) %"5", align 8 - %"15" = inttoptr i64 %"10" to ptr - store i64 %"11", ptr %"15", align 8 + %"8" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"8", ptr addrspace(5) %"5", align 8 + %"9" = load i64, ptr addrspace(5) %"4", align 8 + %"10" = load i64, ptr addrspace(5) %"5", align 8 + %"14" = inttoptr i64 %"9" to ptr + store i64 %"10", ptr %"14", align 8 ret void } diff --git a/ptx/src/test/spirv_run/selp.ll b/ptx/src/test/spirv_run/selp.ll index 6124887..073ec38 100644 --- a/ptx/src/test/spirv_run/selp.ll +++ b/ptx/src/test/spirv_run/selp.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" 
target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"29": +define protected amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { +"28": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i16, align 2, addrspace(5) %"7" = alloca i16, align 2, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"23", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"13" to ptr - %"12" = load i16, ptr %"26", align 2 - store i16 %"12", ptr addrspace(5) %"6", align 2 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"27" = inttoptr i64 %"15" to ptr - %"31" = getelementptr inbounds i8, ptr %"27", i64 2 - %"14" = load i16, ptr %"31", align 2 - store i16 %"14", ptr addrspace(5) %"7", align 2 - %"17" = load i16, ptr addrspace(5) %"6", align 2 - %"18" = load i16, ptr addrspace(5) %"7", align 2 - %"16" = select i1 false, i16 %"17", i16 %"18" - store i16 %"16", ptr addrspace(5) %"6", align 2 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i16, ptr addrspace(5) %"6", align 2 - %"28" = inttoptr i64 %"19" to ptr - store i16 %"20", ptr %"28", align 2 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"12" to ptr + %"11" = load i16, ptr %"25", align 2 + store i16 %"11", ptr addrspace(5) %"6", align 2 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"26" = 
inttoptr i64 %"14" to ptr + %"30" = getelementptr inbounds i8, ptr %"26", i64 2 + %"13" = load i16, ptr %"30", align 2 + store i16 %"13", ptr addrspace(5) %"7", align 2 + %"16" = load i16, ptr addrspace(5) %"6", align 2 + %"17" = load i16, ptr addrspace(5) %"7", align 2 + %"15" = select i1 false, i16 %"16", i16 %"17" + store i16 %"15", ptr addrspace(5) %"6", align 2 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i16, ptr addrspace(5) %"6", align 2 + %"27" = inttoptr i64 %"18" to ptr + store i16 %"19", ptr %"27", align 2 ret void } diff --git a/ptx/src/test/spirv_run/selp_true.ll b/ptx/src/test/spirv_run/selp_true.ll index 283eb81..4eda981 100644 --- a/ptx/src/test/spirv_run/selp_true.ll +++ b/ptx/src/test/spirv_run/selp_true.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"29": +define protected amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { +"28": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i16, align 2, addrspace(5) %"7" = alloca i16, align 2, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"23", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"13" to 
ptr - %"12" = load i16, ptr %"26", align 2 - store i16 %"12", ptr addrspace(5) %"6", align 2 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"27" = inttoptr i64 %"15" to ptr - %"31" = getelementptr inbounds i8, ptr %"27", i64 2 - %"14" = load i16, ptr %"31", align 2 - store i16 %"14", ptr addrspace(5) %"7", align 2 - %"17" = load i16, ptr addrspace(5) %"6", align 2 - %"18" = load i16, ptr addrspace(5) %"7", align 2 - %"16" = select i1 true, i16 %"17", i16 %"18" - store i16 %"16", ptr addrspace(5) %"6", align 2 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i16, ptr addrspace(5) %"6", align 2 - %"28" = inttoptr i64 %"19" to ptr - store i16 %"20", ptr %"28", align 2 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"12" to ptr + %"11" = load i16, ptr %"25", align 2 + store i16 %"11", ptr addrspace(5) %"6", align 2 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"26" = inttoptr i64 %"14" to ptr + %"30" = getelementptr inbounds i8, ptr %"26", i64 2 + %"13" = load i16, ptr %"30", align 2 + store i16 %"13", ptr addrspace(5) %"7", align 2 + %"16" = load i16, ptr addrspace(5) %"6", align 2 + %"17" = load i16, ptr addrspace(5) %"7", align 2 + %"15" = select i1 true, i16 %"16", i16 %"17" + store i16 %"15", ptr addrspace(5) %"6", align 2 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i16, ptr addrspace(5) %"6", align 2 + %"27" = inttoptr i64 %"18" to ptr + store i16 %"19", ptr %"27", align 2 ret void } diff --git a/ptx/src/test/spirv_run/set_f16x2.ll b/ptx/src/test/spirv_run/set_f16x2.ll index 4a2c8ea..2a8caf3 100644 --- a/ptx/src/test/spirv_run/set_f16x2.ll +++ b/ptx/src/test/spirv_run/set_f16x2.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" 
-define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 { -"59": +define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { +"58": %"11" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"11", align 1 - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -14,54 +12,54 @@ define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"41" %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca <2 x half>, align 4, addrspace(5) + %"12" = load i64, ptr addrspace(4) %"40", align 8 + store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"41", align 8 - store i64 %"13", ptr addrspace(5) %"4", align 8 - %"14" = load i64, ptr addrspace(4) %"42", align 8 - store i64 %"14", ptr addrspace(5) %"5", align 8 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"44" = inttoptr i64 %"16" to ptr - %"43" = load i32, ptr %"44", align 4 - store i32 %"43", ptr addrspace(5) %"6", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"45" = inttoptr i64 %"18" to ptr - %"61" = getelementptr inbounds i8, ptr %"45", i64 4 - %"46" = load i32, ptr %"61", align 4 - store i32 %"46", ptr addrspace(5) %"7", align 4 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"47" = inttoptr i64 %"20" to ptr - %"63" = getelementptr inbounds i8, ptr %"47", i64 8 - %"48" = load i32, ptr %"63", align 4 - store i32 %"48", ptr addrspace(5) %"8", align 4 - %"22" = load i64, ptr addrspace(5) %"4", align 8 - %"49" = inttoptr i64 %"22" to ptr - %"65" = getelementptr inbounds i8, ptr %"49", i64 12 - %"50" = load i32, ptr %"65", align 4 - store i32 %"50", ptr addrspace(5) %"9", align 4 - %"24" = load i32, ptr 
addrspace(5) %"6", align 4 - %"25" = load i32, ptr addrspace(5) %"7", align 4 + store i64 %"13", ptr addrspace(5) %"5", align 8 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"43" = inttoptr i64 %"15" to ptr + %"42" = load i32, ptr %"43", align 4 + store i32 %"42", ptr addrspace(5) %"6", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"44" = inttoptr i64 %"17" to ptr + %"60" = getelementptr inbounds i8, ptr %"44", i64 4 + %"45" = load i32, ptr %"60", align 4 + store i32 %"45", ptr addrspace(5) %"7", align 4 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"46" = inttoptr i64 %"19" to ptr + %"62" = getelementptr inbounds i8, ptr %"46", i64 8 + %"47" = load i32, ptr %"62", align 4 + store i32 %"47", ptr addrspace(5) %"8", align 4 + %"21" = load i64, ptr addrspace(5) %"4", align 8 + %"48" = inttoptr i64 %"21" to ptr + %"64" = getelementptr inbounds i8, ptr %"48", i64 12 + %"49" = load i32, ptr %"64", align 4 + store i32 %"49", ptr addrspace(5) %"9", align 4 + %"23" = load i32, ptr addrspace(5) %"6", align 4 + %"24" = load i32, ptr addrspace(5) %"7", align 4 + %"51" = bitcast i32 %"23" to <2 x half> %"52" = bitcast i32 %"24" to <2 x half> - %"53" = bitcast i32 %"25" to <2 x half> - %0 = fcmp ugt <2 x half> %"52", %"53" + %0 = fcmp ugt <2 x half> %"51", %"52" %1 = sext <2 x i1> %0 to <2 x i16> - %"51" = bitcast <2 x i16> %1 to i32 - store i32 %"51", ptr addrspace(5) %"6", align 4 - %"27" = load i32, ptr addrspace(5) %"8", align 4 - %"28" = load i32, ptr addrspace(5) %"9", align 4 + %"50" = bitcast <2 x i16> %1 to i32 + store i32 %"50", ptr addrspace(5) %"6", align 4 + %"26" = load i32, ptr addrspace(5) %"8", align 4 + %"27" = load i32, ptr addrspace(5) %"9", align 4 + %"54" = bitcast i32 %"26" to <2 x half> %"55" = bitcast i32 %"27" to <2 x half> - %"56" = bitcast i32 %"28" to <2 x half> - %2 = fcmp oeq <2 x half> %"55", %"56" - %"54" = uitofp <2 x i1> %2 to <2 x half> - %"26" = bitcast <2 x half> %"54" to i32 - store i32 %"26", ptr 
addrspace(5) %"8", align 4 - %"29" = load i64, ptr addrspace(5) %"5", align 8 - %"30" = load i32, ptr addrspace(5) %"6", align 4 - %"57" = inttoptr i64 %"29" to ptr - store i32 %"30", ptr %"57", align 4 - %"31" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = load i32, ptr addrspace(5) %"8", align 4 - %"58" = inttoptr i64 %"31" to ptr - %"67" = getelementptr inbounds i8, ptr %"58", i64 4 - store i32 %"32", ptr %"67", align 4 + %2 = fcmp oeq <2 x half> %"54", %"55" + %"53" = uitofp <2 x i1> %2 to <2 x half> + %"25" = bitcast <2 x half> %"53" to i32 + store i32 %"25", ptr addrspace(5) %"8", align 4 + %"28" = load i64, ptr addrspace(5) %"5", align 8 + %"29" = load i32, ptr addrspace(5) %"6", align 4 + %"56" = inttoptr i64 %"28" to ptr + store i32 %"29", ptr %"56", align 4 + %"30" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = load i32, ptr addrspace(5) %"8", align 4 + %"57" = inttoptr i64 %"30" to ptr + %"66" = getelementptr inbounds i8, ptr %"57", i64 4 + store i32 %"31", ptr %"66", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp.ll b/ptx/src/test/spirv_run/setp.ll index a54f8f6..2f95556 100644 --- a/ptx/src/test/spirv_run/setp.ll +++ b/ptx/src/test/spirv_run/setp.ll @@ -1,61 +1,59 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { -"40": +define protected amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { +"39": %"14" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"14", align 1 - %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 
8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) + %"15" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"35", align 8 - store i64 %"16", ptr addrspace(5) %"4", align 8 - %"17" = load i64, ptr addrspace(4) %"36", align 8 - store i64 %"17", ptr addrspace(5) %"5", align 8 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"37" = inttoptr i64 %"19" to ptr - %"18" = load i64, ptr %"37", align 8 - store i64 %"18", ptr addrspace(5) %"6", align 8 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"38" = inttoptr i64 %"21" to ptr - %"42" = getelementptr inbounds i8, ptr %"38", i64 8 - %"20" = load i64, ptr %"42", align 8 - store i64 %"20", ptr addrspace(5) %"7", align 8 - %"23" = load i64, ptr addrspace(5) %"6", align 8 - %"24" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = icmp ult i64 %"23", %"24" - store i1 %"22", ptr addrspace(5) %"9", align 1 - %"25" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"25", label %"10", label %"11" + store i64 %"16", ptr addrspace(5) %"5", align 8 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"36" = inttoptr i64 %"18" to ptr + %"17" = load i64, ptr %"36", align 8 + store i64 %"17", ptr addrspace(5) %"6", align 8 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"37" = inttoptr i64 %"20" to ptr + %"41" = getelementptr inbounds i8, ptr %"37", i64 8 + %"19" = load i64, ptr %"41", align 8 + store i64 %"19", ptr addrspace(5) %"7", align 8 + %"22" = load i64, ptr addrspace(5) %"6", align 8 + %"23" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = icmp ult i64 %"22", %"23" + store i1 %"21", ptr addrspace(5) %"9", align 1 + %"24" = load i1, ptr addrspace(5) %"9", align 1 + br i1 %"24", label %"10", label %"11" -"10": ; preds = %"40" +"10": ; preds = %"39" %0 = alloca i64, align 8, addrspace(5) store i64 1, ptr addrspace(5) %0, align 8 - %"26" = load 
i64, ptr addrspace(5) %0, align 8 - store i64 %"26", ptr addrspace(5) %"8", align 8 + %"25" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"25", ptr addrspace(5) %"8", align 8 br label %"11" -"11": ; preds = %"10", %"40" - %"27" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"27", label %"13", label %"12" +"11": ; preds = %"10", %"39" + %"26" = load i1, ptr addrspace(5) %"9", align 1 + br i1 %"26", label %"13", label %"12" "12": ; preds = %"11" %1 = alloca i64, align 8, addrspace(5) store i64 2, ptr addrspace(5) %1, align 8 - %"28" = load i64, ptr addrspace(5) %1, align 8 - store i64 %"28", ptr addrspace(5) %"8", align 8 + %"27" = load i64, ptr addrspace(5) %1, align 8 + store i64 %"27", ptr addrspace(5) %"8", align 8 br label %"13" "13": ; preds = %"12", %"11" - %"29" = load i64, ptr addrspace(5) %"5", align 8 - %"30" = load i64, ptr addrspace(5) %"8", align 8 - %"39" = inttoptr i64 %"29" to ptr - store i64 %"30", ptr %"39", align 8 + %"28" = load i64, ptr addrspace(5) %"5", align 8 + %"29" = load i64, ptr addrspace(5) %"8", align 8 + %"38" = inttoptr i64 %"28" to ptr + store i64 %"29", ptr %"38", align 8 ret void } diff --git a/ptx/src/test/spirv_run/setp_bool.ll b/ptx/src/test/spirv_run/setp_bool.ll index 1707a3d..ac1b2bb 100644 --- a/ptx/src/test/spirv_run/setp_bool.ll +++ b/ptx/src/test/spirv_run/setp_bool.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @setp_bool(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 { -"51": +define protected amdgpu_kernel void @setp_bool(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 { +"50": %"16" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"16", align 1 - %"17" = alloca i1, align 1, addrspace(5) - 
store i1 false, ptr addrspace(5) %"17", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) @@ -15,65 +13,65 @@ define protected amdgpu_kernel void @setp_bool(ptr addrspace(4) byref(i64) %"45" %"9" = alloca i1, align 1, addrspace(5) %"10" = alloca i1, align 1, addrspace(5) %"11" = alloca i1, align 1, addrspace(5) + %"17" = load i64, ptr addrspace(4) %"44", align 8 + store i64 %"17", ptr addrspace(5) %"4", align 8 %"18" = load i64, ptr addrspace(4) %"45", align 8 - store i64 %"18", ptr addrspace(5) %"4", align 8 - %"19" = load i64, ptr addrspace(4) %"46", align 8 - store i64 %"19", ptr addrspace(5) %"5", align 8 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"47" = inttoptr i64 %"21" to ptr - %"20" = load float, ptr %"47", align 4 - store float %"20", ptr addrspace(5) %"6", align 4 - %"23" = load i64, ptr addrspace(5) %"4", align 8 - %"48" = inttoptr i64 %"23" to ptr - %"53" = getelementptr inbounds i8, ptr %"48", i64 4 - %"22" = load float, ptr %"53", align 4 - store float %"22", ptr addrspace(5) %"7", align 4 - %"25" = load i64, ptr addrspace(5) %"4", align 8 - %"49" = inttoptr i64 %"25" to ptr - %"55" = getelementptr inbounds i8, ptr %"49", i64 8 - %"24" = load float, ptr %"55", align 4 - store float %"24", ptr addrspace(5) %"8", align 4 + store i64 %"18", ptr addrspace(5) %"5", align 8 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"46" = inttoptr i64 %"20" to ptr + %"19" = load float, ptr %"46", align 4 + store float %"19", ptr addrspace(5) %"6", align 4 + %"22" = load i64, ptr addrspace(5) %"4", align 8 + %"47" = inttoptr i64 %"22" to ptr + %"52" = getelementptr inbounds i8, ptr %"47", i64 4 + %"21" = load float, ptr %"52", align 4 + store float %"21", ptr addrspace(5) %"7", align 4 + %"24" = load i64, ptr addrspace(5) %"4", align 8 + %"48" = inttoptr i64 %"24" to ptr + %"54" = getelementptr inbounds i8, ptr %"48", i64 8 + %"23" = load float, ptr %"54", align 4 + 
store float %"23", ptr addrspace(5) %"8", align 4 %0 = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %0, align 1 - %"26" = load i1, ptr addrspace(5) %0, align 1 - store i1 %"26", ptr addrspace(5) %"9", align 1 - %"29" = load float, ptr addrspace(5) %"6", align 4 - %"30" = load float, ptr addrspace(5) %"7", align 4 - %"31" = load i1, ptr addrspace(5) %"9", align 1 - %1 = fcmp ogt float %"29", %"30" + %"25" = load i1, ptr addrspace(5) %0, align 1 + store i1 %"25", ptr addrspace(5) %"9", align 1 + %"28" = load float, ptr addrspace(5) %"6", align 4 + %"29" = load float, ptr addrspace(5) %"7", align 4 + %"30" = load i1, ptr addrspace(5) %"9", align 1 + %1 = fcmp ogt float %"28", %"29" %2 = xor i1 %1, true - %"27" = and i1 %1, %"31" - %"28" = and i1 %2, %"31" - store i1 %"27", ptr addrspace(5) %"10", align 1 - store i1 %"28", ptr addrspace(5) %"11", align 1 - %"32" = load i1, ptr addrspace(5) %"10", align 1 - br i1 %"32", label %"12", label %"13" + %"26" = and i1 %1, %"30" + %"27" = and i1 %2, %"30" + store i1 %"26", ptr addrspace(5) %"10", align 1 + store i1 %"27", ptr addrspace(5) %"11", align 1 + %"31" = load i1, ptr addrspace(5) %"10", align 1 + br i1 %"31", label %"12", label %"13" -"12": ; preds = %"51" - %"34" = load float, ptr addrspace(5) %"6", align 4 +"12": ; preds = %"50" + %"33" = load float, ptr addrspace(5) %"6", align 4 %3 = alloca float, align 4, addrspace(5) - store float %"34", ptr addrspace(5) %3, align 4 - %"33" = load float, ptr addrspace(5) %3, align 4 - store float %"33", ptr addrspace(5) %"8", align 4 + store float %"33", ptr addrspace(5) %3, align 4 + %"32" = load float, ptr addrspace(5) %3, align 4 + store float %"32", ptr addrspace(5) %"8", align 4 br label %"13" -"13": ; preds = %"12", %"51" - %"35" = load i1, ptr addrspace(5) %"11", align 1 - br i1 %"35", label %"14", label %"15" +"13": ; preds = %"12", %"50" + %"34" = load i1, ptr addrspace(5) %"11", align 1 + br i1 %"34", label %"14", label %"15" "14": ; preds = %"13" - 
%"37" = load float, ptr addrspace(5) %"7", align 4 + %"36" = load float, ptr addrspace(5) %"7", align 4 %4 = alloca float, align 4, addrspace(5) - store float %"37", ptr addrspace(5) %4, align 4 - %"36" = load float, ptr addrspace(5) %4, align 4 - store float %"36", ptr addrspace(5) %"8", align 4 + store float %"36", ptr addrspace(5) %4, align 4 + %"35" = load float, ptr addrspace(5) %4, align 4 + store float %"35", ptr addrspace(5) %"8", align 4 br label %"15" "15": ; preds = %"14", %"13" - %"38" = load i64, ptr addrspace(5) %"5", align 8 - %"39" = load float, ptr addrspace(5) %"8", align 4 - %"50" = inttoptr i64 %"38" to ptr - store float %"39", ptr %"50", align 4 + %"37" = load i64, ptr addrspace(5) %"5", align 8 + %"38" = load float, ptr addrspace(5) %"8", align 4 + %"49" = inttoptr i64 %"37" to ptr + store float %"38", ptr %"49", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp_gt.ll b/ptx/src/test/spirv_run/setp_gt.ll index 0aa4831..3a8b965 100644 --- a/ptx/src/test/spirv_run/setp_gt.ll +++ b/ptx/src/test/spirv_run/setp_gt.ll @@ -1,63 +1,61 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { -"40": +define protected amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { +"39": %"14" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"14", align 1 - %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) %"9" = alloca i1, align 1, 
addrspace(5) + %"15" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"35", align 8 - store i64 %"16", ptr addrspace(5) %"4", align 8 - %"17" = load i64, ptr addrspace(4) %"36", align 8 - store i64 %"17", ptr addrspace(5) %"5", align 8 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"37" = inttoptr i64 %"19" to ptr - %"18" = load float, ptr %"37", align 4 - store float %"18", ptr addrspace(5) %"6", align 4 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"38" = inttoptr i64 %"21" to ptr - %"42" = getelementptr inbounds i8, ptr %"38", i64 4 - %"20" = load float, ptr %"42", align 4 - store float %"20", ptr addrspace(5) %"7", align 4 - %"23" = load float, ptr addrspace(5) %"6", align 4 - %"24" = load float, ptr addrspace(5) %"7", align 4 - %"22" = fcmp ogt float %"23", %"24" - store i1 %"22", ptr addrspace(5) %"9", align 1 - %"25" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"25", label %"10", label %"11" + store i64 %"16", ptr addrspace(5) %"5", align 8 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"36" = inttoptr i64 %"18" to ptr + %"17" = load float, ptr %"36", align 4 + store float %"17", ptr addrspace(5) %"6", align 4 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"37" = inttoptr i64 %"20" to ptr + %"41" = getelementptr inbounds i8, ptr %"37", i64 4 + %"19" = load float, ptr %"41", align 4 + store float %"19", ptr addrspace(5) %"7", align 4 + %"22" = load float, ptr addrspace(5) %"6", align 4 + %"23" = load float, ptr addrspace(5) %"7", align 4 + %"21" = fcmp ogt float %"22", %"23" + store i1 %"21", ptr addrspace(5) %"9", align 1 + %"24" = load i1, ptr addrspace(5) %"9", align 1 + br i1 %"24", label %"10", label %"11" -"10": ; preds = %"40" - %"27" = load float, ptr addrspace(5) %"6", align 4 +"10": ; preds = %"39" + %"26" = load float, ptr addrspace(5) %"6", align 4 %0 = alloca float, align 4, addrspace(5) - store float %"27", ptr addrspace(5) %0, 
align 4 - %"26" = load float, ptr addrspace(5) %0, align 4 - store float %"26", ptr addrspace(5) %"8", align 4 + store float %"26", ptr addrspace(5) %0, align 4 + %"25" = load float, ptr addrspace(5) %0, align 4 + store float %"25", ptr addrspace(5) %"8", align 4 br label %"11" -"11": ; preds = %"10", %"40" - %"28" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"28", label %"13", label %"12" +"11": ; preds = %"10", %"39" + %"27" = load i1, ptr addrspace(5) %"9", align 1 + br i1 %"27", label %"13", label %"12" "12": ; preds = %"11" - %"30" = load float, ptr addrspace(5) %"7", align 4 + %"29" = load float, ptr addrspace(5) %"7", align 4 %1 = alloca float, align 4, addrspace(5) - store float %"30", ptr addrspace(5) %1, align 4 - %"29" = load float, ptr addrspace(5) %1, align 4 - store float %"29", ptr addrspace(5) %"8", align 4 + store float %"29", ptr addrspace(5) %1, align 4 + %"28" = load float, ptr addrspace(5) %1, align 4 + store float %"28", ptr addrspace(5) %"8", align 4 br label %"13" "13": ; preds = %"12", %"11" - %"31" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = load float, ptr addrspace(5) %"8", align 4 - %"39" = inttoptr i64 %"31" to ptr - store float %"32", ptr %"39", align 4 + %"30" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = load float, ptr addrspace(5) %"8", align 4 + %"38" = inttoptr i64 %"30" to ptr + store float %"31", ptr %"38", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp_leu.ll b/ptx/src/test/spirv_run/setp_leu.ll index 4105d59..9699fde 100644 --- a/ptx/src/test/spirv_run/setp_leu.ll +++ b/ptx/src/test/spirv_run/setp_leu.ll @@ -1,63 +1,61 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { -"40": +define 
protected amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { +"39": %"14" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"14", align 1 - %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) + %"15" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"35", align 8 - store i64 %"16", ptr addrspace(5) %"4", align 8 - %"17" = load i64, ptr addrspace(4) %"36", align 8 - store i64 %"17", ptr addrspace(5) %"5", align 8 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"37" = inttoptr i64 %"19" to ptr - %"18" = load float, ptr %"37", align 4 - store float %"18", ptr addrspace(5) %"6", align 4 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"38" = inttoptr i64 %"21" to ptr - %"42" = getelementptr inbounds i8, ptr %"38", i64 4 - %"20" = load float, ptr %"42", align 4 - store float %"20", ptr addrspace(5) %"7", align 4 - %"23" = load float, ptr addrspace(5) %"6", align 4 - %"24" = load float, ptr addrspace(5) %"7", align 4 - %"22" = fcmp ule float %"23", %"24" - store i1 %"22", ptr addrspace(5) %"9", align 1 - %"25" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"25", label %"10", label %"11" + store i64 %"16", ptr addrspace(5) %"5", align 8 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"36" = inttoptr i64 %"18" to ptr + %"17" = load float, ptr %"36", align 4 + store float %"17", ptr addrspace(5) %"6", align 4 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"37" = inttoptr i64 %"20" to ptr + %"41" = getelementptr inbounds i8, ptr %"37", i64 4 + %"19" = load float, ptr %"41", align 4 + store float %"19", ptr 
addrspace(5) %"7", align 4 + %"22" = load float, ptr addrspace(5) %"6", align 4 + %"23" = load float, ptr addrspace(5) %"7", align 4 + %"21" = fcmp ule float %"22", %"23" + store i1 %"21", ptr addrspace(5) %"9", align 1 + %"24" = load i1, ptr addrspace(5) %"9", align 1 + br i1 %"24", label %"10", label %"11" -"10": ; preds = %"40" - %"27" = load float, ptr addrspace(5) %"6", align 4 +"10": ; preds = %"39" + %"26" = load float, ptr addrspace(5) %"6", align 4 %0 = alloca float, align 4, addrspace(5) - store float %"27", ptr addrspace(5) %0, align 4 - %"26" = load float, ptr addrspace(5) %0, align 4 - store float %"26", ptr addrspace(5) %"8", align 4 + store float %"26", ptr addrspace(5) %0, align 4 + %"25" = load float, ptr addrspace(5) %0, align 4 + store float %"25", ptr addrspace(5) %"8", align 4 br label %"11" -"11": ; preds = %"10", %"40" - %"28" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"28", label %"13", label %"12" +"11": ; preds = %"10", %"39" + %"27" = load i1, ptr addrspace(5) %"9", align 1 + br i1 %"27", label %"13", label %"12" "12": ; preds = %"11" - %"30" = load float, ptr addrspace(5) %"7", align 4 + %"29" = load float, ptr addrspace(5) %"7", align 4 %1 = alloca float, align 4, addrspace(5) - store float %"30", ptr addrspace(5) %1, align 4 - %"29" = load float, ptr addrspace(5) %1, align 4 - store float %"29", ptr addrspace(5) %"8", align 4 + store float %"29", ptr addrspace(5) %1, align 4 + %"28" = load float, ptr addrspace(5) %1, align 4 + store float %"28", ptr addrspace(5) %"8", align 4 br label %"13" "13": ; preds = %"12", %"11" - %"31" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = load float, ptr addrspace(5) %"8", align 4 - %"39" = inttoptr i64 %"31" to ptr - store float %"32", ptr %"39", align 4 + %"30" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = load float, ptr addrspace(5) %"8", align 4 + %"38" = inttoptr i64 %"30" to ptr + store float %"31", ptr %"38", align 4 ret void } diff --git 
a/ptx/src/test/spirv_run/setp_nan.ll b/ptx/src/test/spirv_run/setp_nan.ll index da9c62a..1368386 100644 --- a/ptx/src/test/spirv_run/setp_nan.ll +++ b/ptx/src/test/spirv_run/setp_nan.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"116", ptr addrspace(4) byref(i64) %"117") #0 { -"130": +define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115", ptr addrspace(4) byref(i64) %"116") #0 { +"129": %"32" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"32", align 1 - %"33" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"33", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) @@ -19,172 +17,172 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"116" %"13" = alloca float, align 4, addrspace(5) %"14" = alloca i32, align 4, addrspace(5) %"15" = alloca i1, align 1, addrspace(5) + %"33" = load i64, ptr addrspace(4) %"115", align 8 + store i64 %"33", ptr addrspace(5) %"4", align 8 %"34" = load i64, ptr addrspace(4) %"116", align 8 - store i64 %"34", ptr addrspace(5) %"4", align 8 - %"35" = load i64, ptr addrspace(4) %"117", align 8 - store i64 %"35", ptr addrspace(5) %"5", align 8 - %"37" = load i64, ptr addrspace(5) %"4", align 8 - %"118" = inttoptr i64 %"37" to ptr - %"36" = load float, ptr %"118", align 4 - store float %"36", ptr addrspace(5) %"6", align 4 - %"39" = load i64, ptr addrspace(5) %"4", align 8 - %"119" = inttoptr i64 %"39" to ptr - %"132" = getelementptr inbounds i8, ptr %"119", i64 4 - %"38" = load float, ptr %"132", align 4 - store float %"38", ptr addrspace(5) %"7", align 4 - %"41" = load i64, ptr 
addrspace(5) %"4", align 8 - %"120" = inttoptr i64 %"41" to ptr - %"134" = getelementptr inbounds i8, ptr %"120", i64 8 - %"40" = load float, ptr %"134", align 4 - store float %"40", ptr addrspace(5) %"8", align 4 - %"43" = load i64, ptr addrspace(5) %"4", align 8 - %"121" = inttoptr i64 %"43" to ptr - %"136" = getelementptr inbounds i8, ptr %"121", i64 12 - %"42" = load float, ptr %"136", align 4 - store float %"42", ptr addrspace(5) %"9", align 4 - %"45" = load i64, ptr addrspace(5) %"4", align 8 - %"122" = inttoptr i64 %"45" to ptr - %"138" = getelementptr inbounds i8, ptr %"122", i64 16 - %"44" = load float, ptr %"138", align 4 - store float %"44", ptr addrspace(5) %"10", align 4 - %"47" = load i64, ptr addrspace(5) %"4", align 8 - %"123" = inttoptr i64 %"47" to ptr - %"140" = getelementptr inbounds i8, ptr %"123", i64 20 - %"46" = load float, ptr %"140", align 4 - store float %"46", ptr addrspace(5) %"11", align 4 - %"49" = load i64, ptr addrspace(5) %"4", align 8 - %"124" = inttoptr i64 %"49" to ptr - %"142" = getelementptr inbounds i8, ptr %"124", i64 24 - %"48" = load float, ptr %"142", align 4 - store float %"48", ptr addrspace(5) %"12", align 4 - %"51" = load i64, ptr addrspace(5) %"4", align 8 - %"125" = inttoptr i64 %"51" to ptr - %"144" = getelementptr inbounds i8, ptr %"125", i64 28 - %"50" = load float, ptr %"144", align 4 - store float %"50", ptr addrspace(5) %"13", align 4 - %"53" = load float, ptr addrspace(5) %"6", align 4 - %"54" = load float, ptr addrspace(5) %"7", align 4 - %"52" = fcmp uno float %"53", %"54" - store i1 %"52", ptr addrspace(5) %"15", align 1 - %"55" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"55", label %"16", label %"17" + store i64 %"34", ptr addrspace(5) %"5", align 8 + %"36" = load i64, ptr addrspace(5) %"4", align 8 + %"117" = inttoptr i64 %"36" to ptr + %"35" = load float, ptr %"117", align 4 + store float %"35", ptr addrspace(5) %"6", align 4 + %"38" = load i64, ptr addrspace(5) %"4", align 8 + %"118" = 
inttoptr i64 %"38" to ptr + %"131" = getelementptr inbounds i8, ptr %"118", i64 4 + %"37" = load float, ptr %"131", align 4 + store float %"37", ptr addrspace(5) %"7", align 4 + %"40" = load i64, ptr addrspace(5) %"4", align 8 + %"119" = inttoptr i64 %"40" to ptr + %"133" = getelementptr inbounds i8, ptr %"119", i64 8 + %"39" = load float, ptr %"133", align 4 + store float %"39", ptr addrspace(5) %"8", align 4 + %"42" = load i64, ptr addrspace(5) %"4", align 8 + %"120" = inttoptr i64 %"42" to ptr + %"135" = getelementptr inbounds i8, ptr %"120", i64 12 + %"41" = load float, ptr %"135", align 4 + store float %"41", ptr addrspace(5) %"9", align 4 + %"44" = load i64, ptr addrspace(5) %"4", align 8 + %"121" = inttoptr i64 %"44" to ptr + %"137" = getelementptr inbounds i8, ptr %"121", i64 16 + %"43" = load float, ptr %"137", align 4 + store float %"43", ptr addrspace(5) %"10", align 4 + %"46" = load i64, ptr addrspace(5) %"4", align 8 + %"122" = inttoptr i64 %"46" to ptr + %"139" = getelementptr inbounds i8, ptr %"122", i64 20 + %"45" = load float, ptr %"139", align 4 + store float %"45", ptr addrspace(5) %"11", align 4 + %"48" = load i64, ptr addrspace(5) %"4", align 8 + %"123" = inttoptr i64 %"48" to ptr + %"141" = getelementptr inbounds i8, ptr %"123", i64 24 + %"47" = load float, ptr %"141", align 4 + store float %"47", ptr addrspace(5) %"12", align 4 + %"50" = load i64, ptr addrspace(5) %"4", align 8 + %"124" = inttoptr i64 %"50" to ptr + %"143" = getelementptr inbounds i8, ptr %"124", i64 28 + %"49" = load float, ptr %"143", align 4 + store float %"49", ptr addrspace(5) %"13", align 4 + %"52" = load float, ptr addrspace(5) %"6", align 4 + %"53" = load float, ptr addrspace(5) %"7", align 4 + %"51" = fcmp uno float %"52", %"53" + store i1 %"51", ptr addrspace(5) %"15", align 1 + %"54" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"54", label %"16", label %"17" -"16": ; preds = %"130" +"16": ; preds = %"129" %0 = alloca i32, align 4, addrspace(5) store i32 1, 
ptr addrspace(5) %0, align 4 - %"56" = load i32, ptr addrspace(5) %0, align 4 - store i32 %"56", ptr addrspace(5) %"14", align 4 + %"55" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"55", ptr addrspace(5) %"14", align 4 br label %"17" -"17": ; preds = %"16", %"130" - %"57" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"57", label %"19", label %"18" +"17": ; preds = %"16", %"129" + %"56" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"56", label %"19", label %"18" "18": ; preds = %"17" %1 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %1, align 4 - %"58" = load i32, ptr addrspace(5) %1, align 4 - store i32 %"58", ptr addrspace(5) %"14", align 4 + %"57" = load i32, ptr addrspace(5) %1, align 4 + store i32 %"57", ptr addrspace(5) %"14", align 4 br label %"19" "19": ; preds = %"18", %"17" - %"59" = load i64, ptr addrspace(5) %"5", align 8 - %"60" = load i32, ptr addrspace(5) %"14", align 4 - %"126" = inttoptr i64 %"59" to ptr - store i32 %"60", ptr %"126", align 4 - %"62" = load float, ptr addrspace(5) %"8", align 4 - %"63" = load float, ptr addrspace(5) %"9", align 4 - %"61" = fcmp uno float %"62", %"63" - store i1 %"61", ptr addrspace(5) %"15", align 1 - %"64" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"64", label %"20", label %"21" + %"58" = load i64, ptr addrspace(5) %"5", align 8 + %"59" = load i32, ptr addrspace(5) %"14", align 4 + %"125" = inttoptr i64 %"58" to ptr + store i32 %"59", ptr %"125", align 4 + %"61" = load float, ptr addrspace(5) %"8", align 4 + %"62" = load float, ptr addrspace(5) %"9", align 4 + %"60" = fcmp uno float %"61", %"62" + store i1 %"60", ptr addrspace(5) %"15", align 1 + %"63" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"63", label %"20", label %"21" "20": ; preds = %"19" %2 = alloca i32, align 4, addrspace(5) store i32 1, ptr addrspace(5) %2, align 4 - %"65" = load i32, ptr addrspace(5) %2, align 4 - store i32 %"65", ptr addrspace(5) %"14", align 4 + %"64" = load i32, ptr 
addrspace(5) %2, align 4 + store i32 %"64", ptr addrspace(5) %"14", align 4 br label %"21" "21": ; preds = %"20", %"19" - %"66" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"66", label %"23", label %"22" + %"65" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"65", label %"23", label %"22" "22": ; preds = %"21" %3 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %3, align 4 - %"67" = load i32, ptr addrspace(5) %3, align 4 - store i32 %"67", ptr addrspace(5) %"14", align 4 + %"66" = load i32, ptr addrspace(5) %3, align 4 + store i32 %"66", ptr addrspace(5) %"14", align 4 br label %"23" "23": ; preds = %"22", %"21" - %"68" = load i64, ptr addrspace(5) %"5", align 8 - %"69" = load i32, ptr addrspace(5) %"14", align 4 - %"127" = inttoptr i64 %"68" to ptr - %"146" = getelementptr inbounds i8, ptr %"127", i64 4 - store i32 %"69", ptr %"146", align 4 - %"71" = load float, ptr addrspace(5) %"10", align 4 - %"72" = load float, ptr addrspace(5) %"11", align 4 - %"70" = fcmp uno float %"71", %"72" - store i1 %"70", ptr addrspace(5) %"15", align 1 - %"73" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"73", label %"24", label %"25" + %"67" = load i64, ptr addrspace(5) %"5", align 8 + %"68" = load i32, ptr addrspace(5) %"14", align 4 + %"126" = inttoptr i64 %"67" to ptr + %"145" = getelementptr inbounds i8, ptr %"126", i64 4 + store i32 %"68", ptr %"145", align 4 + %"70" = load float, ptr addrspace(5) %"10", align 4 + %"71" = load float, ptr addrspace(5) %"11", align 4 + %"69" = fcmp uno float %"70", %"71" + store i1 %"69", ptr addrspace(5) %"15", align 1 + %"72" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"72", label %"24", label %"25" "24": ; preds = %"23" %4 = alloca i32, align 4, addrspace(5) store i32 1, ptr addrspace(5) %4, align 4 - %"74" = load i32, ptr addrspace(5) %4, align 4 - store i32 %"74", ptr addrspace(5) %"14", align 4 + %"73" = load i32, ptr addrspace(5) %4, align 4 + store i32 %"73", ptr addrspace(5) %"14", align 4 br 
label %"25" "25": ; preds = %"24", %"23" - %"75" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"75", label %"27", label %"26" + %"74" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"74", label %"27", label %"26" "26": ; preds = %"25" %5 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %5, align 4 - %"76" = load i32, ptr addrspace(5) %5, align 4 - store i32 %"76", ptr addrspace(5) %"14", align 4 + %"75" = load i32, ptr addrspace(5) %5, align 4 + store i32 %"75", ptr addrspace(5) %"14", align 4 br label %"27" "27": ; preds = %"26", %"25" - %"77" = load i64, ptr addrspace(5) %"5", align 8 - %"78" = load i32, ptr addrspace(5) %"14", align 4 - %"128" = inttoptr i64 %"77" to ptr - %"148" = getelementptr inbounds i8, ptr %"128", i64 8 - store i32 %"78", ptr %"148", align 4 - %"80" = load float, ptr addrspace(5) %"12", align 4 - %"81" = load float, ptr addrspace(5) %"13", align 4 - %"79" = fcmp uno float %"80", %"81" - store i1 %"79", ptr addrspace(5) %"15", align 1 - %"82" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"82", label %"28", label %"29" + %"76" = load i64, ptr addrspace(5) %"5", align 8 + %"77" = load i32, ptr addrspace(5) %"14", align 4 + %"127" = inttoptr i64 %"76" to ptr + %"147" = getelementptr inbounds i8, ptr %"127", i64 8 + store i32 %"77", ptr %"147", align 4 + %"79" = load float, ptr addrspace(5) %"12", align 4 + %"80" = load float, ptr addrspace(5) %"13", align 4 + %"78" = fcmp uno float %"79", %"80" + store i1 %"78", ptr addrspace(5) %"15", align 1 + %"81" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"81", label %"28", label %"29" "28": ; preds = %"27" %6 = alloca i32, align 4, addrspace(5) store i32 1, ptr addrspace(5) %6, align 4 - %"83" = load i32, ptr addrspace(5) %6, align 4 - store i32 %"83", ptr addrspace(5) %"14", align 4 + %"82" = load i32, ptr addrspace(5) %6, align 4 + store i32 %"82", ptr addrspace(5) %"14", align 4 br label %"29" "29": ; preds = %"28", %"27" - %"84" = load i1, ptr addrspace(5) 
%"15", align 1 - br i1 %"84", label %"31", label %"30" + %"83" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"83", label %"31", label %"30" "30": ; preds = %"29" %7 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %7, align 4 - %"85" = load i32, ptr addrspace(5) %7, align 4 - store i32 %"85", ptr addrspace(5) %"14", align 4 + %"84" = load i32, ptr addrspace(5) %7, align 4 + store i32 %"84", ptr addrspace(5) %"14", align 4 br label %"31" "31": ; preds = %"30", %"29" - %"86" = load i64, ptr addrspace(5) %"5", align 8 - %"87" = load i32, ptr addrspace(5) %"14", align 4 - %"129" = inttoptr i64 %"86" to ptr - %"150" = getelementptr inbounds i8, ptr %"129", i64 12 - store i32 %"87", ptr %"150", align 4 + %"85" = load i64, ptr addrspace(5) %"5", align 8 + %"86" = load i32, ptr addrspace(5) %"14", align 4 + %"128" = inttoptr i64 %"85" to ptr + %"149" = getelementptr inbounds i8, ptr %"128", i64 12 + store i32 %"86", ptr %"149", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp_num.ll b/ptx/src/test/spirv_run/setp_num.ll index 07cf161..a6254a2 100644 --- a/ptx/src/test/spirv_run/setp_num.ll +++ b/ptx/src/test/spirv_run/setp_num.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"116", ptr addrspace(4) byref(i64) %"117") #0 { -"130": +define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115", ptr addrspace(4) byref(i64) %"116") #0 { +"129": %"32" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"32", align 1 - %"33" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"33", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) 
@@ -19,172 +17,172 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"116" %"13" = alloca float, align 4, addrspace(5) %"14" = alloca i32, align 4, addrspace(5) %"15" = alloca i1, align 1, addrspace(5) + %"33" = load i64, ptr addrspace(4) %"115", align 8 + store i64 %"33", ptr addrspace(5) %"4", align 8 %"34" = load i64, ptr addrspace(4) %"116", align 8 - store i64 %"34", ptr addrspace(5) %"4", align 8 - %"35" = load i64, ptr addrspace(4) %"117", align 8 - store i64 %"35", ptr addrspace(5) %"5", align 8 - %"37" = load i64, ptr addrspace(5) %"4", align 8 - %"118" = inttoptr i64 %"37" to ptr - %"36" = load float, ptr %"118", align 4 - store float %"36", ptr addrspace(5) %"6", align 4 - %"39" = load i64, ptr addrspace(5) %"4", align 8 - %"119" = inttoptr i64 %"39" to ptr - %"132" = getelementptr inbounds i8, ptr %"119", i64 4 - %"38" = load float, ptr %"132", align 4 - store float %"38", ptr addrspace(5) %"7", align 4 - %"41" = load i64, ptr addrspace(5) %"4", align 8 - %"120" = inttoptr i64 %"41" to ptr - %"134" = getelementptr inbounds i8, ptr %"120", i64 8 - %"40" = load float, ptr %"134", align 4 - store float %"40", ptr addrspace(5) %"8", align 4 - %"43" = load i64, ptr addrspace(5) %"4", align 8 - %"121" = inttoptr i64 %"43" to ptr - %"136" = getelementptr inbounds i8, ptr %"121", i64 12 - %"42" = load float, ptr %"136", align 4 - store float %"42", ptr addrspace(5) %"9", align 4 - %"45" = load i64, ptr addrspace(5) %"4", align 8 - %"122" = inttoptr i64 %"45" to ptr - %"138" = getelementptr inbounds i8, ptr %"122", i64 16 - %"44" = load float, ptr %"138", align 4 - store float %"44", ptr addrspace(5) %"10", align 4 - %"47" = load i64, ptr addrspace(5) %"4", align 8 - %"123" = inttoptr i64 %"47" to ptr - %"140" = getelementptr inbounds i8, ptr %"123", i64 20 - %"46" = load float, ptr %"140", align 4 - store float %"46", ptr addrspace(5) %"11", align 4 - %"49" = load i64, ptr addrspace(5) %"4", align 8 - %"124" = inttoptr i64 %"49" to 
ptr - %"142" = getelementptr inbounds i8, ptr %"124", i64 24 - %"48" = load float, ptr %"142", align 4 - store float %"48", ptr addrspace(5) %"12", align 4 - %"51" = load i64, ptr addrspace(5) %"4", align 8 - %"125" = inttoptr i64 %"51" to ptr - %"144" = getelementptr inbounds i8, ptr %"125", i64 28 - %"50" = load float, ptr %"144", align 4 - store float %"50", ptr addrspace(5) %"13", align 4 - %"53" = load float, ptr addrspace(5) %"6", align 4 - %"54" = load float, ptr addrspace(5) %"7", align 4 - %"52" = fcmp ord float %"53", %"54" - store i1 %"52", ptr addrspace(5) %"15", align 1 - %"55" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"55", label %"16", label %"17" + store i64 %"34", ptr addrspace(5) %"5", align 8 + %"36" = load i64, ptr addrspace(5) %"4", align 8 + %"117" = inttoptr i64 %"36" to ptr + %"35" = load float, ptr %"117", align 4 + store float %"35", ptr addrspace(5) %"6", align 4 + %"38" = load i64, ptr addrspace(5) %"4", align 8 + %"118" = inttoptr i64 %"38" to ptr + %"131" = getelementptr inbounds i8, ptr %"118", i64 4 + %"37" = load float, ptr %"131", align 4 + store float %"37", ptr addrspace(5) %"7", align 4 + %"40" = load i64, ptr addrspace(5) %"4", align 8 + %"119" = inttoptr i64 %"40" to ptr + %"133" = getelementptr inbounds i8, ptr %"119", i64 8 + %"39" = load float, ptr %"133", align 4 + store float %"39", ptr addrspace(5) %"8", align 4 + %"42" = load i64, ptr addrspace(5) %"4", align 8 + %"120" = inttoptr i64 %"42" to ptr + %"135" = getelementptr inbounds i8, ptr %"120", i64 12 + %"41" = load float, ptr %"135", align 4 + store float %"41", ptr addrspace(5) %"9", align 4 + %"44" = load i64, ptr addrspace(5) %"4", align 8 + %"121" = inttoptr i64 %"44" to ptr + %"137" = getelementptr inbounds i8, ptr %"121", i64 16 + %"43" = load float, ptr %"137", align 4 + store float %"43", ptr addrspace(5) %"10", align 4 + %"46" = load i64, ptr addrspace(5) %"4", align 8 + %"122" = inttoptr i64 %"46" to ptr + %"139" = getelementptr inbounds i8, ptr 
%"122", i64 20 + %"45" = load float, ptr %"139", align 4 + store float %"45", ptr addrspace(5) %"11", align 4 + %"48" = load i64, ptr addrspace(5) %"4", align 8 + %"123" = inttoptr i64 %"48" to ptr + %"141" = getelementptr inbounds i8, ptr %"123", i64 24 + %"47" = load float, ptr %"141", align 4 + store float %"47", ptr addrspace(5) %"12", align 4 + %"50" = load i64, ptr addrspace(5) %"4", align 8 + %"124" = inttoptr i64 %"50" to ptr + %"143" = getelementptr inbounds i8, ptr %"124", i64 28 + %"49" = load float, ptr %"143", align 4 + store float %"49", ptr addrspace(5) %"13", align 4 + %"52" = load float, ptr addrspace(5) %"6", align 4 + %"53" = load float, ptr addrspace(5) %"7", align 4 + %"51" = fcmp ord float %"52", %"53" + store i1 %"51", ptr addrspace(5) %"15", align 1 + %"54" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"54", label %"16", label %"17" -"16": ; preds = %"130" +"16": ; preds = %"129" %0 = alloca i32, align 4, addrspace(5) store i32 2, ptr addrspace(5) %0, align 4 - %"56" = load i32, ptr addrspace(5) %0, align 4 - store i32 %"56", ptr addrspace(5) %"14", align 4 + %"55" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"55", ptr addrspace(5) %"14", align 4 br label %"17" -"17": ; preds = %"16", %"130" - %"57" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"57", label %"19", label %"18" +"17": ; preds = %"16", %"129" + %"56" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"56", label %"19", label %"18" "18": ; preds = %"17" %1 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %1, align 4 - %"58" = load i32, ptr addrspace(5) %1, align 4 - store i32 %"58", ptr addrspace(5) %"14", align 4 + %"57" = load i32, ptr addrspace(5) %1, align 4 + store i32 %"57", ptr addrspace(5) %"14", align 4 br label %"19" "19": ; preds = %"18", %"17" - %"59" = load i64, ptr addrspace(5) %"5", align 8 - %"60" = load i32, ptr addrspace(5) %"14", align 4 - %"126" = inttoptr i64 %"59" to ptr - store i32 %"60", ptr %"126", align 4 - %"62" = 
load float, ptr addrspace(5) %"8", align 4 - %"63" = load float, ptr addrspace(5) %"9", align 4 - %"61" = fcmp ord float %"62", %"63" - store i1 %"61", ptr addrspace(5) %"15", align 1 - %"64" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"64", label %"20", label %"21" + %"58" = load i64, ptr addrspace(5) %"5", align 8 + %"59" = load i32, ptr addrspace(5) %"14", align 4 + %"125" = inttoptr i64 %"58" to ptr + store i32 %"59", ptr %"125", align 4 + %"61" = load float, ptr addrspace(5) %"8", align 4 + %"62" = load float, ptr addrspace(5) %"9", align 4 + %"60" = fcmp ord float %"61", %"62" + store i1 %"60", ptr addrspace(5) %"15", align 1 + %"63" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"63", label %"20", label %"21" "20": ; preds = %"19" %2 = alloca i32, align 4, addrspace(5) store i32 2, ptr addrspace(5) %2, align 4 - %"65" = load i32, ptr addrspace(5) %2, align 4 - store i32 %"65", ptr addrspace(5) %"14", align 4 + %"64" = load i32, ptr addrspace(5) %2, align 4 + store i32 %"64", ptr addrspace(5) %"14", align 4 br label %"21" "21": ; preds = %"20", %"19" - %"66" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"66", label %"23", label %"22" + %"65" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"65", label %"23", label %"22" "22": ; preds = %"21" %3 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %3, align 4 - %"67" = load i32, ptr addrspace(5) %3, align 4 - store i32 %"67", ptr addrspace(5) %"14", align 4 + %"66" = load i32, ptr addrspace(5) %3, align 4 + store i32 %"66", ptr addrspace(5) %"14", align 4 br label %"23" "23": ; preds = %"22", %"21" - %"68" = load i64, ptr addrspace(5) %"5", align 8 - %"69" = load i32, ptr addrspace(5) %"14", align 4 - %"127" = inttoptr i64 %"68" to ptr - %"146" = getelementptr inbounds i8, ptr %"127", i64 4 - store i32 %"69", ptr %"146", align 4 - %"71" = load float, ptr addrspace(5) %"10", align 4 - %"72" = load float, ptr addrspace(5) %"11", align 4 - %"70" = fcmp ord float %"71", %"72" - 
store i1 %"70", ptr addrspace(5) %"15", align 1 - %"73" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"73", label %"24", label %"25" + %"67" = load i64, ptr addrspace(5) %"5", align 8 + %"68" = load i32, ptr addrspace(5) %"14", align 4 + %"126" = inttoptr i64 %"67" to ptr + %"145" = getelementptr inbounds i8, ptr %"126", i64 4 + store i32 %"68", ptr %"145", align 4 + %"70" = load float, ptr addrspace(5) %"10", align 4 + %"71" = load float, ptr addrspace(5) %"11", align 4 + %"69" = fcmp ord float %"70", %"71" + store i1 %"69", ptr addrspace(5) %"15", align 1 + %"72" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"72", label %"24", label %"25" "24": ; preds = %"23" %4 = alloca i32, align 4, addrspace(5) store i32 2, ptr addrspace(5) %4, align 4 - %"74" = load i32, ptr addrspace(5) %4, align 4 - store i32 %"74", ptr addrspace(5) %"14", align 4 + %"73" = load i32, ptr addrspace(5) %4, align 4 + store i32 %"73", ptr addrspace(5) %"14", align 4 br label %"25" "25": ; preds = %"24", %"23" - %"75" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"75", label %"27", label %"26" + %"74" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"74", label %"27", label %"26" "26": ; preds = %"25" %5 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %5, align 4 - %"76" = load i32, ptr addrspace(5) %5, align 4 - store i32 %"76", ptr addrspace(5) %"14", align 4 + %"75" = load i32, ptr addrspace(5) %5, align 4 + store i32 %"75", ptr addrspace(5) %"14", align 4 br label %"27" "27": ; preds = %"26", %"25" - %"77" = load i64, ptr addrspace(5) %"5", align 8 - %"78" = load i32, ptr addrspace(5) %"14", align 4 - %"128" = inttoptr i64 %"77" to ptr - %"148" = getelementptr inbounds i8, ptr %"128", i64 8 - store i32 %"78", ptr %"148", align 4 - %"80" = load float, ptr addrspace(5) %"12", align 4 - %"81" = load float, ptr addrspace(5) %"13", align 4 - %"79" = fcmp ord float %"80", %"81" - store i1 %"79", ptr addrspace(5) %"15", align 1 - %"82" = load i1, ptr 
addrspace(5) %"15", align 1 - br i1 %"82", label %"28", label %"29" + %"76" = load i64, ptr addrspace(5) %"5", align 8 + %"77" = load i32, ptr addrspace(5) %"14", align 4 + %"127" = inttoptr i64 %"76" to ptr + %"147" = getelementptr inbounds i8, ptr %"127", i64 8 + store i32 %"77", ptr %"147", align 4 + %"79" = load float, ptr addrspace(5) %"12", align 4 + %"80" = load float, ptr addrspace(5) %"13", align 4 + %"78" = fcmp ord float %"79", %"80" + store i1 %"78", ptr addrspace(5) %"15", align 1 + %"81" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"81", label %"28", label %"29" "28": ; preds = %"27" %6 = alloca i32, align 4, addrspace(5) store i32 2, ptr addrspace(5) %6, align 4 - %"83" = load i32, ptr addrspace(5) %6, align 4 - store i32 %"83", ptr addrspace(5) %"14", align 4 + %"82" = load i32, ptr addrspace(5) %6, align 4 + store i32 %"82", ptr addrspace(5) %"14", align 4 br label %"29" "29": ; preds = %"28", %"27" - %"84" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"84", label %"31", label %"30" + %"83" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"83", label %"31", label %"30" "30": ; preds = %"29" %7 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %7, align 4 - %"85" = load i32, ptr addrspace(5) %7, align 4 - store i32 %"85", ptr addrspace(5) %"14", align 4 + %"84" = load i32, ptr addrspace(5) %7, align 4 + store i32 %"84", ptr addrspace(5) %"14", align 4 br label %"31" "31": ; preds = %"30", %"29" - %"86" = load i64, ptr addrspace(5) %"5", align 8 - %"87" = load i32, ptr addrspace(5) %"14", align 4 - %"129" = inttoptr i64 %"86" to ptr - %"150" = getelementptr inbounds i8, ptr %"129", i64 12 - store i32 %"87", ptr %"150", align 4 + %"85" = load i64, ptr addrspace(5) %"5", align 8 + %"86" = load i32, ptr addrspace(5) %"14", align 4 + %"128" = inttoptr i64 %"85" to ptr + %"149" = getelementptr inbounds i8, ptr %"128", i64 12 + store i32 %"86", ptr %"149", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp_pred2.ll 
b/ptx/src/test/spirv_run/setp_pred2.ll index 9ce8135..8220fc0 100644 --- a/ptx/src/test/spirv_run/setp_pred2.ll +++ b/ptx/src/test/spirv_run/setp_pred2.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { -"42": +define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { +"41": %"15" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"15", align 1 - %"16" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"16", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) @@ -14,53 +12,53 @@ define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"37 %"8" = alloca float, align 4, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) %"10" = alloca i1, align 1, addrspace(5) + %"16" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"16", ptr addrspace(5) %"4", align 8 %"17" = load i64, ptr addrspace(4) %"37", align 8 - store i64 %"17", ptr addrspace(5) %"4", align 8 - %"18" = load i64, ptr addrspace(4) %"38", align 8 - store i64 %"18", ptr addrspace(5) %"5", align 8 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"39" = inttoptr i64 %"20" to ptr - %"19" = load float, ptr %"39", align 4 - store float %"19", ptr addrspace(5) %"6", align 4 - %"22" = load i64, ptr addrspace(5) %"4", align 8 - %"40" = inttoptr i64 %"22" to ptr - %"44" = getelementptr inbounds i8, ptr %"40", i64 4 - %"21" = load float, ptr %"44", align 4 - store float %"21", ptr addrspace(5) %"7", align 4 - %"25" = load float, ptr addrspace(5) %"6", align 4 - %"26" = load float, ptr 
addrspace(5) %"7", align 4 - %"23" = fcmp ogt float %"25", %"26" - %"24" = xor i1 %"23", true - store i1 %"23", ptr addrspace(5) %"9", align 1 - store i1 %"24", ptr addrspace(5) %"10", align 1 - %"27" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"27", label %"11", label %"12" + store i64 %"17", ptr addrspace(5) %"5", align 8 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"38" = inttoptr i64 %"19" to ptr + %"18" = load float, ptr %"38", align 4 + store float %"18", ptr addrspace(5) %"6", align 4 + %"21" = load i64, ptr addrspace(5) %"4", align 8 + %"39" = inttoptr i64 %"21" to ptr + %"43" = getelementptr inbounds i8, ptr %"39", i64 4 + %"20" = load float, ptr %"43", align 4 + store float %"20", ptr addrspace(5) %"7", align 4 + %"24" = load float, ptr addrspace(5) %"6", align 4 + %"25" = load float, ptr addrspace(5) %"7", align 4 + %"22" = fcmp ogt float %"24", %"25" + %"23" = xor i1 %"22", true + store i1 %"22", ptr addrspace(5) %"9", align 1 + store i1 %"23", ptr addrspace(5) %"10", align 1 + %"26" = load i1, ptr addrspace(5) %"9", align 1 + br i1 %"26", label %"11", label %"12" -"11": ; preds = %"42" - %"29" = load float, ptr addrspace(5) %"6", align 4 +"11": ; preds = %"41" + %"28" = load float, ptr addrspace(5) %"6", align 4 %0 = alloca float, align 4, addrspace(5) - store float %"29", ptr addrspace(5) %0, align 4 - %"28" = load float, ptr addrspace(5) %0, align 4 - store float %"28", ptr addrspace(5) %"8", align 4 + store float %"28", ptr addrspace(5) %0, align 4 + %"27" = load float, ptr addrspace(5) %0, align 4 + store float %"27", ptr addrspace(5) %"8", align 4 br label %"12" -"12": ; preds = %"11", %"42" - %"30" = load i1, ptr addrspace(5) %"10", align 1 - br i1 %"30", label %"13", label %"14" +"12": ; preds = %"11", %"41" + %"29" = load i1, ptr addrspace(5) %"10", align 1 + br i1 %"29", label %"13", label %"14" "13": ; preds = %"12" - %"32" = load float, ptr addrspace(5) %"7", align 4 + %"31" = load float, ptr addrspace(5) %"7", align 4 %1 = 
alloca float, align 4, addrspace(5) - store float %"32", ptr addrspace(5) %1, align 4 - %"31" = load float, ptr addrspace(5) %1, align 4 - store float %"31", ptr addrspace(5) %"8", align 4 + store float %"31", ptr addrspace(5) %1, align 4 + %"30" = load float, ptr addrspace(5) %1, align 4 + store float %"30", ptr addrspace(5) %"8", align 4 br label %"14" "14": ; preds = %"13", %"12" - %"33" = load i64, ptr addrspace(5) %"5", align 8 - %"34" = load float, ptr addrspace(5) %"8", align 4 - %"41" = inttoptr i64 %"33" to ptr - store float %"34", ptr %"41", align 4 + %"32" = load i64, ptr addrspace(5) %"5", align 8 + %"33" = load float, ptr addrspace(5) %"8", align 4 + %"40" = inttoptr i64 %"32" to ptr + store float %"33", ptr %"40", align 4 ret void } diff --git a/ptx/src/test/spirv_run/shared_ptr_32.ll b/ptx/src/test/spirv_run/shared_ptr_32.ll index a132a58..8705967 100644 --- a/ptx/src/test/spirv_run/shared_ptr_32.ll +++ b/ptx/src/test/spirv_run/shared_ptr_32.ll @@ -3,42 +3,40 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [128 x i8] undef, align 4 -define protected amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 { -"32": +define protected amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { +"31": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) + %"11" = load i64, ptr addrspace(4) %"24", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 %"12" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(4) %"26", align 8 - 
store i64 %"13", ptr addrspace(5) %"6", align 8 + store i64 %"12", ptr addrspace(5) %"6", align 8 %0 = alloca i32, align 4, addrspace(5) store i32 ptrtoint (ptr addrspace(3) @"4" to i32), ptr addrspace(5) %0, align 4 - %"14" = load i32, ptr addrspace(5) %0, align 4 - store i32 %"14", ptr addrspace(5) %"7", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"28" = inttoptr i64 %"16" to ptr addrspace(1) - %"15" = load i64, ptr addrspace(1) %"28", align 8 - store i64 %"15", ptr addrspace(5) %"8", align 8 - %"17" = load i32, ptr addrspace(5) %"7", align 4 - %"18" = load i64, ptr addrspace(5) %"8", align 8 - %"29" = inttoptr i32 %"17" to ptr addrspace(3) - store i64 %"18", ptr addrspace(3) %"29", align 8 - %"20" = load i32, ptr addrspace(5) %"7", align 4 - %"30" = inttoptr i32 %"20" to ptr addrspace(3) - %"34" = getelementptr inbounds i8, ptr addrspace(3) %"30", i64 0 - %"19" = load i64, ptr addrspace(3) %"34", align 8 - store i64 %"19", ptr addrspace(5) %"9", align 8 - %"21" = load i64, ptr addrspace(5) %"6", align 8 - %"22" = load i64, ptr addrspace(5) %"9", align 8 - %"31" = inttoptr i64 %"21" to ptr addrspace(1) - store i64 %"22", ptr addrspace(1) %"31", align 8 + %"13" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"27" = inttoptr i64 %"15" to ptr addrspace(1) + %"14" = load i64, ptr addrspace(1) %"27", align 8 + store i64 %"14", ptr addrspace(5) %"8", align 8 + %"16" = load i32, ptr addrspace(5) %"7", align 4 + %"17" = load i64, ptr addrspace(5) %"8", align 8 + %"28" = inttoptr i32 %"16" to ptr addrspace(3) + store i64 %"17", ptr addrspace(3) %"28", align 8 + %"19" = load i32, ptr addrspace(5) %"7", align 4 + %"29" = inttoptr i32 %"19" to ptr addrspace(3) + %"33" = getelementptr inbounds i8, ptr addrspace(3) %"29", i64 0 + %"18" = load i64, ptr addrspace(3) %"33", align 8 + store i64 %"18", ptr addrspace(5) %"9", align 8 + %"20" = load i64, ptr addrspace(5) 
%"6", align 8 + %"21" = load i64, ptr addrspace(5) %"9", align 8 + %"30" = inttoptr i64 %"20" to ptr addrspace(1) + store i64 %"21", ptr addrspace(1) %"30", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shared_ptr_take_address.ll b/ptx/src/test/spirv_run/shared_ptr_take_address.ll index a3d3e5d..6c430a2 100644 --- a/ptx/src/test/spirv_run/shared_ptr_take_address.ll +++ b/ptx/src/test/spirv_run/shared_ptr_take_address.ll @@ -3,41 +3,39 @@ target triple = "amdgcn-amd-amdhsa" @shared_mem = external hidden addrspace(3) global [0 x i8], align 4 -define protected amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"30": +define protected amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"29": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) + %"11" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 %"12" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"13", ptr addrspace(5) %"6", align 8 + store i64 %"12", ptr addrspace(5) %"6", align 8 %0 = alloca i64, align 8, addrspace(5) store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %0, align 8 - %"14" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"26" = inttoptr i64 %"16" to ptr addrspace(1) - %"15" = load i64, ptr addrspace(1) %"26", align 8 - store i64 %"15", ptr addrspace(5) 
%"8", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"18" = load i64, ptr addrspace(5) %"8", align 8 - %"27" = inttoptr i64 %"17" to ptr addrspace(3) - store i64 %"18", ptr addrspace(3) %"27", align 8 - %"20" = load i64, ptr addrspace(5) %"7", align 8 - %"28" = inttoptr i64 %"20" to ptr addrspace(3) - %"19" = load i64, ptr addrspace(3) %"28", align 8 - store i64 %"19", ptr addrspace(5) %"9", align 8 - %"21" = load i64, ptr addrspace(5) %"6", align 8 - %"22" = load i64, ptr addrspace(5) %"9", align 8 - %"29" = inttoptr i64 %"21" to ptr addrspace(1) - store i64 %"22", ptr addrspace(1) %"29", align 8 + %"13" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"25" = inttoptr i64 %"15" to ptr addrspace(1) + %"14" = load i64, ptr addrspace(1) %"25", align 8 + store i64 %"14", ptr addrspace(5) %"8", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"17" = load i64, ptr addrspace(5) %"8", align 8 + %"26" = inttoptr i64 %"16" to ptr addrspace(3) + store i64 %"17", ptr addrspace(3) %"26", align 8 + %"19" = load i64, ptr addrspace(5) %"7", align 8 + %"27" = inttoptr i64 %"19" to ptr addrspace(3) + %"18" = load i64, ptr addrspace(3) %"27", align 8 + store i64 %"18", ptr addrspace(5) %"9", align 8 + %"20" = load i64, ptr addrspace(5) %"6", align 8 + %"21" = load i64, ptr addrspace(5) %"9", align 8 + %"28" = inttoptr i64 %"20" to ptr addrspace(1) + store i64 %"21", ptr addrspace(1) %"28", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shared_unify_decl.ll b/ptx/src/test/spirv_run/shared_unify_decl.ll index 1079e59..4cc24fb 100644 --- a/ptx/src/test/spirv_run/shared_unify_decl.ll +++ b/ptx/src/test/spirv_run/shared_unify_decl.ll @@ -4,76 +4,70 @@ target triple = "amdgcn-amd-amdhsa" @shared_ex = external hidden addrspace(3) global [0 x i32] @shared_mod = private addrspace(3) global [4 x i32] undef -define private i64 @"3"(ptr addrspace(3) %"69", ptr 
addrspace(3) %"70") #0 { -"62": +define private i64 @"3"(ptr addrspace(3) %"66", ptr addrspace(3) %"67") #0 { +"59": %"8" = alloca i64, align 8, addrspace(5) %"20" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"20", align 1 - %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) - %"26" = load i64, ptr addrspace(3) %"70", align 8 - store i64 %"26", ptr addrspace(5) %"9", align 8 - %"27" = load i64, ptr addrspace(3) %"69", align 8 - store i64 %"27", ptr addrspace(5) %"10", align 8 - %"29" = load i64, ptr addrspace(5) %"10", align 8 - %"30" = load i64, ptr addrspace(5) %"9", align 8 - %"53" = add i64 %"29", %"30" - store i64 %"53", ptr addrspace(5) %"8", align 8 - %"31" = load i64, ptr addrspace(5) %"8", align 8 - ret i64 %"31" + %"23" = load i64, ptr addrspace(3) %"67", align 8 + store i64 %"23", ptr addrspace(5) %"9", align 8 + %"24" = load i64, ptr addrspace(3) %"66", align 8 + store i64 %"24", ptr addrspace(5) %"10", align 8 + %"26" = load i64, ptr addrspace(5) %"10", align 8 + %"27" = load i64, ptr addrspace(5) %"9", align 8 + %"50" = add i64 %"26", %"27" + store i64 %"50", ptr addrspace(5) %"8", align 8 + %"28" = load i64, ptr addrspace(5) %"8", align 8 + ret i64 %"28" } -define private i64 @"5"(i64 %"32", ptr addrspace(3) %"71", ptr addrspace(3) %"72") #0 { -"63": +define private i64 @"5"(i64 %"29", ptr addrspace(3) %"68", ptr addrspace(3) %"69") #0 { +"60": %"12" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) - %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 - %"23" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"23", align 1 - store i64 %"32", ptr addrspace(5) %"12", align 8 - %"33" = load i64, ptr addrspace(5) %"12", align 8 - store i64 %"33", ptr addrspace(3) %"71", align 8 - %"34" = call i64 @"3"(ptr addrspace(3) 
%"71", ptr addrspace(3) %"72") - store i64 %"34", ptr addrspace(5) %"11", align 8 - %"35" = load i64, ptr addrspace(5) %"11", align 8 - ret i64 %"35" + %"21" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"21", align 1 + store i64 %"29", ptr addrspace(5) %"12", align 8 + %"30" = load i64, ptr addrspace(5) %"12", align 8 + store i64 %"30", ptr addrspace(3) %"68", align 8 + %"31" = call i64 @"3"(ptr addrspace(3) %"68", ptr addrspace(3) %"69") + store i64 %"31", ptr addrspace(5) %"11", align 8 + %"32" = load i64, ptr addrspace(5) %"11", align 8 + ret i64 %"32" } -define protected amdgpu_kernel void @shared_unify_decl(ptr addrspace(4) byref(i64) %"49", ptr addrspace(4) byref(i64) %"50") #0 { -"64": - %"24" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"24", align 1 - %"25" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"25", align 1 +define protected amdgpu_kernel void @shared_unify_decl(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { +"61": + %"22" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"22", align 1 %"16" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) %"18" = alloca i64, align 8, addrspace(5) %"19" = alloca i64, align 8, addrspace(5) - %"36" = load i64, ptr addrspace(4) %"49", align 8 - store i64 %"36", ptr addrspace(5) %"16", align 8 - %"37" = load i64, ptr addrspace(4) %"50", align 8 - store i64 %"37", ptr addrspace(5) %"17", align 8 - %"39" = load i64, ptr addrspace(5) %"16", align 8 - %"56" = inttoptr i64 %"39" to ptr addrspace(1) - %"38" = load i64, ptr addrspace(1) %"56", align 8 - store i64 %"38", ptr addrspace(5) %"18", align 8 - %"41" = load i64, ptr addrspace(5) %"16", align 8 - %"57" = inttoptr i64 %"41" to ptr addrspace(1) - %"74" = getelementptr inbounds i8, ptr addrspace(1) %"57", i64 8 - %"40" = load i64, ptr addrspace(1) %"74", align 8 - store i64 %"40", ptr addrspace(5) %"19", align 8 - 
%"42" = load i64, ptr addrspace(5) %"19", align 8 - store i64 %"42", ptr addrspace(3) @shared_mod, align 8 - %"44" = load i64, ptr addrspace(5) %"18", align 8 - %"59" = call i64 @"5"(i64 %"44", ptr addrspace(3) @shared_ex, ptr addrspace(3) @shared_mod) - store i64 %"59", ptr addrspace(5) %"19", align 8 - %"45" = load i64, ptr addrspace(5) %"17", align 8 - %"46" = load i64, ptr addrspace(5) %"19", align 8 - %"61" = inttoptr i64 %"45" to ptr - store i64 %"46", ptr %"61", align 8 + %"33" = load i64, ptr addrspace(4) %"46", align 8 + store i64 %"33", ptr addrspace(5) %"16", align 8 + %"34" = load i64, ptr addrspace(4) %"47", align 8 + store i64 %"34", ptr addrspace(5) %"17", align 8 + %"36" = load i64, ptr addrspace(5) %"16", align 8 + %"53" = inttoptr i64 %"36" to ptr addrspace(1) + %"35" = load i64, ptr addrspace(1) %"53", align 8 + store i64 %"35", ptr addrspace(5) %"18", align 8 + %"38" = load i64, ptr addrspace(5) %"16", align 8 + %"54" = inttoptr i64 %"38" to ptr addrspace(1) + %"71" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 8 + %"37" = load i64, ptr addrspace(1) %"71", align 8 + store i64 %"37", ptr addrspace(5) %"19", align 8 + %"39" = load i64, ptr addrspace(5) %"19", align 8 + store i64 %"39", ptr addrspace(3) @shared_mod, align 8 + %"41" = load i64, ptr addrspace(5) %"18", align 8 + %"56" = call i64 @"5"(i64 %"41", ptr addrspace(3) @shared_ex, ptr addrspace(3) @shared_mod) + store i64 %"56", ptr addrspace(5) %"19", align 8 + %"42" = load i64, ptr addrspace(5) %"17", align 8 + %"43" = load i64, ptr addrspace(5) %"19", align 8 + %"58" = inttoptr i64 %"42" to ptr + store i64 %"43", ptr %"58", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shared_unify_extern.ll b/ptx/src/test/spirv_run/shared_unify_extern.ll index d83ea7a..819e8a1 100644 --- a/ptx/src/test/spirv_run/shared_unify_extern.ll +++ b/ptx/src/test/spirv_run/shared_unify_extern.ll @@ -4,76 +4,70 @@ target triple = "amdgcn-amd-amdhsa" @shared_ex = external hidden addrspace(3) 
global [0 x i32] @shared_mod = private addrspace(3) global [4 x i32] undef -define private i64 @"3"(ptr addrspace(3) %"62", ptr addrspace(3) %"63") #0 { -"59": +define private i64 @"3"(ptr addrspace(3) %"59", ptr addrspace(3) %"60") #0 { +"56": %"4" = alloca i64, align 8, addrspace(5) %"17" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"17", align 1 - %"18" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"18", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) - %"23" = load i64, ptr addrspace(3) %"63", align 8 - store i64 %"23", ptr addrspace(5) %"5", align 8 - %"24" = load i64, ptr addrspace(3) %"62", align 8 - store i64 %"24", ptr addrspace(5) %"6", align 8 - %"26" = load i64, ptr addrspace(5) %"6", align 8 - %"27" = load i64, ptr addrspace(5) %"5", align 8 - %"50" = add i64 %"26", %"27" - store i64 %"50", ptr addrspace(5) %"4", align 8 - %"28" = load i64, ptr addrspace(5) %"4", align 8 - ret i64 %"28" + %"20" = load i64, ptr addrspace(3) %"60", align 8 + store i64 %"20", ptr addrspace(5) %"5", align 8 + %"21" = load i64, ptr addrspace(3) %"59", align 8 + store i64 %"21", ptr addrspace(5) %"6", align 8 + %"23" = load i64, ptr addrspace(5) %"6", align 8 + %"24" = load i64, ptr addrspace(5) %"5", align 8 + %"47" = add i64 %"23", %"24" + store i64 %"47", ptr addrspace(5) %"4", align 8 + %"25" = load i64, ptr addrspace(5) %"4", align 8 + ret i64 %"25" } -define private i64 @"7"(i64 %"29", ptr addrspace(3) %"64", ptr addrspace(3) %"65") #0 { -"60": +define private i64 @"7"(i64 %"26", ptr addrspace(3) %"61", ptr addrspace(3) %"62") #0 { +"57": %"9" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) - %"19" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"19", align 1 - %"20" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"20", align 1 - store i64 %"29", ptr addrspace(5) %"9", align 8 - %"30" = load i64, ptr 
addrspace(5) %"9", align 8 - store i64 %"30", ptr addrspace(3) %"64", align 8 - %"31" = call i64 @"3"(ptr addrspace(3) %"64", ptr addrspace(3) %"65") - store i64 %"31", ptr addrspace(5) %"8", align 8 - %"32" = load i64, ptr addrspace(5) %"8", align 8 - ret i64 %"32" + %"18" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"18", align 1 + store i64 %"26", ptr addrspace(5) %"9", align 8 + %"27" = load i64, ptr addrspace(5) %"9", align 8 + store i64 %"27", ptr addrspace(3) %"61", align 8 + %"28" = call i64 @"3"(ptr addrspace(3) %"61", ptr addrspace(3) %"62") + store i64 %"28", ptr addrspace(5) %"8", align 8 + %"29" = load i64, ptr addrspace(5) %"8", align 8 + ret i64 %"29" } -define protected amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { -"61": - %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 - %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 +define protected amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #0 { +"58": + %"19" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"19", align 1 %"13" = alloca i64, align 8, addrspace(5) %"14" = alloca i64, align 8, addrspace(5) %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) - %"33" = load i64, ptr addrspace(4) %"46", align 8 - store i64 %"33", ptr addrspace(5) %"13", align 8 - %"34" = load i64, ptr addrspace(4) %"47", align 8 - store i64 %"34", ptr addrspace(5) %"14", align 8 - %"36" = load i64, ptr addrspace(5) %"13", align 8 - %"53" = inttoptr i64 %"36" to ptr addrspace(1) - %"35" = load i64, ptr addrspace(1) %"53", align 8 - store i64 %"35", ptr addrspace(5) %"15", align 8 - %"38" = load i64, ptr addrspace(5) %"13", align 8 - %"54" = inttoptr i64 %"38" to ptr addrspace(1) - %"67" = getelementptr inbounds i8, ptr addrspace(1) 
%"54", i64 8 - %"37" = load i64, ptr addrspace(1) %"67", align 8 - store i64 %"37", ptr addrspace(5) %"16", align 8 - %"39" = load i64, ptr addrspace(5) %"16", align 8 - store i64 %"39", ptr addrspace(3) @shared_mod, align 8 - %"41" = load i64, ptr addrspace(5) %"15", align 8 - %"56" = call i64 @"7"(i64 %"41", ptr addrspace(3) @shared_ex, ptr addrspace(3) @shared_mod) - store i64 %"56", ptr addrspace(5) %"16", align 8 - %"42" = load i64, ptr addrspace(5) %"14", align 8 - %"43" = load i64, ptr addrspace(5) %"16", align 8 - %"58" = inttoptr i64 %"42" to ptr - store i64 %"43", ptr %"58", align 8 + %"30" = load i64, ptr addrspace(4) %"43", align 8 + store i64 %"30", ptr addrspace(5) %"13", align 8 + %"31" = load i64, ptr addrspace(4) %"44", align 8 + store i64 %"31", ptr addrspace(5) %"14", align 8 + %"33" = load i64, ptr addrspace(5) %"13", align 8 + %"50" = inttoptr i64 %"33" to ptr addrspace(1) + %"32" = load i64, ptr addrspace(1) %"50", align 8 + store i64 %"32", ptr addrspace(5) %"15", align 8 + %"35" = load i64, ptr addrspace(5) %"13", align 8 + %"51" = inttoptr i64 %"35" to ptr addrspace(1) + %"64" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 8 + %"34" = load i64, ptr addrspace(1) %"64", align 8 + store i64 %"34", ptr addrspace(5) %"16", align 8 + %"36" = load i64, ptr addrspace(5) %"16", align 8 + store i64 %"36", ptr addrspace(3) @shared_mod, align 8 + %"38" = load i64, ptr addrspace(5) %"15", align 8 + %"53" = call i64 @"7"(i64 %"38", ptr addrspace(3) @shared_ex, ptr addrspace(3) @shared_mod) + store i64 %"53", ptr addrspace(5) %"16", align 8 + %"39" = load i64, ptr addrspace(5) %"14", align 8 + %"40" = load i64, ptr addrspace(5) %"16", align 8 + %"55" = inttoptr i64 %"39" to ptr + store i64 %"40", ptr %"55", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shared_unify_local.ll b/ptx/src/test/spirv_run/shared_unify_local.ll index e3a1db7..b98b280 100644 --- a/ptx/src/test/spirv_run/shared_unify_local.ll +++ 
b/ptx/src/test/spirv_run/shared_unify_local.ll @@ -4,81 +4,75 @@ target triple = "amdgcn-amd-amdhsa" @shared_ex = external hidden addrspace(3) global [0 x i32] @"5" = private addrspace(3) global i64 undef, align 4 -define private i64 @"2"(i64 %"24", ptr addrspace(3) %"65", ptr addrspace(3) %"66") #0 { -"62": +define private i64 @"2"(i64 %"21", ptr addrspace(3) %"62", ptr addrspace(3) %"63") #0 { +"59": %"4" = alloca i64, align 8, addrspace(5) %"3" = alloca i64, align 8, addrspace(5) %"18" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"18", align 1 - %"19" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"19", align 1 %"6" = alloca i64, align 8, addrspace(5) + store i64 %"21", ptr addrspace(5) %"4", align 8 + %"22" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"22", ptr addrspace(3) %"63", align 8 + %"23" = load i64, ptr addrspace(3) %"63", align 8 + store i64 %"23", ptr addrspace(5) %"6", align 8 + %"24" = load i64, ptr addrspace(3) %"62", align 8 store i64 %"24", ptr addrspace(5) %"4", align 8 - %"25" = load i64, ptr addrspace(5) %"4", align 8 - store i64 %"25", ptr addrspace(3) %"66", align 8 - %"26" = load i64, ptr addrspace(3) %"66", align 8 - store i64 %"26", ptr addrspace(5) %"6", align 8 - %"27" = load i64, ptr addrspace(3) %"65", align 8 - store i64 %"27", ptr addrspace(5) %"4", align 8 - %"29" = load i64, ptr addrspace(5) %"4", align 8 - %"30" = load i64, ptr addrspace(5) %"6", align 8 - %"54" = add i64 %"29", %"30" - store i64 %"54", ptr addrspace(5) %"3", align 8 - %"31" = load i64, ptr addrspace(5) %"3", align 8 - ret i64 %"31" + %"26" = load i64, ptr addrspace(5) %"4", align 8 + %"27" = load i64, ptr addrspace(5) %"6", align 8 + %"51" = add i64 %"26", %"27" + store i64 %"51", ptr addrspace(5) %"3", align 8 + %"28" = load i64, ptr addrspace(5) %"3", align 8 + ret i64 %"28" } -define private i64 @"7"(i64 %"32", i64 %"33", ptr addrspace(3) %"67", ptr addrspace(3) %"68") #0 { -"63": +define private i64 
@"7"(i64 %"29", i64 %"30", ptr addrspace(3) %"64", ptr addrspace(3) %"65") #0 { +"60": %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) - %"20" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"20", align 1 - %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 - store i64 %"32", ptr addrspace(5) %"9", align 8 - store i64 %"33", ptr addrspace(5) %"10", align 8 - %"34" = load i64, ptr addrspace(5) %"9", align 8 - store i64 %"34", ptr addrspace(3) %"67", align 8 - %"36" = load i64, ptr addrspace(5) %"10", align 8 - %"35" = call i64 @"2"(i64 %"36", ptr addrspace(3) %"67", ptr addrspace(3) %"68") - store i64 %"35", ptr addrspace(5) %"8", align 8 - %"37" = load i64, ptr addrspace(5) %"8", align 8 - ret i64 %"37" + %"19" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"19", align 1 + store i64 %"29", ptr addrspace(5) %"9", align 8 + store i64 %"30", ptr addrspace(5) %"10", align 8 + %"31" = load i64, ptr addrspace(5) %"9", align 8 + store i64 %"31", ptr addrspace(3) %"64", align 8 + %"33" = load i64, ptr addrspace(5) %"10", align 8 + %"32" = call i64 @"2"(i64 %"33", ptr addrspace(3) %"64", ptr addrspace(3) %"65") + store i64 %"32", ptr addrspace(5) %"8", align 8 + %"34" = load i64, ptr addrspace(5) %"8", align 8 + ret i64 %"34" } -define protected amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"51", ptr addrspace(4) byref(i64) %"52") #0 { -"64": - %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 - %"23" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"23", align 1 +define protected amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"48", ptr addrspace(4) byref(i64) %"49") #0 { +"61": + %"20" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"20", align 1 %"14" = alloca i64, align 8, 
addrspace(5) %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) - %"38" = load i64, ptr addrspace(4) %"51", align 8 - store i64 %"38", ptr addrspace(5) %"14", align 8 - %"39" = load i64, ptr addrspace(4) %"52", align 8 - store i64 %"39", ptr addrspace(5) %"15", align 8 - %"41" = load i64, ptr addrspace(5) %"14", align 8 - %"57" = inttoptr i64 %"41" to ptr addrspace(1) - %"40" = load i64, ptr addrspace(1) %"57", align 8 - store i64 %"40", ptr addrspace(5) %"16", align 8 - %"43" = load i64, ptr addrspace(5) %"14", align 8 - %"58" = inttoptr i64 %"43" to ptr addrspace(1) - %"70" = getelementptr inbounds i8, ptr addrspace(1) %"58", i64 8 - %"42" = load i64, ptr addrspace(1) %"70", align 8 - store i64 %"42", ptr addrspace(5) %"17", align 8 - %"45" = load i64, ptr addrspace(5) %"16", align 8 - %"46" = load i64, ptr addrspace(5) %"17", align 8 - %"59" = call i64 @"7"(i64 %"45", i64 %"46", ptr addrspace(3) @shared_ex, ptr addrspace(3) @"5") - store i64 %"59", ptr addrspace(5) %"17", align 8 - %"47" = load i64, ptr addrspace(5) %"15", align 8 - %"48" = load i64, ptr addrspace(5) %"17", align 8 - %"61" = inttoptr i64 %"47" to ptr - store i64 %"48", ptr %"61", align 8 + %"35" = load i64, ptr addrspace(4) %"48", align 8 + store i64 %"35", ptr addrspace(5) %"14", align 8 + %"36" = load i64, ptr addrspace(4) %"49", align 8 + store i64 %"36", ptr addrspace(5) %"15", align 8 + %"38" = load i64, ptr addrspace(5) %"14", align 8 + %"54" = inttoptr i64 %"38" to ptr addrspace(1) + %"37" = load i64, ptr addrspace(1) %"54", align 8 + store i64 %"37", ptr addrspace(5) %"16", align 8 + %"40" = load i64, ptr addrspace(5) %"14", align 8 + %"55" = inttoptr i64 %"40" to ptr addrspace(1) + %"67" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 8 + %"39" = load i64, ptr addrspace(1) %"67", align 8 + store i64 %"39", ptr addrspace(5) %"17", align 8 + %"42" = load i64, ptr addrspace(5) %"16", align 8 + %"43" = load 
i64, ptr addrspace(5) %"17", align 8 + %"56" = call i64 @"7"(i64 %"42", i64 %"43", ptr addrspace(3) @shared_ex, ptr addrspace(3) @"5") + store i64 %"56", ptr addrspace(5) %"17", align 8 + %"44" = load i64, ptr addrspace(5) %"15", align 8 + %"45" = load i64, ptr addrspace(5) %"17", align 8 + %"58" = inttoptr i64 %"44" to ptr + store i64 %"45", ptr %"58", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shared_variable.ll b/ptx/src/test/spirv_run/shared_variable.ll index 2c2678a..859a767 100644 --- a/ptx/src/test/spirv_run/shared_variable.ll +++ b/ptx/src/test/spirv_run/shared_variable.ll @@ -3,32 +3,30 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [128 x i8] undef, align 4 -define protected amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"25": +define protected amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"24": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = inttoptr i64 %"14" to ptr addrspace(1) - %"13" = load i64, ptr addrspace(1) %"21", align 8 - store i64 %"13", ptr addrspace(5) %"7", align 8 - %"15" = load i64, ptr addrspace(5) %"7", align 8 - store i64 %"15", ptr addrspace(3) @"4", align 8 - %"16" = load i64, ptr addrspace(3) @"4", align 8 - store 
i64 %"16", ptr addrspace(5) %"8", align 8 - %"17" = load i64, ptr addrspace(5) %"6", align 8 - %"18" = load i64, ptr addrspace(5) %"8", align 8 - %"24" = inttoptr i64 %"17" to ptr addrspace(1) - store i64 %"18", ptr addrspace(1) %"24", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"13" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = inttoptr i64 %"13" to ptr addrspace(1) + %"12" = load i64, ptr addrspace(1) %"20", align 8 + store i64 %"12", ptr addrspace(5) %"7", align 8 + %"14" = load i64, ptr addrspace(5) %"7", align 8 + store i64 %"14", ptr addrspace(3) @"4", align 8 + %"15" = load i64, ptr addrspace(3) @"4", align 8 + store i64 %"15", ptr addrspace(5) %"8", align 8 + %"16" = load i64, ptr addrspace(5) %"6", align 8 + %"17" = load i64, ptr addrspace(5) %"8", align 8 + %"23" = inttoptr i64 %"16" to ptr addrspace(1) + store i64 %"17", ptr addrspace(1) %"23", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shf.ll b/ptx/src/test/spirv_run/shf.ll index 6eb5aa0..22be32a 100644 --- a/ptx/src/test/spirv_run/shf.ll +++ b/ptx/src/test/spirv_run/shf.ll @@ -1,38 +1,36 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @shf(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 { -"33": +define protected amdgpu_kernel void @shf(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { +"32": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr 
addrspace(4) %"24", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"26", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"27" = inttoptr i64 %"14" to ptr - %"13" = load i32, ptr %"27", align 4 - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"28" = inttoptr i64 %"16" to ptr - %"35" = getelementptr inbounds i8, ptr %"28", i64 4 - %"15" = load i32, ptr %"35", align 4 - store i32 %"15", ptr addrspace(5) %"7", align 4 - %"18" = load i32, ptr addrspace(5) %"6", align 4 - %"19" = load i32, ptr addrspace(5) %"7", align 4 - %"29" = call i32 @llvm.fshl.i32(i32 %"19", i32 %"18", i32 14) - store i32 %"29", ptr addrspace(5) %"8", align 4 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load i32, ptr addrspace(5) %"8", align 4 - %"32" = inttoptr i64 %"20" to ptr - store i32 %"21", ptr %"32", align 4 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"26" = inttoptr i64 %"13" to ptr + %"12" = load i32, ptr %"26", align 4 + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"27" = inttoptr i64 %"15" to ptr + %"34" = getelementptr inbounds i8, ptr %"27", i64 4 + %"14" = load i32, ptr %"34", align 4 + store i32 %"14", ptr addrspace(5) %"7", align 4 + %"17" = load i32, ptr addrspace(5) %"6", align 4 + %"18" = load i32, ptr addrspace(5) %"7", align 4 + %"28" = call i32 @llvm.fshl.i32(i32 %"18", i32 %"17", i32 14) + store i32 %"28", ptr addrspace(5) %"8", align 4 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = load i32, ptr addrspace(5) %"8", align 4 + %"31" = inttoptr i64 %"19" to ptr + store i32 %"20", ptr %"31", align 4 ret void } diff --git a/ptx/src/test/spirv_run/shl.ll 
b/ptx/src/test/spirv_run/shl.ll index a353e07..40c3365 100644 --- a/ptx/src/test/spirv_run/shl.ll +++ b/ptx/src/test/spirv_run/shl.ll @@ -1,32 +1,30 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"25": +define protected amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"24": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"21", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %0 = shl i64 %"15", 2 - %"22" = select i1 false, i64 0, i64 %0 - store i64 %"22", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"24" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"24", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"20", align 8 + store i64 %"11", ptr 
addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %0 = shl i64 %"14", 2 + %"21" = select i1 false, i64 0, i64 %0 + store i64 %"21", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"23" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"23", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shl_link_hack.ll b/ptx/src/test/spirv_run/shl_link_hack.ll index 8d695ad..9ac3883 100644 --- a/ptx/src/test/spirv_run/shl_link_hack.ll +++ b/ptx/src/test/spirv_run/shl_link_hack.ll @@ -3,37 +3,35 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr, i32) #0 -define protected amdgpu_kernel void @shl_link_hack(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #1 { -"30": +define protected amdgpu_kernel void @shl_link_hack(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #1 { +"29": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %"25" = inttoptr i64 %"14" to ptr - %"13" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr %"25", i32 2000000) - store i32 %"13", ptr addrspace(5) %"8", align 4 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"16" to ptr - 
%"15" = load i64, ptr %"26", align 8 - store i64 %"15", ptr addrspace(5) %"6", align 8 - %"18" = load i64, ptr addrspace(5) %"6", align 8 - %0 = shl i64 %"18", 2 - %"27" = select i1 false, i64 0, i64 %0 - store i64 %"27", ptr addrspace(5) %"7", align 8 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i64, ptr addrspace(5) %"7", align 8 - %"29" = inttoptr i64 %"19" to ptr - store i64 %"20", ptr %"29", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"5", align 8 + %"24" = inttoptr i64 %"13" to ptr + %"12" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr %"24", i32 2000000) + store i32 %"12", ptr addrspace(5) %"8", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"15" to ptr + %"14" = load i64, ptr %"25", align 8 + store i64 %"14", ptr addrspace(5) %"6", align 8 + %"17" = load i64, ptr addrspace(5) %"6", align 8 + %0 = shl i64 %"17", 2 + %"26" = select i1 false, i64 0, i64 %0 + store i64 %"26", ptr addrspace(5) %"7", align 8 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i64, ptr addrspace(5) %"7", align 8 + %"28" = inttoptr i64 %"18" to ptr + store i64 %"19", ptr %"28", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shl_overflow.ll b/ptx/src/test/spirv_run/shl_overflow.ll index 0213149..80d4871 100644 --- a/ptx/src/test/spirv_run/shl_overflow.ll +++ b/ptx/src/test/spirv_run/shl_overflow.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %"48", ptr addrspace(4) byref(i64) %"49") #0 { -"63": +define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #0 { +"62": %"11" = alloca i1, align 1, 
addrspace(5) store i1 false, ptr addrspace(5) %"11", align 1 - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -14,61 +12,61 @@ define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %" %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) + %"12" = load i64, ptr addrspace(4) %"47", align 8 + store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"48", align 8 - store i64 %"13", ptr addrspace(5) %"4", align 8 - %"14" = load i64, ptr addrspace(4) %"49", align 8 - store i64 %"14", ptr addrspace(5) %"5", align 8 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"50" = inttoptr i64 %"16" to ptr - %"15" = load i32, ptr %"50", align 4 - store i32 %"15", ptr addrspace(5) %"6", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"51" = inttoptr i64 %"18" to ptr - %"65" = getelementptr inbounds i8, ptr %"51", i64 4 - %"17" = load i32, ptr %"65", align 4 - store i32 %"17", ptr addrspace(5) %"8", align 4 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"52" = inttoptr i64 %"20" to ptr - %"67" = getelementptr inbounds i8, ptr %"52", i64 8 - %"19" = load i32, ptr %"67", align 4 - store i32 %"19", ptr addrspace(5) %"9", align 4 - %"22" = load i64, ptr addrspace(5) %"4", align 8 - %"53" = inttoptr i64 %"22" to ptr - %"69" = getelementptr inbounds i8, ptr %"53", i64 12 - %"21" = load i32, ptr %"69", align 4 - store i32 %"21", ptr addrspace(5) %"10", align 4 - %"24" = load i32, ptr addrspace(5) %"6", align 4 - %"25" = load i32, ptr addrspace(5) %"8", align 4 - %0 = icmp ugt i32 %"25", 31 - %1 = shl i32 %"24", %"25" - %"54" = select i1 %0, i32 0, i32 %1 - store i32 %"54", ptr addrspace(5) %"7", align 4 - %"26" = load i64, ptr addrspace(5) %"5", align 8 - %"27" = load i32, ptr 
addrspace(5) %"7", align 4 - %"56" = inttoptr i64 %"26" to ptr - store i32 %"27", ptr %"56", align 4 - %"29" = load i32, ptr addrspace(5) %"6", align 4 - %"30" = load i32, ptr addrspace(5) %"9", align 4 - %2 = icmp ugt i32 %"30", 31 - %3 = shl i32 %"29", %"30" - %"57" = select i1 %2, i32 0, i32 %3 - store i32 %"57", ptr addrspace(5) %"7", align 4 - %"31" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = load i32, ptr addrspace(5) %"7", align 4 - %"59" = inttoptr i64 %"31" to ptr - %"71" = getelementptr inbounds i8, ptr %"59", i64 4 - store i32 %"32", ptr %"71", align 4 - %"34" = load i32, ptr addrspace(5) %"6", align 4 - %"35" = load i32, ptr addrspace(5) %"10", align 4 - %4 = icmp ugt i32 %"35", 31 - %5 = shl i32 %"34", %"35" - %"60" = select i1 %4, i32 0, i32 %5 - store i32 %"60", ptr addrspace(5) %"7", align 4 - %"36" = load i64, ptr addrspace(5) %"5", align 8 - %"37" = load i32, ptr addrspace(5) %"7", align 4 - %"62" = inttoptr i64 %"36" to ptr - %"73" = getelementptr inbounds i8, ptr %"62", i64 8 - store i32 %"37", ptr %"73", align 4 + store i64 %"13", ptr addrspace(5) %"5", align 8 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"49" = inttoptr i64 %"15" to ptr + %"14" = load i32, ptr %"49", align 4 + store i32 %"14", ptr addrspace(5) %"6", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"50" = inttoptr i64 %"17" to ptr + %"64" = getelementptr inbounds i8, ptr %"50", i64 4 + %"16" = load i32, ptr %"64", align 4 + store i32 %"16", ptr addrspace(5) %"8", align 4 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"51" = inttoptr i64 %"19" to ptr + %"66" = getelementptr inbounds i8, ptr %"51", i64 8 + %"18" = load i32, ptr %"66", align 4 + store i32 %"18", ptr addrspace(5) %"9", align 4 + %"21" = load i64, ptr addrspace(5) %"4", align 8 + %"52" = inttoptr i64 %"21" to ptr + %"68" = getelementptr inbounds i8, ptr %"52", i64 12 + %"20" = load i32, ptr %"68", align 4 + store i32 %"20", ptr addrspace(5) %"10", align 4 + %"23" = load i32, ptr 
addrspace(5) %"6", align 4 + %"24" = load i32, ptr addrspace(5) %"8", align 4 + %0 = icmp ugt i32 %"24", 31 + %1 = shl i32 %"23", %"24" + %"53" = select i1 %0, i32 0, i32 %1 + store i32 %"53", ptr addrspace(5) %"7", align 4 + %"25" = load i64, ptr addrspace(5) %"5", align 8 + %"26" = load i32, ptr addrspace(5) %"7", align 4 + %"55" = inttoptr i64 %"25" to ptr + store i32 %"26", ptr %"55", align 4 + %"28" = load i32, ptr addrspace(5) %"6", align 4 + %"29" = load i32, ptr addrspace(5) %"9", align 4 + %2 = icmp ugt i32 %"29", 31 + %3 = shl i32 %"28", %"29" + %"56" = select i1 %2, i32 0, i32 %3 + store i32 %"56", ptr addrspace(5) %"7", align 4 + %"30" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = load i32, ptr addrspace(5) %"7", align 4 + %"58" = inttoptr i64 %"30" to ptr + %"70" = getelementptr inbounds i8, ptr %"58", i64 4 + store i32 %"31", ptr %"70", align 4 + %"33" = load i32, ptr addrspace(5) %"6", align 4 + %"34" = load i32, ptr addrspace(5) %"10", align 4 + %4 = icmp ugt i32 %"34", 31 + %5 = shl i32 %"33", %"34" + %"59" = select i1 %4, i32 0, i32 %5 + store i32 %"59", ptr addrspace(5) %"7", align 4 + %"35" = load i64, ptr addrspace(5) %"5", align 8 + %"36" = load i32, ptr addrspace(5) %"7", align 4 + %"61" = inttoptr i64 %"35" to ptr + %"72" = getelementptr inbounds i8, ptr %"61", i64 8 + store i32 %"36", ptr %"72", align 4 ret void } diff --git a/ptx/src/test/spirv_run/shr_s32.ll b/ptx/src/test/spirv_run/shr_s32.ll index 7bc5489..77c71f9 100644 --- a/ptx/src/test/spirv_run/shr_s32.ll +++ b/ptx/src/test/spirv_run/shr_s32.ll @@ -1,39 +1,37 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @shr_s32(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"29": +define protected amdgpu_kernel void @shr_s32(ptr 
addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"28": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"25", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"31" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load i32, ptr %"31", align 4 - store i32 %"14", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 - %0 = icmp ugt i32 %"18", 31 - %1 = ashr i32 %"17", %"18" - %"16" = select i1 %0, i32 -1, i32 %1 - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"28" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"28", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"24", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"30" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"30", align 4 + store i32 %"13", ptr addrspace(5) 
%"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 + %0 = icmp ugt i32 %"17", 31 + %1 = ashr i32 %"16", %"17" + %"15" = select i1 %0, i32 -1, i32 %1 + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"27" = inttoptr i64 %"18" to ptr + store i32 %"19", ptr %"27", align 4 ret void } diff --git a/ptx/src/test/spirv_run/shr_u32.ll b/ptx/src/test/spirv_run/shr_u32.ll index f337c1b..22c8761 100644 --- a/ptx/src/test/spirv_run/shr_u32.ll +++ b/ptx/src/test/spirv_run/shr_u32.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { -"46": +define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { +"45": %"11" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"11", align 1 - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -14,45 +12,45 @@ define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"37", %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) + %"12" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"37", align 8 - store i64 %"13", ptr addrspace(5) %"4", align 8 - %"14" = load i64, ptr addrspace(4) %"38", align 8 - store i64 %"14", ptr addrspace(5) %"5", align 8 - %"16" = 
load i64, ptr addrspace(5) %"4", align 8 - %"39" = inttoptr i64 %"16" to ptr - %"15" = load i32, ptr %"39", align 4 - store i32 %"15", ptr addrspace(5) %"6", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"40" = inttoptr i64 %"18" to ptr - %"48" = getelementptr inbounds i8, ptr %"40", i64 4 - %"17" = load i32, ptr %"48", align 4 - store i32 %"17", ptr addrspace(5) %"7", align 4 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"41" = inttoptr i64 %"20" to ptr - %"50" = getelementptr inbounds i8, ptr %"41", i64 8 - %"19" = load i32, ptr %"50", align 4 - store i32 %"19", ptr addrspace(5) %"8", align 4 - %"22" = load i32, ptr addrspace(5) %"6", align 4 - %"23" = load i32, ptr addrspace(5) %"7", align 4 - %0 = icmp ugt i32 %"23", 31 - %1 = lshr i32 %"22", %"23" - %"21" = select i1 %0, i32 0, i32 %1 - store i32 %"21", ptr addrspace(5) %"9", align 4 - %"25" = load i32, ptr addrspace(5) %"6", align 4 - %"26" = load i32, ptr addrspace(5) %"8", align 4 - %2 = icmp ugt i32 %"26", 31 - %3 = lshr i32 %"25", %"26" - %"24" = select i1 %2, i32 0, i32 %3 - store i32 %"24", ptr addrspace(5) %"10", align 4 - %"27" = load i64, ptr addrspace(5) %"5", align 8 - %"28" = load i32, ptr addrspace(5) %"9", align 4 - %"44" = inttoptr i64 %"27" to ptr - store i32 %"28", ptr %"44", align 4 - %"29" = load i64, ptr addrspace(5) %"5", align 8 - %"30" = load i32, ptr addrspace(5) %"10", align 4 - %"45" = inttoptr i64 %"29" to ptr - %"52" = getelementptr inbounds i8, ptr %"45", i64 4 - store i32 %"30", ptr %"52", align 4 + store i64 %"13", ptr addrspace(5) %"5", align 8 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"38" = inttoptr i64 %"15" to ptr + %"14" = load i32, ptr %"38", align 4 + store i32 %"14", ptr addrspace(5) %"6", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"39" = inttoptr i64 %"17" to ptr + %"47" = getelementptr inbounds i8, ptr %"39", i64 4 + %"16" = load i32, ptr %"47", align 4 + store i32 %"16", ptr addrspace(5) %"7", align 4 + %"19" = 
load i64, ptr addrspace(5) %"4", align 8 + %"40" = inttoptr i64 %"19" to ptr + %"49" = getelementptr inbounds i8, ptr %"40", i64 8 + %"18" = load i32, ptr %"49", align 4 + store i32 %"18", ptr addrspace(5) %"8", align 4 + %"21" = load i32, ptr addrspace(5) %"6", align 4 + %"22" = load i32, ptr addrspace(5) %"7", align 4 + %0 = icmp ugt i32 %"22", 31 + %1 = lshr i32 %"21", %"22" + %"20" = select i1 %0, i32 0, i32 %1 + store i32 %"20", ptr addrspace(5) %"9", align 4 + %"24" = load i32, ptr addrspace(5) %"6", align 4 + %"25" = load i32, ptr addrspace(5) %"8", align 4 + %2 = icmp ugt i32 %"25", 31 + %3 = lshr i32 %"24", %"25" + %"23" = select i1 %2, i32 0, i32 %3 + store i32 %"23", ptr addrspace(5) %"10", align 4 + %"26" = load i64, ptr addrspace(5) %"5", align 8 + %"27" = load i32, ptr addrspace(5) %"9", align 4 + %"43" = inttoptr i64 %"26" to ptr + store i32 %"27", ptr %"43", align 4 + %"28" = load i64, ptr addrspace(5) %"5", align 8 + %"29" = load i32, ptr addrspace(5) %"10", align 4 + %"44" = inttoptr i64 %"28" to ptr + %"51" = getelementptr inbounds i8, ptr %"44", i64 4 + store i32 %"29", ptr %"51", align 4 ret void } diff --git a/ptx/src/test/spirv_run/sign_extend.ll b/ptx/src/test/spirv_run/sign_extend.ll index bb72576..ef26261 100644 --- a/ptx/src/test/spirv_run/sign_extend.ll +++ b/ptx/src/test/spirv_run/sign_extend.ll @@ -1,28 +1,26 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @sign_extend(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 { -"20": +define protected amdgpu_kernel void @sign_extend(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { +"19": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - 
store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"14", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"16", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"17" = inttoptr i64 %"11" to ptr + %"16" = load i16, ptr %"17", align 2 + %"10" = sext i16 %"16" to i32 + store i32 %"10", ptr addrspace(5) %"6", align 4 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"13" = load i32, ptr addrspace(5) %"6", align 4 %"18" = inttoptr i64 %"12" to ptr - %"17" = load i16, ptr %"18", align 2 - %"11" = sext i16 %"17" to i32 - store i32 %"11", ptr addrspace(5) %"6", align 4 - %"13" = load i64, ptr addrspace(5) %"5", align 8 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %"19" = inttoptr i64 %"13" to ptr - store i32 %"14", ptr %"19", align 4 + store i32 %"13", ptr %"18", align 4 ret void } diff --git a/ptx/src/test/spirv_run/sin.ll b/ptx/src/test/spirv_run/sin.ll index 40ce553..f38aedd 100644 --- a/ptx/src/test/spirv_run/sin.ll +++ b/ptx/src/test/spirv_run/sin.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr 
addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load float, ptr %"19", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = call afn float @llvm.sin.f32(float %"14") - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store float %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load float, ptr %"18", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = call afn float @llvm.sin.f32(float %"13") + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store float %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/sqrt.ll b/ptx/src/test/spirv_run/sqrt.ll index 332f67a..c8e4ec0 100644 --- a/ptx/src/test/spirv_run/sqrt.ll +++ b/ptx/src/test/spirv_run/sqrt.ll @@ -1,30 +1,28 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @sqrt(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @sqrt(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load float, ptr %"19", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = call afn float @llvm.sqrt.f32(float %"14") - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store float %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load float, ptr %"18", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = call afn float @llvm.sqrt.f32(float %"13") + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr 
addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store float %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/sub.ll b/ptx/src/test/spirv_run/sub.ll index 2383be0..83fec5f 100644 --- a/ptx/src/test/spirv_run/sub.ll +++ b/ptx/src/test/spirv_run/sub.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"23": +define protected amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"21", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"14" = sub i64 %"15", 1 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"22", align 8 + store i64 %"10", ptr 
addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"20", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"13" = sub i64 %"14", 1 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/subc_cc.ll b/ptx/src/test/spirv_run/subc_cc.ll index 9a08872..0101b83 100644 --- a/ptx/src/test/spirv_run/subc_cc.ll +++ b/ptx/src/test/spirv_run/subc_cc.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"54", ptr addrspace(4) byref(i64) %"55") #0 { -"69": +define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #0 { +"72": %"13" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"13", align 1 - %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -16,70 +14,74 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"54", %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i32, align 4, addrspace(5) - %"15" = load i64, ptr addrspace(4) %"54", align 8 - store i64 %"15", ptr addrspace(5) %"4", align 8 - %"16" = load i64, ptr addrspace(4) %"55", align 8 - store i64 %"16", ptr addrspace(5) %"5", align 8 - %"18" = load i64, ptr addrspace(5) %"4", 
align 8 - %"57" = inttoptr i64 %"18" to ptr - %"56" = load i32, ptr %"57", align 4 - store i32 %"56", ptr addrspace(5) %"9", align 4 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"58" = inttoptr i64 %"20" to ptr - %"71" = getelementptr inbounds i8, ptr %"58", i64 4 - %"59" = load i32, ptr %"71", align 4 - store i32 %"59", ptr addrspace(5) %"10", align 4 - %"22" = load i64, ptr addrspace(5) %"4", align 8 - %"60" = inttoptr i64 %"22" to ptr - %"73" = getelementptr inbounds i8, ptr %"60", i64 8 - %"21" = load i32, ptr %"73", align 4 - store i32 %"21", ptr addrspace(5) %"11", align 4 - %"24" = load i64, ptr addrspace(5) %"4", align 8 - %"61" = inttoptr i64 %"24" to ptr - %"75" = getelementptr inbounds i8, ptr %"61", i64 12 - %"23" = load i32, ptr %"75", align 4 - store i32 %"23", ptr addrspace(5) %"12", align 4 - %"27" = load i32, ptr addrspace(5) %"9", align 4 - %"28" = load i32, ptr addrspace(5) %"10", align 4 - %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"27", i32 %"28") - %"25" = extractvalue { i32, i1 } %0, 0 - %"26" = extractvalue { i32, i1 } %0, 1 - store i32 %"25", ptr addrspace(5) %"6", align 4 - store i1 %"26", ptr addrspace(5) %"14", align 1 - %"31" = load i1, ptr addrspace(5) %"14", align 1 - %"32" = load i32, ptr addrspace(5) %"6", align 4 - %"33" = load i32, ptr addrspace(5) %"11", align 4 - %1 = zext i1 %"31" to i32 - %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"32", i32 %"33") + %"18" = load i64, ptr addrspace(4) %"57", align 8 + store i64 %"18", ptr addrspace(5) %"4", align 8 + %"19" = load i64, ptr addrspace(4) %"58", align 8 + store i64 %"19", ptr addrspace(5) %"5", align 8 + %"21" = load i64, ptr addrspace(5) %"4", align 8 + %"60" = inttoptr i64 %"21" to ptr + %"59" = load i32, ptr %"60", align 4 + store i32 %"59", ptr addrspace(5) %"9", align 4 + %"23" = load i64, ptr addrspace(5) %"4", align 8 + %"61" = inttoptr i64 %"23" to ptr + %"74" = getelementptr inbounds i8, ptr %"61", i64 4 + %"62" = load i32, ptr %"74", 
align 4 + store i32 %"62", ptr addrspace(5) %"10", align 4 + %"25" = load i64, ptr addrspace(5) %"4", align 8 + %"63" = inttoptr i64 %"25" to ptr + %"76" = getelementptr inbounds i8, ptr %"63", i64 8 + %"24" = load i32, ptr %"76", align 4 + store i32 %"24", ptr addrspace(5) %"11", align 4 + %"27" = load i64, ptr addrspace(5) %"4", align 8 + %"64" = inttoptr i64 %"27" to ptr + %"78" = getelementptr inbounds i8, ptr %"64", i64 12 + %"26" = load i32, ptr %"78", align 4 + store i32 %"26", ptr addrspace(5) %"12", align 4 + %"29" = load i32, ptr addrspace(5) %"9", align 4 + %"30" = load i32, ptr addrspace(5) %"10", align 4 + %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"29", i32 %"30") + %"28" = extractvalue { i32, i1 } %0, 0 + %"14" = extractvalue { i32, i1 } %0, 1 + store i32 %"28", ptr addrspace(5) %"6", align 4 + %"31" = xor i1 %"14", true + store i1 %"31", ptr addrspace(5) %"13", align 1 + %"32" = load i1, ptr addrspace(5) %"13", align 1 + %"15" = xor i1 %"32", true + %"34" = load i32, ptr addrspace(5) %"6", align 4 + %"35" = load i32, ptr addrspace(5) %"11", align 4 + %1 = zext i1 %"15" to i32 + %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"34", i32 %"35") %3 = extractvalue { i32, i1 } %2, 0 %4 = extractvalue { i32, i1 } %2, 1 %5 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %3, i32 %1) - %"29" = extractvalue { i32, i1 } %5, 0 + %"33" = extractvalue { i32, i1 } %5, 0 %6 = extractvalue { i32, i1 } %5, 1 - %"30" = xor i1 %4, %6 - store i32 %"29", ptr addrspace(5) %"7", align 4 - store i1 %"30", ptr addrspace(5) %"14", align 1 - %"35" = load i1, ptr addrspace(5) %"14", align 1 - %"36" = load i32, ptr addrspace(5) %"7", align 4 - %"37" = load i32, ptr addrspace(5) %"12", align 4 - %7 = zext i1 %"35" to i32 - %8 = sub i32 %"36", %"37" - %"34" = sub i32 %8, %7 - store i32 %"34", ptr addrspace(5) %"8", align 4 - %"38" = load i64, ptr addrspace(5) %"5", align 8 - %"39" = load i32, ptr addrspace(5) %"6", align 4 - %"66" = inttoptr i64 %"38" to 
ptr - store i32 %"39", ptr %"66", align 4 - %"40" = load i64, ptr addrspace(5) %"5", align 8 - %"41" = load i32, ptr addrspace(5) %"7", align 4 - %"67" = inttoptr i64 %"40" to ptr - %"77" = getelementptr inbounds i8, ptr %"67", i64 4 - store i32 %"41", ptr %"77", align 4 - %"42" = load i64, ptr addrspace(5) %"5", align 8 - %"43" = load i32, ptr addrspace(5) %"8", align 4 - %"68" = inttoptr i64 %"42" to ptr - %"79" = getelementptr inbounds i8, ptr %"68", i64 8 - store i32 %"43", ptr %"79", align 4 + %"16" = xor i1 %4, %6 + store i32 %"33", ptr addrspace(5) %"7", align 4 + %"36" = xor i1 %"16", true + store i1 %"36", ptr addrspace(5) %"13", align 1 + %"37" = load i1, ptr addrspace(5) %"13", align 1 + %"17" = xor i1 %"37", true + %"39" = load i32, ptr addrspace(5) %"7", align 4 + %"40" = load i32, ptr addrspace(5) %"12", align 4 + %7 = zext i1 %"17" to i32 + %8 = sub i32 %"39", %"40" + %"38" = sub i32 %8, %7 + store i32 %"38", ptr addrspace(5) %"8", align 4 + %"41" = load i64, ptr addrspace(5) %"5", align 8 + %"42" = load i32, ptr addrspace(5) %"6", align 4 + %"69" = inttoptr i64 %"41" to ptr + store i32 %"42", ptr %"69", align 4 + %"43" = load i64, ptr addrspace(5) %"5", align 8 + %"44" = load i32, ptr addrspace(5) %"7", align 4 + %"70" = inttoptr i64 %"43" to ptr + %"80" = getelementptr inbounds i8, ptr %"70", i64 4 + store i32 %"44", ptr %"80", align 4 + %"45" = load i64, ptr addrspace(5) %"5", align 8 + %"46" = load i32, ptr addrspace(5) %"8", align 4 + %"71" = inttoptr i64 %"45" to ptr + %"82" = getelementptr inbounds i8, ptr %"71", i64 8 + store i32 %"46", ptr %"82", align 4 ret void } diff --git a/ptx/src/test/spirv_run/subc_cc2.ll b/ptx/src/test/spirv_run/subc_cc2.ll deleted file mode 100644 index aded371..0000000 --- a/ptx/src/test/spirv_run/subc_cc2.ll +++ /dev/null @@ -1,127 +0,0 @@ -target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" -target triple = "amdgcn-amd-amdhsa" - -define protected amdgpu_kernel void @subc_cc2(ptr addrspace(4) byref(i64) %"86", ptr addrspace(4) byref(i64) %"87") #0 { -"112": - %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 - %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 - %"4" = alloca i64, align 8, addrspace(5) - %"5" = alloca i64, align 8, addrspace(5) - %"6" = alloca i32, align 4, addrspace(5) - %"7" = alloca i32, align 4, addrspace(5) - %"8" = alloca i32, align 4, addrspace(5) - %"9" = alloca i32, align 4, addrspace(5) - %"10" = alloca i32, align 4, addrspace(5) - %"11" = alloca i32, align 4, addrspace(5) - %"12" = alloca i32, align 4, addrspace(5) - %"13" = alloca i32, align 4, addrspace(5) - %"16" = load i64, ptr addrspace(4) %"87", align 8 - store i64 %"16", ptr addrspace(5) %"5", align 8 - %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) - %"88" = extractvalue { i32, i1 } %0, 0 - %"18" = extractvalue { i32, i1 } %0, 1 - store i32 %"88", ptr addrspace(5) %"6", align 4 - store i1 %"18", ptr addrspace(5) %"15", align 1 - %"21" = load i1, ptr addrspace(5) %"15", align 1 - %1 = zext i1 %"21" to i32 - %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 -1) - %3 = extractvalue { i32, i1 } %2, 0 - %4 = extractvalue { i32, i1 } %2, 1 - %5 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %3, i32 %1) - %"89" = extractvalue { i32, i1 } %5, 0 - %6 = extractvalue { i32, i1 } %5, 1 - %"20" = xor i1 %4, %6 - store i32 %"89", ptr addrspace(5) %"7", align 4 - store i1 %"20", ptr addrspace(5) %"15", align 1 - %"23" = load i1, ptr addrspace(5) %"15", align 1 - %7 = zext i1 %"23" to i32 - %"90" = sub i32 2, %7 - store i32 %"90", ptr addrspace(5) %"8", align 4 - %"25" = load i1, ptr 
addrspace(5) %"14", align 1 - %8 = zext i1 %"25" to i32 - %"91" = add i32 0, %8 - store i32 %"91", ptr addrspace(5) %"9", align 4 - %9 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) - %"92" = extractvalue { i32, i1 } %9, 0 - %"27" = extractvalue { i32, i1 } %9, 1 - store i32 %"92", ptr addrspace(5) %"6", align 4 - store i1 %"27", ptr addrspace(5) %"15", align 1 - %"30" = load i1, ptr addrspace(5) %"15", align 1 - %10 = zext i1 %"30" to i32 - %11 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) - %12 = extractvalue { i32, i1 } %11, 0 - %13 = extractvalue { i32, i1 } %11, 1 - %14 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %12, i32 %10) - %"93" = extractvalue { i32, i1 } %14, 0 - %15 = extractvalue { i32, i1 } %14, 1 - %"29" = xor i1 %13, %15 - store i32 %"93", ptr addrspace(5) %"10", align 4 - store i1 %"29", ptr addrspace(5) %"15", align 1 - %"32" = load i1, ptr addrspace(5) %"15", align 1 - %16 = zext i1 %"32" to i32 - %"94" = sub i32 2, %16 - store i32 %"94", ptr addrspace(5) %"11", align 4 - %17 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) - %"95" = extractvalue { i32, i1 } %17, 0 - %"34" = extractvalue { i32, i1 } %17, 1 - store i32 %"95", ptr addrspace(5) %"6", align 4 - store i1 %"34", ptr addrspace(5) %"15", align 1 - %"37" = load i1, ptr addrspace(5) %"15", align 1 - %18 = zext i1 %"37" to i32 - %19 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) - %20 = extractvalue { i32, i1 } %19, 0 - %21 = extractvalue { i32, i1 } %19, 1 - %22 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %20, i32 %18) - %"96" = extractvalue { i32, i1 } %22, 0 - %23 = extractvalue { i32, i1 } %22, 1 - %"36" = xor i1 %21, %23 - store i32 %"96", ptr addrspace(5) %"12", align 4 - store i1 %"36", ptr addrspace(5) %"15", align 1 - %"39" = load i1, ptr addrspace(5) %"15", align 1 - %24 = zext i1 %"39" to i32 - %"97" = sub i32 2, %24 - store i32 %"97", ptr addrspace(5) %"13", align 4 - %"40" = load i64, ptr addrspace(5) 
%"5", align 8 - %"41" = load i32, ptr addrspace(5) %"7", align 4 - %"98" = inttoptr i64 %"40" to ptr - store i32 %"41", ptr %"98", align 4 - %"42" = load i64, ptr addrspace(5) %"5", align 8 - %"43" = load i32, ptr addrspace(5) %"8", align 4 - %"100" = inttoptr i64 %"42" to ptr - %"114" = getelementptr inbounds i8, ptr %"100", i64 4 - store i32 %"43", ptr %"114", align 4 - %"44" = load i64, ptr addrspace(5) %"5", align 8 - %"45" = load i32, ptr addrspace(5) %"9", align 4 - %"102" = inttoptr i64 %"44" to ptr - %"116" = getelementptr inbounds i8, ptr %"102", i64 8 - store i32 %"45", ptr %"116", align 4 - %"46" = load i64, ptr addrspace(5) %"5", align 8 - %"47" = load i32, ptr addrspace(5) %"10", align 4 - %"104" = inttoptr i64 %"46" to ptr - %"118" = getelementptr inbounds i8, ptr %"104", i64 12 - store i32 %"47", ptr %"118", align 4 - %"48" = load i64, ptr addrspace(5) %"5", align 8 - %"49" = load i32, ptr addrspace(5) %"11", align 4 - %"106" = inttoptr i64 %"48" to ptr - %"120" = getelementptr inbounds i8, ptr %"106", i64 16 - store i32 %"49", ptr %"120", align 4 - %"50" = load i64, ptr addrspace(5) %"5", align 8 - %"51" = load i32, ptr addrspace(5) %"12", align 4 - %"108" = inttoptr i64 %"50" to ptr - %"122" = getelementptr inbounds i8, ptr %"108", i64 20 - store i32 %"51", ptr %"122", align 4 - %"52" = load i64, ptr addrspace(5) %"5", align 8 - %"53" = load i32, ptr addrspace(5) %"13", align 4 - %"110" = inttoptr i64 %"52" to ptr - %"124" = getelementptr inbounds i8, ptr %"110", i64 24 - store i32 %"53", ptr %"124", align 4 - ret void -} - -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1 - -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } diff --git 
a/ptx/src/test/spirv_run/subc_cc2.ptx b/ptx/src/test/spirv_run/subc_cc2.ptx deleted file mode 100644 index 2c776a4..0000000 --- a/ptx/src/test/spirv_run/subc_cc2.ptx +++ /dev/null @@ -1,55 +0,0 @@ -.version 6.5 -.target sm_30 -.address_size 64 - -.visible .entry subc_cc2( - .param .u64 input, - .param .u64 output -) -{ - .reg .u64 in_addr; - .reg .u64 out_addr; - .reg .b32 unused; - - .reg .b32 result_1; - .reg .b32 carry_out_1_1; - .reg .b32 carry_out_1_2; - .reg .b32 result_2; - .reg .b32 carry_out_2; - .reg .b32 result_3; - .reg .b32 carry_out_3; - - ld.param.u64 out_addr, [output]; - - // set carry=1 - sub.cc.s32 unused, 0, 1; - // overflow (b + CC.CF), no underflow in whole operation - subc.cc.s32 result_1, 0, 4294967295; - // write carry - subc.s32 carry_out_1_1, 2, 0; - // make sure the overflow in (b + CC.CF) is not detected by addc - addc.s32 carry_out_1_2, 0, 0; - - // set carry=1 - sub.cc.s32 unused, 0, 1; - // underflow in substraction, underflow in whole operation - subc.cc.s32 result_2, 0, 0; - // write carry - subc.s32 carry_out_2, 2, 0; - - // set carry=0 - sub.cc.s32 unused, 0, 0; - // same operation as bove, but 0-1-0 instead of 0-0-1 - subc.cc.s32 result_3, 0, 1; - // write carry - subc.s32 carry_out_3, 2, 0; - - st.s32 [out_addr], result_1; - st.s32 [out_addr+4], carry_out_1_1; - st.s32 [out_addr+8], carry_out_1_2; - st.s32 [out_addr+12], result_2; - st.s32 [out_addr+16], carry_out_2; - st.s32 [out_addr+20], result_3; - st.s32 [out_addr+24], carry_out_3; - ret; -} diff --git a/ptx/src/test/spirv_run/vector.ll b/ptx/src/test/spirv_run/vector.ll index a53904e..b60aaec 100644 --- a/ptx/src/test/spirv_run/vector.ll +++ b/ptx/src/test/spirv_run/vector.ll @@ -1,95 +1,91 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define private <2 x i32> @"1"(<2 x i32> %"20") 
#0 { -"52": +define private <2 x i32> @"1"(<2 x i32> %"18") #0 { +"50": %"3" = alloca <2 x i32>, align 8, addrspace(5) %"2" = alloca <2 x i32>, align 8, addrspace(5) %"16" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"16", align 1 - %"17" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"17", align 1 %"4" = alloca <2 x i32>, align 8, addrspace(5) %"5" = alloca i32, align 4, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) - store <2 x i32> %"20", ptr addrspace(5) %"3", align 8 + store <2 x i32> %"18", ptr addrspace(5) %"3", align 8 %0 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 0 - %"22" = load i32, ptr addrspace(5) %0, align 4 + %"20" = load i32, ptr addrspace(5) %0, align 4 %1 = alloca i32, align 4, addrspace(5) - store i32 %"22", ptr addrspace(5) %1, align 4 - %"21" = load i32, ptr addrspace(5) %1, align 4 - store i32 %"21", ptr addrspace(5) %"5", align 4 + store i32 %"20", ptr addrspace(5) %1, align 4 + %"19" = load i32, ptr addrspace(5) %1, align 4 + store i32 %"19", ptr addrspace(5) %"5", align 4 %2 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 1 - %"24" = load i32, ptr addrspace(5) %2, align 4 + %"22" = load i32, ptr addrspace(5) %2, align 4 %3 = alloca i32, align 4, addrspace(5) - store i32 %"24", ptr addrspace(5) %3, align 4 - %"23" = load i32, ptr addrspace(5) %3, align 4 + store i32 %"22", ptr addrspace(5) %3, align 4 + %"21" = load i32, ptr addrspace(5) %3, align 4 + store i32 %"21", ptr addrspace(5) %"6", align 4 + %"24" = load i32, ptr addrspace(5) %"5", align 4 + %"25" = load i32, ptr addrspace(5) %"6", align 4 + %"23" = add i32 %"24", %"25" store i32 %"23", ptr addrspace(5) %"6", align 4 - %"26" = load i32, ptr addrspace(5) %"5", align 4 %"27" = load i32, ptr addrspace(5) %"6", align 4 - %"25" = add i32 %"26", %"27" - store i32 %"25", ptr addrspace(5) %"6", align 4 - %"29" = load i32, ptr addrspace(5) %"6", align 4 %4 = alloca i32, align 4, 
addrspace(5) - store i32 %"29", ptr addrspace(5) %4, align 4 - %"28" = load i32, ptr addrspace(5) %4, align 4 + store i32 %"27", ptr addrspace(5) %4, align 4 + %"26" = load i32, ptr addrspace(5) %4, align 4 %5 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 0 - store i32 %"28", ptr addrspace(5) %5, align 4 - %"31" = load i32, ptr addrspace(5) %"6", align 4 + store i32 %"26", ptr addrspace(5) %5, align 4 + %"29" = load i32, ptr addrspace(5) %"6", align 4 %6 = alloca i32, align 4, addrspace(5) - store i32 %"31", ptr addrspace(5) %6, align 4 - %"30" = load i32, ptr addrspace(5) %6, align 4 + store i32 %"29", ptr addrspace(5) %6, align 4 + %"28" = load i32, ptr addrspace(5) %6, align 4 %7 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1 - store i32 %"30", ptr addrspace(5) %7, align 4 + store i32 %"28", ptr addrspace(5) %7, align 4 %8 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1 - %"33" = load i32, ptr addrspace(5) %8, align 4 + %"31" = load i32, ptr addrspace(5) %8, align 4 %9 = alloca i32, align 4, addrspace(5) - store i32 %"33", ptr addrspace(5) %9, align 4 - %"32" = load i32, ptr addrspace(5) %9, align 4 + store i32 %"31", ptr addrspace(5) %9, align 4 + %"30" = load i32, ptr addrspace(5) %9, align 4 %10 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 0 - store i32 %"32", ptr addrspace(5) %10, align 4 - %"35" = load <2 x i32>, ptr addrspace(5) %"4", align 8 + store i32 %"30", ptr addrspace(5) %10, align 4 + %"33" = load <2 x i32>, ptr addrspace(5) %"4", align 8 %11 = alloca <2 x i32>, align 8, addrspace(5) - store <2 x i32> %"35", ptr addrspace(5) %11, align 8 - %"34" = load <2 x i32>, ptr addrspace(5) %11, align 8 - store <2 x i32> %"34", ptr addrspace(5) %"2", align 8 - %"36" = load <2 x i32>, ptr addrspace(5) %"2", align 8 - ret <2 x i32> %"36" + store <2 x i32> %"33", ptr addrspace(5) %11, align 8 + %"32" = load <2 x i32>, ptr addrspace(5) %11, align 8 + store <2 x i32> 
%"32", ptr addrspace(5) %"2", align 8 + %"34" = load <2 x i32>, ptr addrspace(5) %"2", align 8 + ret <2 x i32> %"34" } -define protected amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #0 { -"53": - %"18" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"18", align 1 - %"19" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"19", align 1 +define protected amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 { +"51": + %"17" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"17", align 1 %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) %"12" = alloca <2 x i32>, align 8, addrspace(5) %"13" = alloca i32, align 4, addrspace(5) %"14" = alloca i32, align 4, addrspace(5) %"15" = alloca i64, align 8, addrspace(5) - %"37" = load i64, ptr addrspace(4) %"47", align 8 - store i64 %"37", ptr addrspace(5) %"10", align 8 - %"38" = load i64, ptr addrspace(4) %"48", align 8 - store i64 %"38", ptr addrspace(5) %"11", align 8 - %"40" = load i64, ptr addrspace(5) %"10", align 8 - %"49" = inttoptr i64 %"40" to ptr - %"39" = load <2 x i32>, ptr %"49", align 8 + %"35" = load i64, ptr addrspace(4) %"45", align 8 + store i64 %"35", ptr addrspace(5) %"10", align 8 + %"36" = load i64, ptr addrspace(4) %"46", align 8 + store i64 %"36", ptr addrspace(5) %"11", align 8 + %"38" = load i64, ptr addrspace(5) %"10", align 8 + %"47" = inttoptr i64 %"38" to ptr + %"37" = load <2 x i32>, ptr %"47", align 8 + store <2 x i32> %"37", ptr addrspace(5) %"12", align 8 + %"40" = load <2 x i32>, ptr addrspace(5) %"12", align 8 + %"39" = call <2 x i32> @"1"(<2 x i32> %"40") store <2 x i32> %"39", ptr addrspace(5) %"12", align 8 %"42" = load <2 x i32>, ptr addrspace(5) %"12", align 8 - %"41" = call <2 x i32> @"1"(<2 x i32> %"42") - store <2 x i32> %"41", ptr addrspace(5) %"12", align 8 - %"44" = load <2 x i32>, ptr 
addrspace(5) %"12", align 8 - %"50" = bitcast <2 x i32> %"44" to i64 + %"48" = bitcast <2 x i32> %"42" to i64 %0 = alloca i64, align 8, addrspace(5) - store i64 %"50", ptr addrspace(5) %0, align 8 - %"43" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"43", ptr addrspace(5) %"15", align 8 - %"45" = load i64, ptr addrspace(5) %"11", align 8 - %"46" = load <2 x i32>, ptr addrspace(5) %"12", align 8 - %"51" = inttoptr i64 %"45" to ptr - store <2 x i32> %"46", ptr %"51", align 8 + store i64 %"48", ptr addrspace(5) %0, align 8 + %"41" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"41", ptr addrspace(5) %"15", align 8 + %"43" = load i64, ptr addrspace(5) %"11", align 8 + %"44" = load <2 x i32>, ptr addrspace(5) %"12", align 8 + %"49" = inttoptr i64 %"43" to ptr + store <2 x i32> %"44", ptr %"49", align 8 ret void } diff --git a/ptx/src/test/spirv_run/vector4.ll b/ptx/src/test/spirv_run/vector4.ll index 53187f7..494b1af 100644 --- a/ptx/src/test/spirv_run/vector4.ll +++ b/ptx/src/test/spirv_run/vector4.ll @@ -1,34 +1,32 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": +define protected amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"23": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca <4 x i32>, align 16, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = 
load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"13" to ptr - %"12" = load <4 x i32>, ptr %"20", align 16 - store <4 x i32> %"12", ptr addrspace(5) %"6", align 16 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"12" to ptr + %"11" = load <4 x i32>, ptr %"19", align 16 + store <4 x i32> %"11", ptr addrspace(5) %"6", align 16 %0 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %"6", i32 0, i32 3 - %"15" = load i32, ptr addrspace(5) %0, align 4 + %"14" = load i32, ptr addrspace(5) %0, align 4 %1 = alloca i32, align 4, addrspace(5) - store i32 %"15", ptr addrspace(5) %1, align 4 - %"21" = load i32, ptr addrspace(5) %1, align 4 - store i32 %"21", ptr addrspace(5) %"7", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i32, ptr addrspace(5) %"7", align 4 - %"23" = inttoptr i64 %"16" to ptr - store i32 %"17", ptr %"23", align 4 + store i32 %"14", ptr addrspace(5) %1, align 4 + %"20" = load i32, ptr addrspace(5) %1, align 4 + store i32 %"20", ptr addrspace(5) %"7", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i32, ptr addrspace(5) %"7", align 4 + %"22" = inttoptr i64 %"15" to ptr + store i32 %"16", ptr %"22", align 4 ret void } diff --git a/ptx/src/test/spirv_run/vector_extract.ll b/ptx/src/test/spirv_run/vector_extract.ll index bceac42..d877dc7 100644 --- a/ptx/src/test/spirv_run/vector_extract.ll +++ b/ptx/src/test/spirv_run/vector_extract.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel 
void @vector_extract(ptr addrspace(4) byref(i64) %"49", ptr addrspace(4) byref(i64) %"50") #0 { -"61": +define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"48", ptr addrspace(4) byref(i64) %"49") #0 { +"60": %"17" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"17", align 1 - %"18" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"18", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i16, align 2, addrspace(5) @@ -14,83 +12,83 @@ define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"8" = alloca i16, align 2, addrspace(5) %"9" = alloca i16, align 2, addrspace(5) %"10" = alloca <4 x i16>, align 8, addrspace(5) + %"18" = load i64, ptr addrspace(4) %"48", align 8 + store i64 %"18", ptr addrspace(5) %"4", align 8 %"19" = load i64, ptr addrspace(4) %"49", align 8 - store i64 %"19", ptr addrspace(5) %"4", align 8 - %"20" = load i64, ptr addrspace(4) %"50", align 8 - store i64 %"20", ptr addrspace(5) %"5", align 8 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"51" = inttoptr i64 %"21" to ptr addrspace(1) - %"11" = load <4 x i8>, ptr addrspace(1) %"51", align 4 - %"52" = extractelement <4 x i8> %"11", i32 0 - %"53" = extractelement <4 x i8> %"11", i32 1 - %"54" = extractelement <4 x i8> %"11", i32 2 - %"55" = extractelement <4 x i8> %"11", i32 3 + store i64 %"19", ptr addrspace(5) %"5", align 8 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"50" = inttoptr i64 %"20" to ptr addrspace(1) + %"11" = load <4 x i8>, ptr addrspace(1) %"50", align 4 + %"51" = extractelement <4 x i8> %"11", i32 0 + %"52" = extractelement <4 x i8> %"11", i32 1 + %"53" = extractelement <4 x i8> %"11", i32 2 + %"54" = extractelement <4 x i8> %"11", i32 3 + %"21" = zext i8 %"51" to i16 %"22" = zext i8 %"52" to i16 %"23" = zext i8 %"53" to i16 %"24" = zext i8 %"54" to i16 - %"25" = zext i8 %"55" to i16 - store i16 %"22", ptr 
addrspace(5) %"6", align 2 - store i16 %"23", ptr addrspace(5) %"7", align 2 - store i16 %"24", ptr addrspace(5) %"8", align 2 - store i16 %"25", ptr addrspace(5) %"9", align 2 - %"26" = load i16, ptr addrspace(5) %"7", align 2 - %"27" = load i16, ptr addrspace(5) %"8", align 2 - %"28" = load i16, ptr addrspace(5) %"9", align 2 - %"29" = load i16, ptr addrspace(5) %"6", align 2 - %0 = insertelement <4 x i16> undef, i16 %"26", i32 0 - %1 = insertelement <4 x i16> %0, i16 %"27", i32 1 - %2 = insertelement <4 x i16> %1, i16 %"28", i32 2 - %"12" = insertelement <4 x i16> %2, i16 %"29", i32 3 + store i16 %"21", ptr addrspace(5) %"6", align 2 + store i16 %"22", ptr addrspace(5) %"7", align 2 + store i16 %"23", ptr addrspace(5) %"8", align 2 + store i16 %"24", ptr addrspace(5) %"9", align 2 + %"25" = load i16, ptr addrspace(5) %"7", align 2 + %"26" = load i16, ptr addrspace(5) %"8", align 2 + %"27" = load i16, ptr addrspace(5) %"9", align 2 + %"28" = load i16, ptr addrspace(5) %"6", align 2 + %0 = insertelement <4 x i16> undef, i16 %"25", i32 0 + %1 = insertelement <4 x i16> %0, i16 %"26", i32 1 + %2 = insertelement <4 x i16> %1, i16 %"27", i32 2 + %"12" = insertelement <4 x i16> %2, i16 %"28", i32 3 %3 = alloca <4 x i16>, align 8, addrspace(5) store <4 x i16> %"12", ptr addrspace(5) %3, align 8 - %"30" = load <4 x i16>, ptr addrspace(5) %3, align 8 - store <4 x i16> %"30", ptr addrspace(5) %"10", align 8 - %"31" = load <4 x i16>, ptr addrspace(5) %"10", align 8 + %"29" = load <4 x i16>, ptr addrspace(5) %3, align 8 + store <4 x i16> %"29", ptr addrspace(5) %"10", align 8 + %"30" = load <4 x i16>, ptr addrspace(5) %"10", align 8 %4 = alloca <4 x i16>, align 8, addrspace(5) - store <4 x i16> %"31", ptr addrspace(5) %4, align 8 + store <4 x i16> %"30", ptr addrspace(5) %4, align 8 %"13" = load <4 x i16>, ptr addrspace(5) %4, align 8 - %"32" = extractelement <4 x i16> %"13", i32 0 - %"33" = extractelement <4 x i16> %"13", i32 1 - %"34" = extractelement <4 x i16> %"13", i32 2 
- %"35" = extractelement <4 x i16> %"13", i32 3 - store i16 %"32", ptr addrspace(5) %"8", align 2 - store i16 %"33", ptr addrspace(5) %"9", align 2 - store i16 %"34", ptr addrspace(5) %"6", align 2 - store i16 %"35", ptr addrspace(5) %"7", align 2 - %"36" = load i16, ptr addrspace(5) %"8", align 2 - %"37" = load i16, ptr addrspace(5) %"9", align 2 - %"38" = load i16, ptr addrspace(5) %"6", align 2 - %"39" = load i16, ptr addrspace(5) %"7", align 2 - %5 = insertelement <4 x i16> undef, i16 %"36", i32 0 - %6 = insertelement <4 x i16> %5, i16 %"37", i32 1 - %7 = insertelement <4 x i16> %6, i16 %"38", i32 2 - %"15" = insertelement <4 x i16> %7, i16 %"39", i32 3 + %"31" = extractelement <4 x i16> %"13", i32 0 + %"32" = extractelement <4 x i16> %"13", i32 1 + %"33" = extractelement <4 x i16> %"13", i32 2 + %"34" = extractelement <4 x i16> %"13", i32 3 + store i16 %"31", ptr addrspace(5) %"8", align 2 + store i16 %"32", ptr addrspace(5) %"9", align 2 + store i16 %"33", ptr addrspace(5) %"6", align 2 + store i16 %"34", ptr addrspace(5) %"7", align 2 + %"35" = load i16, ptr addrspace(5) %"8", align 2 + %"36" = load i16, ptr addrspace(5) %"9", align 2 + %"37" = load i16, ptr addrspace(5) %"6", align 2 + %"38" = load i16, ptr addrspace(5) %"7", align 2 + %5 = insertelement <4 x i16> undef, i16 %"35", i32 0 + %6 = insertelement <4 x i16> %5, i16 %"36", i32 1 + %7 = insertelement <4 x i16> %6, i16 %"37", i32 2 + %"15" = insertelement <4 x i16> %7, i16 %"38", i32 3 %8 = alloca <4 x i16>, align 8, addrspace(5) store <4 x i16> %"15", ptr addrspace(5) %8, align 8 %"14" = load <4 x i16>, ptr addrspace(5) %8, align 8 - %"40" = extractelement <4 x i16> %"14", i32 0 - %"41" = extractelement <4 x i16> %"14", i32 1 - %"42" = extractelement <4 x i16> %"14", i32 2 - %"43" = extractelement <4 x i16> %"14", i32 3 - store i16 %"40", ptr addrspace(5) %"9", align 2 - store i16 %"41", ptr addrspace(5) %"6", align 2 - store i16 %"42", ptr addrspace(5) %"7", align 2 - store i16 %"43", ptr 
addrspace(5) %"8", align 2 - %"44" = load i16, ptr addrspace(5) %"6", align 2 - %"45" = load i16, ptr addrspace(5) %"7", align 2 - %"46" = load i16, ptr addrspace(5) %"8", align 2 - %"47" = load i16, ptr addrspace(5) %"9", align 2 + %"39" = extractelement <4 x i16> %"14", i32 0 + %"40" = extractelement <4 x i16> %"14", i32 1 + %"41" = extractelement <4 x i16> %"14", i32 2 + %"42" = extractelement <4 x i16> %"14", i32 3 + store i16 %"39", ptr addrspace(5) %"9", align 2 + store i16 %"40", ptr addrspace(5) %"6", align 2 + store i16 %"41", ptr addrspace(5) %"7", align 2 + store i16 %"42", ptr addrspace(5) %"8", align 2 + %"43" = load i16, ptr addrspace(5) %"6", align 2 + %"44" = load i16, ptr addrspace(5) %"7", align 2 + %"45" = load i16, ptr addrspace(5) %"8", align 2 + %"46" = load i16, ptr addrspace(5) %"9", align 2 + %"55" = trunc i16 %"43" to i8 %"56" = trunc i16 %"44" to i8 %"57" = trunc i16 %"45" to i8 %"58" = trunc i16 %"46" to i8 - %"59" = trunc i16 %"47" to i8 - %9 = insertelement <4 x i8> undef, i8 %"56", i32 0 - %10 = insertelement <4 x i8> %9, i8 %"57", i32 1 - %11 = insertelement <4 x i8> %10, i8 %"58", i32 2 - %"16" = insertelement <4 x i8> %11, i8 %"59", i32 3 - %"48" = load i64, ptr addrspace(5) %"5", align 8 - %"60" = inttoptr i64 %"48" to ptr addrspace(1) - store <4 x i8> %"16", ptr addrspace(1) %"60", align 4 + %9 = insertelement <4 x i8> undef, i8 %"55", i32 0 + %10 = insertelement <4 x i8> %9, i8 %"56", i32 1 + %11 = insertelement <4 x i8> %10, i8 %"57", i32 2 + %"16" = insertelement <4 x i8> %11, i8 %"58", i32 3 + %"47" = load i64, ptr addrspace(5) %"5", align 8 + %"59" = inttoptr i64 %"47" to ptr addrspace(1) + store <4 x i8> %"16", ptr addrspace(1) %"59", align 4 ret void } diff --git a/ptx/src/test/spirv_run/vote_ballot.ll b/ptx/src/test/spirv_run/vote_ballot.ll index 200eccc..fd31f1a 100644 --- a/ptx/src/test/spirv_run/vote_ballot.ll +++ b/ptx/src/test/spirv_run/vote_ballot.ll @@ -3,48 +3,46 @@ target triple = "amdgcn-amd-amdhsa" declare i32 
@__zluda_ptx_impl__vote_sync_ballot_b32_32(i1, i32) #0 -define protected amdgpu_kernel void @vote_ballot(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 { -"51": +define protected amdgpu_kernel void @vote_ballot(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 { +"50": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) - %"12" = load i64, ptr addrspace(4) %"42", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"43" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 true, i32 1) - store i32 %"43", ptr addrspace(5) %"6", align 4 - %"44" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 false, i32 16777215) - store i32 %"44", ptr addrspace(5) %"7", align 4 - %"45" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 true, i32 2) - store i32 %"45", ptr addrspace(5) %"8", align 4 - %"46" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 true, i32 3) - store i32 %"46", ptr addrspace(5) %"9", align 4 - %"17" = load i64, ptr addrspace(5) %"5", align 8 - %"18" = load i32, ptr addrspace(5) %"6", align 4 - %"47" = inttoptr i64 %"17" to ptr - %"57" = getelementptr inbounds i8, ptr %"47", i64 0 - store i32 %"18", ptr %"57", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"7", align 4 - %"48" = inttoptr i64 %"19" to ptr - %"59" = getelementptr inbounds i8, ptr %"48", i64 4 - store i32 %"20", ptr %"59", align 4 - %"21" = load i64, ptr addrspace(5) %"5", align 8 - %"22" = load i32, ptr addrspace(5) %"8", align 4 - %"49" = inttoptr i64 %"21" to ptr - %"61" = getelementptr inbounds 
i8, ptr %"49", i64 8 - store i32 %"22", ptr %"61", align 4 - %"23" = load i64, ptr addrspace(5) %"5", align 8 - %"24" = load i32, ptr addrspace(5) %"9", align 4 - %"50" = inttoptr i64 %"23" to ptr - %"63" = getelementptr inbounds i8, ptr %"50", i64 12 - store i32 %"24", ptr %"63", align 4 + %"11" = load i64, ptr addrspace(4) %"41", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"42" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 true, i32 1) + store i32 %"42", ptr addrspace(5) %"6", align 4 + %"43" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 false, i32 16777215) + store i32 %"43", ptr addrspace(5) %"7", align 4 + %"44" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 true, i32 2) + store i32 %"44", ptr addrspace(5) %"8", align 4 + %"45" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 true, i32 3) + store i32 %"45", ptr addrspace(5) %"9", align 4 + %"16" = load i64, ptr addrspace(5) %"5", align 8 + %"17" = load i32, ptr addrspace(5) %"6", align 4 + %"46" = inttoptr i64 %"16" to ptr + %"56" = getelementptr inbounds i8, ptr %"46", i64 0 + store i32 %"17", ptr %"56", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"7", align 4 + %"47" = inttoptr i64 %"18" to ptr + %"58" = getelementptr inbounds i8, ptr %"47", i64 4 + store i32 %"19", ptr %"58", align 4 + %"20" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = load i32, ptr addrspace(5) %"8", align 4 + %"48" = inttoptr i64 %"20" to ptr + %"60" = getelementptr inbounds i8, ptr %"48", i64 8 + store i32 %"21", ptr %"60", align 4 + %"22" = load i64, ptr addrspace(5) %"5", align 8 + %"23" = load i32, ptr addrspace(5) %"9", align 4 + %"49" = inttoptr i64 %"22" to ptr + %"62" = getelementptr inbounds i8, ptr %"49", i64 12 + store i32 %"23", ptr %"62", align 4 ret void } diff --git a/ptx/src/test/spirv_run/vshr.ll b/ptx/src/test/spirv_run/vshr.ll index e3b6b5e..4433bf2 100644 --- a/ptx/src/test/spirv_run/vshr.ll +++ 
b/ptx/src/test/spirv_run/vshr.ll @@ -1,48 +1,46 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @vshr(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 { -"39": +define protected amdgpu_kernel void @vshr(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 { +"38": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + %"11" = load i64, ptr addrspace(4) %"29", align 8 + store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"30", align 8 - store i64 %"12", ptr addrspace(5) %"4", align 8 - %"13" = load i64, ptr addrspace(4) %"31", align 8 - store i64 %"13", ptr addrspace(5) %"5", align 8 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"33" = inttoptr i64 %"15" to ptr - %"32" = load i32, ptr %"33", align 4 - store i32 %"32", ptr addrspace(5) %"7", align 4 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"34" = inttoptr i64 %"17" to ptr - %"41" = getelementptr inbounds i8, ptr %"34", i64 4 - %"35" = load i32, ptr %"41", align 4 - store i32 %"35", ptr addrspace(5) %"8", align 4 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"36" = inttoptr i64 %"19" to ptr - %"43" = getelementptr inbounds i8, ptr %"36", i64 8 - %"37" = load i32, ptr %"43", align 4 - store i32 %"37", ptr addrspace(5) %"9", align 4 - %"21" = load i32, ptr addrspace(5) %"7", align 4 - %"22" = load i32, ptr addrspace(5) %"8", 
align 4 - %"23" = load i32, ptr addrspace(5) %"9", align 4 - %0 = icmp ugt i32 %"22", 31 - %1 = lshr i32 %"21", %"22" + store i64 %"12", ptr addrspace(5) %"5", align 8 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"32" = inttoptr i64 %"14" to ptr + %"31" = load i32, ptr %"32", align 4 + store i32 %"31", ptr addrspace(5) %"7", align 4 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"33" = inttoptr i64 %"16" to ptr + %"40" = getelementptr inbounds i8, ptr %"33", i64 4 + %"34" = load i32, ptr %"40", align 4 + store i32 %"34", ptr addrspace(5) %"8", align 4 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"35" = inttoptr i64 %"18" to ptr + %"42" = getelementptr inbounds i8, ptr %"35", i64 8 + %"36" = load i32, ptr %"42", align 4 + store i32 %"36", ptr addrspace(5) %"9", align 4 + %"20" = load i32, ptr addrspace(5) %"7", align 4 + %"21" = load i32, ptr addrspace(5) %"8", align 4 + %"22" = load i32, ptr addrspace(5) %"9", align 4 + %0 = icmp ugt i32 %"21", 31 + %1 = lshr i32 %"20", %"21" %2 = select i1 %0, i32 0, i32 %1 - %"20" = add i32 %2, %"23" - store i32 %"20", ptr addrspace(5) %"6", align 4 - %"24" = load i64, ptr addrspace(5) %"5", align 8 - %"25" = load i32, ptr addrspace(5) %"6", align 4 - %"38" = inttoptr i64 %"24" to ptr - store i32 %"25", ptr %"38", align 4 + %"19" = add i32 %2, %"22" + store i32 %"19", ptr addrspace(5) %"6", align 4 + %"23" = load i64, ptr addrspace(5) %"5", align 8 + %"24" = load i32, ptr addrspace(5) %"6", align 4 + %"37" = inttoptr i64 %"23" to ptr + store i32 %"24", ptr %"37", align 4 ret void } diff --git a/ptx/src/test/spirv_run/xor.ll b/ptx/src/test/spirv_run/xor.ll index 7181bd1..96b2914 100644 --- a/ptx/src/test/spirv_run/xor.ll +++ b/ptx/src/test/spirv_run/xor.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" 
-define protected amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": +define protected amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"27": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"25", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load i32, ptr %"30", align 4 - store i32 %"14", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 - %"16" = xor i32 %"17", %"18" - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"27" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"27", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"24", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"29" = getelementptr 
inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"29", align 4 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 + %"15" = xor i32 %"16", %"17" + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"26" = inttoptr i64 %"18" to ptr + store i32 %"19", ptr %"26", align 4 ret void } diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 1a203bd..3b75ec9 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -1963,30 +1963,26 @@ fn insert_hardware_registers<'input>( } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#extended-precision-integer-arithmetic-instructions -// NVIDIA documentation is misleading. In fact there is no single CC.CF, -// but separate registers for overflow (`add` and `mad`) and underflow (`sub`) -// For reference check the .ptx tests +// NVIDIA documentation is slightly misleading when it comes to subc and sub.cc. +// They both invert the CC flag. 
Meaning that for sub: +// * sub.cc x, 0,1 will set CC to 0 +// * sub.cc x, 0,0 will set CC to 1 +// and for subc: +// * if CC is 1 then subc will compute d = a - b +// * if CC is 0 then subc will compute d = a - b - 1 fn insert_hardware_registers_impl<'input>( id_defs: &mut IdNameMapBuilder<'input>, typed_statements: Vec, ) -> Result, TranslateError> { let mut result = Vec::with_capacity(typed_statements.len()); - let overflow_flag_var = id_defs.register_variable_def( + let carry_flag_variable = id_defs.register_variable_def( None, ast::Type::Scalar(ast::ScalarType::Pred), ast::StateSpace::Reg, Some(ast::Initializer::Constant(ast::ImmediateValue::U64(0))), ); - let underflow_flag_var = id_defs.register_variable_def( - None, - ast::Type::Scalar(ast::ScalarType::Pred), - ast::StateSpace::Reg, - Some(ast::Initializer::Constant(ast::ImmediateValue::U64(0))), - ); - let overflow_flag = overflow_flag_var.name; - let underflow_flag = underflow_flag_var.name; - result.push(Statement::Variable(overflow_flag_var)); - result.push(Statement::Variable(underflow_flag_var)); + let carry_flag = carry_flag_variable.name; + result.push(Statement::Variable(carry_flag_variable)); for statement in typed_statements { match statement { Statement::Instruction(ast::Instruction::MadC { @@ -1997,38 +1993,88 @@ fn insert_hardware_registers_impl<'input>( }) => result.push(Statement::MadC(MadCDetails { type_, is_hi, - arg: Arg4CarryIn::new(arg, carry_out, TypedOperand::Reg(overflow_flag)), + arg: Arg4CarryIn::new(arg, carry_out, TypedOperand::Reg(carry_flag)), })), Statement::Instruction(ast::Instruction::MadCC { type_, is_hi, arg }) => { result.push(Statement::MadCC(MadCCDetails { type_, is_hi, - arg: Arg4CarryOut::new(arg, TypedOperand::Reg(overflow_flag)), + arg: Arg4CarryOut::new(arg, TypedOperand::Reg(carry_flag)), })) } Statement::Instruction(ast::Instruction::AddC(details, args)) => { result.push(Statement::AddC( details.type_, - Arg3CarryIn::new(args, details.carry_out, 
TypedOperand::Reg(overflow_flag)), + Arg3CarryIn::new(args, details.carry_out, TypedOperand::Reg(carry_flag)), )) } Statement::Instruction(ast::Instruction::AddCC(details, args)) => { result.push(Statement::AddCC( details, - Arg3CarryOut::new(args, TypedOperand::Reg(overflow_flag)), + Arg3CarryOut::new(args, TypedOperand::Reg(carry_flag)), )) } Statement::Instruction(ast::Instruction::SubC(details, args)) => { + let inverted_carry_in = id_defs.register_intermediate(Some(( + ast::Type::Scalar(ast::ScalarType::Pred), + ast::StateSpace::Reg, + ))); + result.push(Statement::Instruction(ast::Instruction::Not( + ast::ScalarType::Pred, + ast::Arg2 { + dst: TypedOperand::Reg(inverted_carry_in), + src: TypedOperand::Reg(carry_flag), + }, + ))); + let (carry_out_id, carry_out_postprocess) = if details.carry_out { + let inverted_carry_out = id_defs.register_intermediate(Some(( + ast::Type::Scalar(ast::ScalarType::Pred), + ast::StateSpace::Reg, + ))); + let invert_statement = Statement::Instruction(ast::Instruction::Not( + ast::ScalarType::Pred, + ast::Arg2 { + dst: TypedOperand::Reg(carry_flag), + src: TypedOperand::Reg(inverted_carry_out), + }, + )); + ( + Some(TypedOperand::Reg(inverted_carry_out)), + Some(invert_statement), + ) + } else { + (None, None) + }; result.push(Statement::SubC( details.type_, - Arg3CarryIn::new(args, details.carry_out, TypedOperand::Reg(underflow_flag)), - )) + Arg3CarryIn { + dst: args.dst, + carry_out: carry_out_id, + carry_in: TypedOperand::Reg(inverted_carry_in), + src1: args.src1, + src2: args.src2, + }, + )); + if let Some(carry_out_postprocess) = carry_out_postprocess { + result.push(carry_out_postprocess); + } } - Statement::Instruction(ast::Instruction::SubCC(details, args)) => { + Statement::Instruction(ast::Instruction::SubCC(type_, args)) => { + let temp = id_defs.register_intermediate(Some(( + ast::Type::Scalar(ast::ScalarType::Pred), + ast::StateSpace::Reg, + ))); result.push(Statement::SubCC( - details, - Arg3CarryOut::new(args, 
TypedOperand::Reg(underflow_flag)), - )) + type_, + Arg3CarryOut::new(args, TypedOperand::Reg(temp)), + )); + result.push(Statement::Instruction(ast::Instruction::Not( + ast::ScalarType::Pred, + ast::Arg2 { + dst: TypedOperand::Reg(carry_flag), + src: TypedOperand::Reg(temp), + }, + ))); } s => result.push(s), } From 774f4bcb37c39f876caf80ae0d39420fa4bc1c8b Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Sat, 6 Apr 2024 01:23:53 +0200 Subject: [PATCH 08/14] Implement sad instruction (#198) --- ptx/src/ast.rs | 1 + ptx/src/emit.rs | 31 +++++++++++++++++ ptx/src/ptx.lalrpop | 10 ++++++ ptx/src/test/spirv_run/mod.rs | 1 + ptx/src/test/spirv_run/sad.ll | 63 ++++++++++++++++++++++++++++++++++ ptx/src/test/spirv_run/sad.ptx | 29 ++++++++++++++++ ptx/src/translate.rs | 4 +++ 7 files changed, 139 insertions(+) create mode 100644 ptx/src/test/spirv_run/sad.ll create mode 100644 ptx/src/test/spirv_run/sad.ptx diff --git a/ptx/src/ast.rs b/ptx/src/ast.rs index 93793e6..e5b5f97 100644 --- a/ptx/src/ast.rs +++ b/ptx/src/ast.rs @@ -476,6 +476,7 @@ pub enum Instruction { MatchAny(Arg3

<P>), Red(AtomDetails, Arg2St<P>), Nanosleep(Arg1<P>), + Sad(ScalarType, Arg4<P>
), } #[derive(Copy, Clone)] diff --git a/ptx/src/emit.rs b/ptx/src/emit.rs index d4d6df6..9e62d5b 100644 --- a/ptx/src/emit.rs +++ b/ptx/src/emit.rs @@ -13,6 +13,7 @@ use zluda_llvm::prelude::*; use zluda_llvm::zluda::*; use zluda_llvm::*; +use crate::ast::SetpData; use crate::translate::{ self, Arg4CarryOut, ConstType, ConversionKind, DenormSummary, ExpandedArgParams, FPDenormMode, MadCCDetails, MadCDetails, TranslationModule, TypeKind, TypeParts, @@ -1137,6 +1138,7 @@ fn emit_instruction( ast::Instruction::Vshr(arg) => emit_inst_vshr(ctx, arg)?, ast::Instruction::Set(details, arg) => emit_inst_set(ctx, details, arg)?, ast::Instruction::Red(details, arg) => emit_inst_red(ctx, details, arg)?, + ast::Instruction::Sad(type_, arg) => emit_inst_sad(ctx, *type_, arg)?, // replaced by function calls or Statement variants ast::Instruction::Activemask { .. } | ast::Instruction::Bar(..) @@ -1161,6 +1163,35 @@ fn emit_instruction( }) } +fn emit_inst_sad( + ctx: &mut EmitContext, + type_: ast::ScalarType, + arg: &ast::Arg4, +) -> Result<(), TranslateError> { + let builder = ctx.builder.get(); + let less_than = emit_inst_setp_int( + ctx, + &SetpData { + typ: type_, + flush_to_zero: None, + cmp_op: ast::SetpCompareOp::Greater, + }, + None, + arg.src1, + arg.src2, + )?; + let a = ctx.names.value(arg.src1)?; + let b = ctx.names.value(arg.src2)?; + let a_minus_b = unsafe { LLVMBuildSub(builder, a, b, LLVM_UNNAMED) }; + let b_minus_a = unsafe { LLVMBuildSub(builder, b, a, LLVM_UNNAMED) }; + let a_or_b = unsafe { LLVMBuildSelect(builder, less_than, a_minus_b, b_minus_a, LLVM_UNNAMED) }; + let src3 = ctx.names.value(arg.src3)?; + ctx.names.register_result(arg.dst, |dst_name| unsafe { + LLVMBuildAdd(builder, src3, a_or_b, dst_name) + }); + Ok(()) +} + fn emit_inst_red( ctx: &mut EmitContext, details: &ast::AtomDetails, diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop index d5c9b61..5ec97e1 100644 --- a/ptx/src/ptx.lalrpop +++ b/ptx/src/ptx.lalrpop @@ -224,6 +224,7 @@ match { 
"rem", "ret", "rsqrt", + "sad", "selp", "set", "setp", @@ -305,6 +306,7 @@ ExtendedID : &'input str = { "rem", "ret", "rsqrt", + "sad", "selp", "set", "setp", @@ -839,6 +841,7 @@ Instruction: ast::Instruction> = { InstMatch, InstRed, InstNanosleep, + InstSad }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld @@ -2377,6 +2380,13 @@ InstNanosleep: ast::Instruction> = { } } +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-sad +InstSad: ast::Instruction> = { + "sad" => { + ast::Instruction::Sad(type_, a) + } +} + NegTypeFtz: ast::ScalarType = { ".f16" => ast::ScalarType::F16, ".f16x2" => ast::ScalarType::F16x2, diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index 1ec030b..5fb5a8b 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -364,6 +364,7 @@ test_ptx!( [1923569713u64, 1923569712], [1923569713u64, 1923569712] ); +test_ptx!(sad, [2147483648u32, 2, 13], [2147483659u32, 2147483663]); test_ptx_warp!( shfl, diff --git a/ptx/src/test/spirv_run/sad.ll b/ptx/src/test/spirv_run/sad.ll new file mode 100644 index 0000000..c7a5726 --- /dev/null +++ b/ptx/src/test/spirv_run/sad.ll @@ -0,0 +1,63 @@ +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" +target triple = "amdgcn-amd-amdhsa" + +define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { +"56": + %"11" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"11", align 1 + %"4" = alloca i64, align 8, addrspace(5) + %"5" = alloca i64, align 8, addrspace(5) + %"6" = alloca i32, align 4, addrspace(5) + %"7" = alloca i32, align 4, addrspace(5) + %"8" = alloca i32, align 4, addrspace(5) + %"9" = alloca i32, align 4, addrspace(5) + 
%"10" = alloca i32, align 4, addrspace(5) + %"12" = load i64, ptr addrspace(4) %"38", align 8 + store i64 %"12", ptr addrspace(5) %"4", align 8 + %"13" = load i64, ptr addrspace(4) %"39", align 8 + store i64 %"13", ptr addrspace(5) %"5", align 8 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"41" = inttoptr i64 %"15" to ptr + %"40" = load i32, ptr %"41", align 4 + store i32 %"40", ptr addrspace(5) %"6", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"42" = inttoptr i64 %"17" to ptr + %"58" = getelementptr inbounds i8, ptr %"42", i64 4 + %"43" = load i32, ptr %"58", align 4 + store i32 %"43", ptr addrspace(5) %"7", align 4 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"44" = inttoptr i64 %"19" to ptr + %"60" = getelementptr inbounds i8, ptr %"44", i64 8 + %"45" = load i32, ptr %"60", align 4 + store i32 %"45", ptr addrspace(5) %"8", align 4 + %"21" = load i32, ptr addrspace(5) %"6", align 4 + %"22" = load i32, ptr addrspace(5) %"7", align 4 + %"23" = load i32, ptr addrspace(5) %"8", align 4 + %0 = icmp ugt i32 %"21", %"22" + %1 = sub i32 %"21", %"22" + %2 = sub i32 %"22", %"21" + %3 = select i1 %0, i32 %1, i32 %2 + %"46" = add i32 %"23", %3 + store i32 %"46", ptr addrspace(5) %"9", align 4 + %"25" = load i32, ptr addrspace(5) %"6", align 4 + %"26" = load i32, ptr addrspace(5) %"7", align 4 + %"27" = load i32, ptr addrspace(5) %"8", align 4 + %4 = icmp sgt i32 %"25", %"26" + %5 = sub i32 %"25", %"26" + %6 = sub i32 %"26", %"25" + %7 = select i1 %4, i32 %5, i32 %6 + %"50" = add i32 %"27", %7 + store i32 %"50", ptr addrspace(5) %"10", align 4 + %"28" = load i64, ptr addrspace(5) %"5", align 8 + %"29" = load i32, ptr addrspace(5) %"9", align 4 + %"54" = inttoptr i64 %"28" to ptr + store i32 %"29", ptr %"54", align 4 + %"30" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = load i32, ptr addrspace(5) %"10", align 4 + %"55" = inttoptr i64 %"30" to ptr + %"62" = getelementptr inbounds i8, ptr %"55", i64 4 + store i32 %"31", ptr %"62", align 
4 + ret void +} + +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } diff --git a/ptx/src/test/spirv_run/sad.ptx b/ptx/src/test/spirv_run/sad.ptx new file mode 100644 index 0000000..c7ed6c6 --- /dev/null +++ b/ptx/src/test/spirv_run/sad.ptx @@ -0,0 +1,29 @@ +.version 6.5 +.target sm_30 +.address_size 64 + +.entry sad( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .b32 a; + .reg .b32 b; + .reg .b32 c; + .reg .b32 result_u32; + .reg .b32 result_s32; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + ld.u32 a, [in_addr]; + ld.u32 b, [in_addr+4]; + ld.u32 c, [in_addr+8]; + sad.u32 result_u32, a, b, c; + sad.s32 result_s32, a, b, c; + st.b32 [out_addr], result_u32; + st.b32 [out_addr+4], result_s32; + ret; +} diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 3b75ec9..61a74c9 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -6644,6 +6644,9 @@ impl ast::Instruction { ast::StateSpace::Reg, )), )?), + ast::Instruction::Sad(type_, a) => { + ast::Instruction::Sad(type_, a.map(visitor, &ast::Type::Scalar(type_), false)?) + } }) } } @@ -7000,6 +7003,7 @@ impl ast::Instruction { ast::Instruction::Shf(..) => None, ast::Instruction::Vote(..) => None, ast::Instruction::Nanosleep(..) 
=> None, + ast::Instruction::Sad(_, _) => None, ast::Instruction::Sub(ast::ArithDetails::Float(float_control), _) | ast::Instruction::Add(ast::ArithDetails::Float(float_control), _) | ast::Instruction::Mul(ast::MulDetails::Float(float_control), _) From 5d5f7cca75115b1a47255120e4ca1236f01a2828 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Sun, 14 Apr 2024 02:39:34 +0200 Subject: [PATCH 09/14] Rewrite surface implementation to more accurately support unofficial CUDA semantics (#203) This fixes black screen in some CompuBench tests (TV-L1 Optical Flow) and other apps that use CUDA surfaces incorrectly --- README.md | 4 - ptx/lib/zluda_ptx_impl.bc | Bin 144764 -> 232076 bytes ptx/lib/zluda_ptx_impl.cpp | 654 +++++++++++++++++++++++++++---------- ptx/src/translate.rs | 4 +- zluda/src/cuda.rs | 2 +- zluda/src/impl/surface.rs | 136 +++----- zluda/tests/kernel_suld.rs | 4 - zluda/tests/kernel_sust.rs | 12 +- 8 files changed, 543 insertions(+), 273 deletions(-) diff --git a/README.md b/README.md index 52927d0..5be6a8a 100644 --- a/README.md +++ b/README.md @@ -215,10 +215,6 @@ Performance is currently much lower than the native HIP backend, see the discuss This is a ROCm/HIP bug. Currently, CompuBench tests have to be run one at a time. -- Some tests output black screen. - - This is due to a bug (or an unintended hardware feature) in CompuBench that just happens to work on NVIDIA GPUs. - #### V-Ray Benchmark - Currently, ZLUDA crashes when running V-Ray benchmark. Nonetheless, certain "lucky" older combinations of ZLUDA and ROCm/HIP are known to run V-Ray Benchmark successfully. 
diff --git a/ptx/lib/zluda_ptx_impl.bc b/ptx/lib/zluda_ptx_impl.bc index 48ea22b8902095dd36ce79adfedb42c3358515e1..1edcbd5cc43abb641e28b19d37651f905d1e0986 100644 GIT binary patch literal 232076 zcmZ>AK5)-egn@yTfq@~3$3Vp^a$j2Q?=A7%fek*43=9m4JPZs=)-W(|Gbu7KF)%QQ zF)%Q&H!4qbJi+40CSYVaiLr%8m6bzMj-@-PLBo~FX%e@Si=dH+1&4$;m(ijQkAfqL z$}U3fJpzuCm{Lw~Y!p#&cI!|$$!N5wMM6tMYl30ThM0*8r%o_1Fmx&~FoZEPFc>Ly za496YF(_)Q%+OF&Rhgk7sm{~7WJ1EClo$n-tR{s!&o~5X850B)71#tD68ac;M4B2L zTbcs6nAimQl2{I~a86(GGOK|p|ESwaINr-VX-ngKU6kH7&2 zW*!3&A>bg|!*qs0;vo}{MzbP=a?A$C!UPr`GX?&h0!9XwCP4#+1{Z@vYzzXN#Viv9 z^$Z&Q44Bx|0=V369AJq$$RRG^pew=1%=Cg)ImdzFXhI9O!~)5l21e$PWvw+?c!#5mYWG&XX-U{_{nU}Q~X5jI$$*yF&!7SbXpaiF2kfQdmmK*-G? zkvZxR2TP8FuS8=b?+Z?)13WA)jtriL9S$sM3JqLLE4mz6k_?y`!v!27c$5WN_yihe zo48If_{sjMfc=8Wkx!O_%s#S;oGTR?7??N|7&J4xG!j4k|L-ubUTIxtSBD`}!>$9~ zvy{AM{OR1Grll0NNI_|lV8;^y2L{II0!Ic$mV^dIwm1QY0}6-qSq^hBCbH=s;AlQ# z&@9H&;+Vpt%rZlA3&UX^tHV4|1_^9S4SO6C(ju693_5s?*Bo?^c+w_$jo~m)(_x-$ zg9NsdhCL23X$zQp3|8QPM8?g5faFslz-a1_^9$4SN&>(k?Ld*qz`v&N%2G z&vL?`sbQugw*kvS$1@99nVSL>L5ADC5C9o2Kcz$R8iTVy)DfO+0|gG#BsQx9FPh~f zT@*N+F9>?7G)+)RThQES*CAwlfw4^?rBm`nf`h=JBRo+J3t2Oh*mNZ>G+7unizTda zoHxUfo9&uHz?ltf5>XR0(juA}bvs0i*DxNFO6iikmXN^fbd;y0VIk|vBn~U3&J^CH zqdX-B3s`r8?2KqmFl3hHS>w1#!in2}L!lwVutkhVz=7dr5{K>q1qS9rM+Kq`92nS^ zbSqwCXl7vQ5O-d4kb#k9f__^BGY^x2LITf5HfELtheHxgd>{il3|VAB21q7zSb+?1 zIwnv8HlRmy3&;Qtg*2aqtjsI|4h)vbTwptRIHZCz4l*!GBsg4}k;Dr!g<;V#k!%A8 zkVU4;z!n{7*kE{6ibtShqfv6kLRo`D3gu@m@G&O}*rahV_ZWD{8MhpC zkZ?I~(8Ms~pq#;>1=VK~_?Z&}><)7@NzO==H#nqFcV+`WbK(U1v=_`h1{?~;Uk*A* zs9Z2$Vw};aU~p(b(-{W==EMt*hdEj#XDBKf98zdKGeLkkQNTG(fxXA>gre~pK~H&> ziIOK07w}Iy!SlA^B70y;Tbaa*W*sAEiG(kX5;GNf*nTPWo=M=Bs1|qS?oY*)idZqsGie1-4%bQ_oBgkf;^#PV?w! 
zv^$|{Jb}4QjAgRqwGJ2VNhf)p7%X7Dn9^2u;6hW5k+U^VhNHzyMIM7K3Ukj0urWso z_@`}P=g~c(ZhYf#gBZ&c$#0C#+(M^#ej6-c6-;fjJ8+??$H-ZpC&STWW}||^7KNoC z-$n@pr+IMj=$_CtRyooj#xhm$1YF7komD?(C< z2Nca)j9er@iKQ@=qxyhC1H-3N0^h)~K27l(*Z>{pH(&!I+g>n(4A{(b5gg$el1&OA z1FjgcID!n=nA*_-GN9%=&0K2r|k5Q{F$Ye&A8Jb|Xa3~z~*~kxaJ%eCcmmS0?gWxZq+#ul4 zF%#k-hM+SdEMON1Bro&e0J*tA#@N*fV$@7Up#%kh+W2{u7lk?9=R1Sgd_U{C0FSb&`n zHK7pX1iKT8Pe2CLPH;X2GT>K6`wWl)UoP@lrgyMNyin$80vT}8T|#B9q)qYy{zc~m zI1Mher>D%7v|(P%e+Vqwp1x$RqENyD{^|~Ekn!^0CX|DX|EBl>Z2TMNM_}V$G@pPN z&ptD~?WV+w<_pFyOgvu-f8^CmW>ec<8I?QjIk&YRfW=Fz~Y`$mz2cOrvC)D34L zDJ4d>EjQXVOk9`}GB)x@W^}MfTv)_^2x7eal7*6K$qw9)E(mZMEI69;WT9jlvokl- zMUdr3b5s_AEaz_OZ~+_7GO-hEypWOz*mxBeEwJ$ynj*l)FJ#@B!BHfkpup2~QH0aL z!9n85gq}79b{?if3R1ok86;R*9Qkf$bd-UV9D*u&JE0GpTMsG7L6ltN`F_hk!$~FVL{skCXgvk5G4y)C9^;guz=qQ zqD215ghk*8_@&?mR`SJ>&oT=X0Sow>pi16OSOSiKUkY9jB^UW3vp^BBfd3FwN#cZM z;0XAo;0IA6$X1yJihu?DOjp5fk)Jnl1vmnJDFi{3Jd~W71&V+L{7z6M-zKaAN5C(I zFo=>w{+(H%2w1@11Xc2T!WwV{{8EU5DA~w=GYb>}3-}K~mDEmH2abSW3ULr67ukPi zfg)f5KhrgcTQ*PJ0FHoP3P}(phSHMRpa@vN?*vuCGI0|)0)8o^L6kTOSZ0GFU;%#< zR0-$AE#L_FrH};obDKfHk#2SMd)=$<+N+dKeHeDA0H?dXL zD@r6ZFfet*1%r%Y5pei6tBH|KLZN}-P`SbZ4xSg@zoi-(YFEZ93otPJ&1PZaSkTZr zfq~IvgP`7y1}?KBtl}{RjIuc_JO&F|cpDknBoZ9Br4yTZS3I9r!A4#kD zk;vqcWGwK(fYD{6q@3D=#7#FiSOgp-W;Zf0$*J|8b#PZ<5jftqKzWjffkOY;4F}nz z1x~a@fVmTSH@c}zjrPI??d22N%>>%34A_e`*ef%dD?Qk2 zHP{*W8+cbF`~exvz|o+|P{(k9@dJAUOMrvCz`GB8xeolF1o%EM@V|V(_xu9eTLHc= z4SWo?C!B4kFgu)Kwrx3VvBp{YK(oyrX4@%;EiN3kKH)6g;cPpj*>Xy=&6a~UOPcMb zFgLItU<}pV+6yGwOEZ`&CbX9?XfKjruc~0L%wR5D!Cq|8UNoWIY({&@h4vzb z4-7XPxK4n)!GA!CVHv{@Mg>NZ3+ye{dz_^&G~2W=+Z=GV+Hu%&i=%Z5v+V+A2mVh1 z{7)GKK0EL~G~j!Ef$f6=|9c1irv`jqKJb0I!1gGB|5*axQw9E91OAr__@6R7VBFB? 
z!NhReL4#3&O}c}XVFNDhlrmYHBdD=6s~}b2c*ce7Ow#pFZ$?3E+F| z!2e!>KUab8YXaZ108rZgJb~|503U-KgS5{>g!>_}(?IQ9nUDbSg-}2)L+cw5o85uo z8iN5Vl?$=HZy>XoA3y`Dl;O)kwj^Z+!y66-ObTq$4hw&S~%gf#*oef*Ar5!cECe4L$;WDNPM3!Ys-&6=Y7a zFb1k|v>7>COygwQU$lU|Sb@FD zfW7Df+Xn+M11yruz+VDZ1JxCPB2@-f1JU&|fgPeUH-Wz}fxSwAz3c+pHwBPju>yN; z0zcgRZw~AgU^8L7uenzfE1TGu;&_ZgRB7#!K&OgPZU)3@?X8xunV0|SG?i3V9A&w>Mt z42%k(QD%n;jLupcJVd}8P%JSpH1J3SEoFTMQUDQPU`RPILCeI9gQJ0wfeB<01Cs)C zTgaY=z77mL3<4l?7z7w34VGNuU(3Xlz|a78T?50321U`{21hmqVMYef2sZ-*Bgi@{ z#%3OdWlUgg3=pHXFtJH8_^^WA%)r3nz{a!k1uKsc!wfD44zMFQK(4yTBT>L$APP1e z6atb4OBT)UVrDvkY-8xF21a&NGd7E#Ij0`^P$iek(4HL5hg8;H058h&DV=$0K zc39PRE}jMk2T+0mB?gegT#Mu-7!H&mJ1pysmH`7310!-g-m6VQci8RCN6;Pi_IoF~ z!y*heU?@wHsKGETfJes=&C;v7Z1d6Gc1n)f8{G>>CmnQ0_i5DW2TJGx(wX*QBD(Vq z8SglO?u#FiH7V$xmFCqmLUZ2K*=+KXXhF9sw7DDId9LdiCFU?P_;4b}mxmccvoS-1 zCL<_U!eaj>TS5UFgMlh?lqub1;5o;@u#5#czdVs{aFjx`x~qj*n-?u|mmX%6Sc2~5 zFBTgvp?i6Q45oox^Dqrey@bWUGgg=e*2p#-#BkyE=4^Bqn%-xWScC4uG@BjQ&|R1% zhiRbKRZIg#-(fM($qv&%3;6~|X@&zX$SI%eJ+nauGs6rHWhyTC= zUWNoElsw+dB(aB)!G{GoqCW&MG@CP^Wph2Qgb!?JMcE;DMxJ{NXa%E%W`m;)x+C>j z4LX?7%EpJw3XU=gFt9POfD;tB2)!4~&}@Nb_0?{!a2W>11O^AN7r_P5DIY8b&Rm6Q zU{xqXvn4}ACvuS$x}Ph-fE}#>d=<&a^NfK(4W*E;)owV5?w3{NtOhI41N`fH2DVZL zv@%>y?7#v(v_kdjaVCi?jA#`{RU|{RHM%41xf2T5(E|KcCL_-~^Z>8bZ8#{)fLc(m z+TO~~!;lb!9M?zhJF&7kFz6sxe_e$SnVA`8h=4K+1O}D^OweiDSzysaOdp{;DMfb8-B^CoM`7sR4nvcc6w_h*~d{o8IY>Mu}9RBO*F67$FZV-X) z!i7Hy)}p(xt_IVf_&}@dDi*NHMTwp^hYO?+^^4vl%YTnvo zmhG!pISne%i<+&9jBNSnMNOE>fd{u!eMd@j1N#s?Dw1oH<%e_$f4M4$H-RAz;GF*%=xE&Ac3DDp$gd}SDRTS z-Y_!w@FSNwzcw&5+o9(deX)cK7*+FeCZ1pD`NcM}!BHNqnXyWr+u#R!e)*cl$X1Wm z7FzXB=fDGgw6@UI#jFy4(Az@4w)}8nL<`0}Q6US_f^k)a?!ExDU=%$g7OH_Bj5*!< zB51Mfy319R3oVvU-F?K(iWbWc^8;I%&|*1i2Salpn!TaY;@8ma&6>h(Ac1aRZ*gKT zx`Cf|VlmJU)4*42u^70w4Aa0*YF7OZHE$*DDrY|GJOd6WKu2O?;tV(2eci8)MY4X{)De*~e&vPwqCO7vKkG1(V_ z7TTh6QlVPtu`JrFFNzk+NBi8SqQ^3q=VNd5NNHRexD-87UR`2n4n?y!G*{{xy1iMp zyao~I2CiL^xE9^OLszgExB}C_S7BHTyt^9Hz^H2s&0*-Vd|N8v0vCGS7t72Og&xaq 
zvl|?>(PO!l*PsGDmbY$XWLu9O%VA~*9*CmF^3`)}5_`~NIqDXuNL1rME)sKcV=5U~ z99WQw#8nr~Kt&=a6S!Lf8ReK-AssHqz$n0o)CbYp-wP_Sn~)2-*2l*{CAJH4iCtCv z7*t{_AeWpwcNKz4Y(eA_`_x^AW?O~^Ph@*TPf90jU}rG!MDAZ}RWtK^V_;C@ME1zi z{Dy;a3=Iwl%fnM28Jg|U3vExi zgb(P2w$^E8o`2|t_R`9RgBXRjO+TneL5hWNeU3?5G5>bNj>B~e=k%$tE zabH0tHfk)ttBP2N7Ry?bY(XV9N-ST!As42B9?SXs;F1&7-gkwdk`u+=2j9UZCyIfh zJo4Ak4Sdyu#lX3rFb%BviD}?edrSjYE#o)PK+h>xzcR8dMUUlMb_W(DNGAJmSqLOB`2!ADn+1@6UE*G zLX4oI7{x%*45jPn2EH=GV&GkMOao&?Fb$kK8`Hp5A%X@O=&?Lin~7~LdMrV6Fblcx{CcZH`UDe$k1$FrQM!;hlA$3Jxm3J5OF2)0o58>r zxl}w=dW1KHkzpB1si@w+qfv+9fC);e_zsIBd+o6}Qo;v|BilPM9eK+zvpEXQkyHKD zas$vEX?1o9Zx*^Ezs%g?sEg)EEuYC|E6_u7XVxXQt>__H>w9YfdPufz^OU}V9+Fw+ zSRBcrj_Js%$5XATxeE}d+40Ns%{OE0nQKzHOvzgrCAXwl93*i-rex+8ztWHQH~ zhvY8x+y?ZJ-1Tz_ZvlEpE}6fDQ4c*N|6y_D&jnZ<85)A=$ku)@Oh=vy0;kDNlr)*6 z1x}NJ$Z3)*@F*xvs&OIL)cLExY0?EHO@@ntGmZvw##y@toF;`?ku%PoP;i=Dgq(3o z_jQ5OqzH1x$%??@NDXaFM|P!SaislPERNhd6`Uqf9QkrPI8CBD(l{KPCQ%&e`q~Se zCQ%&e6bDX|C?PrZwl+9TqJ-qF+NGe3gBp?=JHTlYH6;0_V>+_)F&0NokHK{0(dpip zj=WS1PLrsP?9~IONfbv)U0n)FlcICL z$GpL55+x*)DzP}yULVtuQLnK$a>rgQjyy8mP=K9bh8A*~T=?@4+Y<(c$;fF^D=Plj z0~v;dRVZolJ-74{W(FTIUUEI#|&hd2MSV z^Comho;6EO5JPk1rQ2=17tkH~>~>FM6q+NoT4#6bHZVvaPnWYEShV!hwO88c05@9tIaqO-tqLvutg_2L(V&Nx zenj8-O5Z?tWXwKA<~?YRoO;JRcLBO1fBkLa`GM|8DU{i;FI^ zEkbwXgpv~tCTNZfoyMmojJAN}mG^R9A7-@l^XlaU#w@g`IpsRnEPxj+k-uH(%(ezC z{j7>Fy>-A8E&Uwb#3y-x4K4jVxR%L$6D|FOzPCy_AcdBGxDuE1#-KYg@680p95hFI zO`B^r0o{>*H#@U!L3iZFvRent&>YEnn@{oqx+7oQ&Sbue?#LO|DF>v{9H~{xp3JSl zz=pgaZ2QonMr&&M2?iUqsCoL$Pr8F0Es>Yqmu&uoo__vYCl<(|MdGi6%dPp* ztp51Wc`3U912ggvO~S{-=C5de5w*8Vc_4@87pBw8c^{w$_}ZTn8f(x3T+430*#dsF zRPpmgGTV2w0M`nty{BN07T~7q&r5z_M@tnY-x`_!qd9WwE4y?CB{WBN-Dl@>VL?l} ziGOA|cA`1*)s*?>7J_Jwy!pA9O^XS&d|oxL?m>eqnj^2?IxpG5jONG}zZ;p0&>Sf` z(?0nDx+AT=s`H#NO-7QI~Rsp0m$UP(~}zwEnj9WU!!B zh%f(dY3xNSD^9T~8XXWsD=S_qUSYFhLant|6}Oyea7Rm!S0`PN{=tEkAb$v1F&i+V zwtucxI29TQipCDYXpTIn^Mx%7-I4oS&o%g>IWl#h zfXog~G)Lx0SYb3RLgk$c7|@#*S?L)(56~THz}tGT61{$3xA2_}TCMc-mVl}{S~e8Q 
z@8Erbo*?!3CN|DROORgY7MdLpMoW-?tz6iu&=cgu_FE19XbF<_`vl1!oM;I$N4|tQQ`lCaJMvD?y@pUU zN474yD7k|R&5`#^nwTTd9C>xUTlxa@R*lh34!#QX21A+j#Ktvfj`S*5HnhMf`tqO5 zMKAhFELJ73pk>2+nXQa#(Gq0W^u0)EW9FLZ3LT9-bFzBJ>np0mnc#p86Wo$j(oUM zg$=#v(~6zC=Q9Ip(I>jkYw8@dY`7|Qso4T?v;_HbZ3x>p^aOc->K%r7v;=weP>|#c zZnOmXB%+A94lO~7T6m{F&_~NPR+*f94s2+-rd)Sn;|a6``D*)83k69uM;_eK!luTI zTGwdVOn=0Xiss0v?=DHM;6Za_LR=H`G;~M4@XmN(gyu-6dQH9rbVpiREM&|^bEKBY za*G1=X41*WN7!DYw^ioPx#5t8=E$Y9E=x>7ueEO#OEJ$vOFyDLez^?tXzAzG<(0fS z=#G44HLT))QvQx(Hwd6`xmyK=#Dg+fA2vKnj>9bPnNuc z?#M5-M$KE$9eKmQ-~oC_HvQ$~yN2#a1&_Im^=OWK#k0~l1Kp83l~~!8(25JK`Xvt@ z9i}74IF4`GHbF`^A2=J)&~_hn4^VcQZN_a8+1qR@tVtc56zKTDJzXHpgVG< z?pHQ-G)Jzwv-Dm=KAIz24^5Grg6_yWGmMz`qdQVJxZr>inj@7mx%eE|(QoS-a*|Ln)dgTc?RiUO*4Yh|NmO*U=qW6PCLGy>s$$=HwPNUrtT#vjlf`Lm3bJqax&wZzvyV5mWJWkVi|CFBh`1zd#E~ ztj2k4GmmNk=c7MdfwVs(ut zpgZ#98dml^^pL!`^+7`&nj=;BUz6N`9+E{jteAhJJ5nPWvz_0R&dv7$-H{2oa~ti^ z9QkU?TH_1oj@-HZC|e1-BW1VWXsAbXWa_JH5(m&7dFiec^M7KiNf}Z7L={vCM8wn_)Mfc8!SJ`UN9JwlX&#i_QG)J~h5|?~|?#LUTl$eE?P`h=a zIdQoSPH2vNb$&H(2)ZM)swOgWGolVQo(k1BDnNJS$=6@m7N9$F!`^!gZD@{U-8~(% zgY)LQ5wj$^Bl+VC9JJ6Jne>#KF9Y3?e`@A7u0V_KtTpS6H=sLmk2jnN&MlT^TGPt8T@>J>? 
z-Y4jeRO_9{D2o=7UTY1FHlRClr`a>M3KrBFN9*6QI}ZJ5j$HaILh^$kS~(dftIceW z?#R8#c?@1?jufgs!|TF{mb*9fPi53ZbL6Uj8;uN9(W3ih!ZWrN=#C6IbH`ySnj=&D zBPBNop*gb5NSiqv-H{n-1q|M3j%+&5!qwQXkMA`N7B1l^HJa~CpNqB+uQt+9!L zI$B7cEE8jYf$m7b^N$?nqB+v_xunzvVYHB}uytlGM|b4O^a2KdG)FG_&BONu-H}h` z&tAsEe(M(JmHQ_vl0vUVz?FPbB@)^9c1fbPhj+n%vmpgB@Y?D`#t0KN9N@fGK8Z!((9rw-xG94R_$8I7>njeuNZTa z4H%AmBgS5V=Ezlh?>=%^kLJju`=q5lpgS_7!kKwLx+CTD3mKx&9GUf8m(PV8Elt|( zUC5Y;=Ezf9%uNh*(L!?PcQ*DFXpYp9zyH8tE1DypzPc^BK@2SXp{{>dYU?P!iL!YIK*bom^K3E~VslE@RyX;Tl2C@`3Slz=d3 z)Aw5Ka<*rz48d$j>vpteK0VQ}hauq<^2~MW_86%d=myqJS7!di)S$_XwEXxgM{#CC z9D_kB^2~KoavskKZiZz%$TQdL&n#@LWIQkh*)Ol=S@Q2?NVtV;b?9Z8WpU{7l6@{o zA8q8V*UX*m5i7%Fl>ObGpBa5;LLDj>?I@mH#fUnspLJ|!9{T3`ImMBzXcIRf&n7&i%{k1)QQXFl-wru|C6X=QX%X!Nu zXwK6Td}H*Bsi6}kI{3;YD;ZH2qIB_Dx9>rl<52C2b&F>(C`696TSxUa>od#%ZS4o8 z7x0eDi^@wJZx%jaWe8?RN>{6PzghE_iRl0{$S@E-(V!Tb zT^3f)$oPN}#0TM&0~4}rtPL6T8D{7qFR_@Kq0aVzmEkh-5{p&FZ;vqSVMy48oLZ)? zn)@xD!Jrn|BU0D>n$i4wb$zU8k^_ST^6K|VH}kj*(T*~hn!Ek4ID-#2az1j{{`em^ z!!nc|rh9JtDzdROfKPs4fNp90{&v@YriM)9{Ys+T<)Iyns2hZ}daU{OFeETBBm4Jj zp}8a4lvC5wbIs_=b{0SHl|akp5xZ8SXY@GvYhMBZe5^?!MuLNbFvFC%g+-@0JUEXmxE$%CA_T)XE>SSXN;{g+pQ$bz_ANO%6 zIG-2OsXMBQFrAupEt^@2xuFxqsj9o=WG_fC_@tmX_4n6D4#yZ0q)?nXcVj!d5gWs0 z31p|fn!eWp)2Rk;A7VPyDL0=lgNH%Q2qjeOs;eB584M<(gzBq1!pzdl4J%QcTKZb9 zH5o0nN0saI@&wtsMCHAUqYVxac~({!{~l0rL8#)UNoaXt_H}eE%u5LsUd< z>ZY=zl@*iztTJanJ!@sEU)@z{w8I2m>F$5Ui)N|otoTrNG`F2PyM7fjniqCH55J0j zh=JRe)$bTM7?>H5PM)#&6%{Q3N zyY_AMD|F{A_>JkjTUea;tL~~Sdg#^fS3imtdQT6@PeHE;K0W03LazvRe*C^f0{>>9ZO9NM}>c#reQnJ>P{ilzk)l;pPPzVBTeAK|CZT0Ct$_bJMrbw5)V1}~W}^AG>+b&UGid%b{d7My4Xr4+ z^mF}1Lj{H_Ag_Wk$iM0TBQw}o9HhVkAR^_!gjMhDz0WoK~U?ev~LsHOZLJy~!`!N=0E{D4d zOs-FTWfhRQj#=>Zw=X;H%vydjHK4*y|Mbh3+of6b&iX49Tb$LK9LuGZpu@q@kigNz zz`>#*z|_bm(aqGLAkpL~z~U&u;s`n#h|P_GYoP+yVh65e8@Se7;9B>A%V2>5GndJN z0OnJ_tQJ&kWbSBS`m4d1wBU#)GuPJO1v0j*Tz||PI3yS~6q6Q++-2svXS={6l=;;3 zfCV1htX?J73j}sDpAs@&u)>km%RFR(PCN4{E4u|1Pno@TnJy3!WaW|zTySMCGuJQc z1wH1>UG*Ugj(D^P0Kqg53g+WM;3bzy$@8tX?rT3sxLwKJ~?T!3uxYQxc&I 
zLZ&jGGO}MF@Rj-09J2*ezgF7#1)vFc@<*$Q*enlP1Dw+u>}{a=6C{e9C6=j6$Ag z4E&HYGL3nhUW7tLs|oYAQB z?gQVu2Yg=-@Vzl$PC5ZPY*Ge%V_UtzZ1aG2+KW_v+Idr?7iNd|jGLVJlsd&z=!$#cQ{?-=+$1@JwY z06Kc`p#tBV06t>{4cR*fcHaJ_&X|_HSpaS(s zLVI}yd&P`EpDz>mK_MWQ1wLs~QTEEDh>9c8I3WI1^C_^;CmXt_t`-C;3Y@7ClAGM zJe0l1D3|6TX}zJ@c1yE#%VCQiXUj8(ZFU^AJaO3i3bV}_k1d59>}3h;h2XPPC2t?& zdy~Nb_5tXu(5D9cPZjvT1@Pqw@I5x*duqV{`2pL@2YgQg_b=9p(sCD*Hq73&sf38*x15IA%q+<; z#Uv@&AlcH;L^Ct5BtJJVvmn1DHLpY=IU}(sI5W2(Csn~&TU#N-$mzGQGR++YH@LVR7kL)SxIU|Noi4PP-p_w6&df_m%gIkHNzAKMD9F#uD@iQ^ zsWddr$SiOyDoU)3hZvDml$e*C5nrB~nVwMsajq#Ss1h_F>iAC0x|<6aKBf< zV1aqlftGt5Ov+Wt9A{qiDi|y=Z#ppL9tTr0NZ@6!g24)qJMM8XEd~j^>QyjU19Hbb z4yMN-f!DnX1{*-`0GUy(%yH&TuY$oAkUKzTfCS$5Dj4hlxdUVdNZ?(sg25j1rUPF< zW`G3V_bM1X0J-Bn2a|H0GRK)8y$S|TK<>ED!ITUV_}Qyq@B-wH`y5P*K?1*e6%5{h z+;N|S>2aMg%RwGS!@~w|m`n~Ud|)y;e1Ur}!&w0lV;**W2V))%5W%JIV65Y*@32?k zYtt3($rb^|Jlr609uUE+?_gZwrtk1Tptbo3_hpNay$N3riSkKUraW0N@o*SljDE?p z2WJiDI9nDN^XxgmeSzl?JDV|&pG3z&o`Vu+6ec)Io)MUFkms=F7bo+>1$@sQoK={^ zD0N0*Nu$&mfi(wtn54HHdq zWyX$133{RkF{uTO(h(UAY#9vvAW}g&?*UuV12!3{g1iT884YYQQVST{L0mAqAmhM7 zMkz4a&IV$Gq!u*xu&8M$o0;u4o4(C_lhrmOapP?pHt!5<2)S|N&8?VACr;gY;l^16 z%1J9F7#I{87#O&@iZt5G8`!HN3T8Ol?l^3*!P)Y{;ihLY#~_DG9|RS_FTnMOn91V= zzUKk_4;A>IIDp9K0eoLJG&HhTYQV}_NSzT;Fz2wv7DnqO&9+kxGiA%%P?WjzQ09(; z%#%bA`2u_r@`Z&mCm;nq<3#}m1_{u8OlQES@-kabVYXe+Y%|5#V!~m@0*1SXtxp`b zn$v8v$JzFPv-F;3>nYCCEr)Gn3?3HnJ$=CUg%LY*J zV3q5{BiD*s?m%xmPBRa*;*t9shg;uQBstK1RvwgGcT+LJnr9GYxr@;9#{dyVa5G_YdU)h? 
z@W^T5lv7~Sz$2%IM@|KgoDxns2Q~#fa{t6I(=NpC4s3t$$o)dd!QxE(3R?L?Bp)rz zC$N1%n2jO#36IcM{XNJ4(2Cv(+o^MjxY&e|A^CX0qh^}$h}9%!R(O6 z9_|I~?+|Ly!l;1#Egrcy2szN%@1QoL<3lz@4z|jMtb>p|geIPVL)-y}xB^TZ<_EP4 z5Oc}pv!LlGmCu8wpIAO>I4pEX#~MBh6>x|%;1GY1hSi+K2XKflz#-m%Lp%W{j+QPa zB)t|$abU>B+9ybQg-7ls9=R8IanDSbW0c;Ul>0aDe#_OA(LRB{yEHESR7#J z2bCe{`llfDW2gs}D`@H&rXtj%#>vWtXsmIyG69FU0}gQo?Bc5#aEL#M!fNm8131JN zz{JtQ9#Ymo!yXovIN}go4md#7VweLeYoKyyVG6bbBMya|4s_U$D5|lWgV{zo!ks!cEb2}R7 zV8;SF*x?{Shxm}7LwwM|jsWM} zWyxrHMM8Ygp)46KuSkdwI<$R8%PSJ%W3;?tU|=Aw9!#J^S<*lUJ4h?9q#^x59nj6F zpal{x4!jH>4N&`ri`g3;SQwM1DM+kPU`gQAQ%E>+fWb+C=LokrQ_>v;$2J847Dfp- z;id!=M}{=X7KaFB1<*QVaR0Ckvwuh(J0)7^Z>Pcl*r_o9b_%f3KWru1=x?XO0NAN9 z0Coy+&_8S?I_Phw!T{K*F#vXsmbcL~t5-(LTiS)~Xn9M!upKRLX&1JmPcC@^u zUD%G6x3mk}!C&60QKfHBT>Dnhkb!{{wD06p0?!czo&yg)TjgCgdb3F@Ym(B7LaDSs zp(h4hNdim^Z$P^XAbU21!0vNI8heOj_}iUW(W<~ab9$4;2?-}fZcUd%%53a1Eea70 zj#VAb3MmVY<_QQegHKtAW>d6ahZqN5hrt0h@1TU!11?LKg_6iR|2P?dbVBSE0jncu z?_7q&j#eI}hl0#3d`<#72OQ2EOXTR_*>Z!IDG96n(7AYo{c{-{3s6lskjT*mG6C$~ zk?wy%;^F}%{&NHbm_g&8=y4C)@h!}PEAAm{Xh7pp+p~(gu4-};bP-1|t zr9gKRXm1jVo3N(`UZ*4m287!Vptz01Fk#Tq)j6iEXP}~CxS;qc_&IXV$0);Ji z8$$ui5ukNOWQNay<2MpHI`D)KA2|Mz;tV}}4jjLf$k7E3AE>)3klh6tM}*{KQ24nx zL<%sgIy9YOU|{&$y|ANI5n7HMa^#jwNJ!>5IgycVnybnkP63|i6AD7}k|TOzXC!lU zr5#~tY!+bQHc;4HqS2MvqwdrqGds1pQ6DuSV_`w=FCEgCU`l3Y$jw37Ns2YfEbkk zG790)0tqG+s6#>SKzApoEEZ;QLi?LF{Qad0^|ntbOyG8dUxe8Lm*brGQd8kK@i(9)k^rEN&+mm}e(Pw1M(@3p}4s z;4Jg%BRO0>!HtT!42~6`O5L!fSePhz`?NAkUg>fTYCL>_P^bX z3t5$+WoAmW_CQB6A)(Uc&|C(_n;_3ywy`m`Tedxz@JxcqS<2Aifdb1x zCZ02AWOjL<1=UCWq`Zj#uLUUY-9JR0bw<9D|gn z3hrzYHzb;xQW_dlg7{!e`TAoWxTO$5Uf{A5TG6!D#iD*6g$By;p+ z_9<#Cvutaa0nf6PX@7&7@)qKtnm)1!eGp*aG2TVVHxV@jM#V^b2cZ(+9EX8dMyTl0HaC(`K;pk61Som8L-s#xb^mTt9&P z1j~oSrfJM_yaA;gC&M42JUWs*S$&RzsAYA=7Lt9 z%rscKn1iyg&+1b|D^DEM0!}J`9gHLFL2I>O8f9FuAJdvpHZyYhwsOs))nyB#+|`vo3)5)pb55qzLxT%ky@*~3DaC#SE` zunts3&9H1sNPrhz#?kGI!DTJDf&{q}J?tU-gdlA{P`e7=9|^4g8ead+oo9*zsOV&< zOh~9qNZ1K0EjNHl%MwuW{`5hsW!r;*XA(`0lJGtVk8B^f4+3fb%aGwmNZ!Z;`A!~G 
zNmW)Q_9<3Ov205SK={rZ;X7=8L(U)g{5BKjs|6rmrGR|ZaJdR1NJak?KIPK5rGR{e zt^B;9)ZeEB@)fAw1NFazS=_;6HlV$)E)J0*PLcx5tWqINCZBm2S`C>P{&p*_Y=w-4 zd9Y3VAi*@5TbYejrbR(wj=Kyeg2{WB*NK^JGf$&{%*6{j2Pd#FU*eW zLiUs56h^~<1&*o?#^)IVB$OFIa}?~lDMFwzw0;Q@kPQWLc_KYyA$8l&VB+L zyvQ%%XQ0vE1ogHAvnY!b$I*_1jO^g^_vGqKSS8QwY36Zk%LUEfg7Q#wo9c^J-NT|^ zK#3dLzrhzbE`u&^7|&7|HztE7ZY~gx8zZDN21<*_`36)TqK>V6kZ3A`=b7Wu@H}%| zn)fj8Atp$k*$B@w4hAo<RNa@^W)AMwVlY41c>BAF-;#=4U=g9Ae>vHMZI1;B5$YIo`v(OstSLgyTh6 zyF19hpdGW_oyi!t;AMgH1y~zG4%~)NWNR+R*@kHJ3Yh>d3p);)pHS#IINeF!x=rAL zGu!Vs3eE~_i3TSEdK@Kqj5r)6K#Q#y7{KcnaJ9{yF~SHQ-zNQR0*tN(%nng!96!kZ zdm8^QP(GQn#qoC!+XS&Xx6!KMppLU6OL&2a zcfuEDHgTRC3l}sr6nae9%yP_uwNb#yFsa~xBS#af#1eN+Uq?2R_X^GoY093{zr<+05mYmQiw%gHx2joUv z@vDmzzo53-U}{H7W^9yZIRaTq0=PeJ11%9fW9UG(Vxx(FhrYxB(f25MWC%I1$jn2pWW7gbqUBYWE<|sSW-%DYU~) zpgl_B+ztKPqtK2wt~jwqO0S@OqsaXPmjmGOvBvrz?e#V0VvHikK;wdtIo!Y9jR#p( zk;cak!{^xAx#0P?oePwISs>$MjPUU>F9QKA<71JGaS1OAoF~A>$E3jHV+&cEOVP&1 z&KTw#wB|8lN#4*4o1rc`Q`Jm%n&am+=frAgkF~_^ySt}Tq9_DSdgpPM5 zK*qa7c`l%jcO9R=`o`ddK!c+hY~<^NCts)|oAG-E=M9@@WCdoxp<-)XulY3|Edv zFoC*@41+1(M!g~2cCtZgBXTn^FyOCWLG5BhpMEgrT^g0kJPgpq4}&RwiD?`ABgHRh zjW2=tMeV!){~G@veSZA!_4;Cgzt@rH$NzRG{%X}gw7Hs0;cE!odEk8{cOK9j3v>-Z zA-szK%~i@6cj0A$GXp<#?QWvOW)?U5garp3Y!)V7bupPAF zF=9z)V1o@1HO^^(4G<-L;5Fh%{;(7rXulwV*5L>Wv}6HLpb3Bj4Od?qx!(zC$0F8y zWA?Q{^~9iSv%?2P@TCjnxqOH}he~@LI#|MhyRDAg7lDlHBI5AMaUO=VHp~u*J)pQt z+=Tm?DKWBSxXnzpFJXFwp0hAyWSlCTXybV+iHhf`f6XZFd2wL+JGGQ~zVF%WQ zEKL(Qn2UlN#C@#p2+U!YIAbK@QgGB2ob4kHD#&7F`^XD?#XOD@Ik4o**fS+SG{pck zzIkVxqXQ4}8X;!T`h7;^_4~+UbP)f+(`Uktt^>JJNo$pK$h z9kUtUg@i4uc178WK=iWeGlnsZR%Z-Z;^4y|2Rb;Q!ypS%!6g+m&e%X}`I}MJ@)L@) zi-R=Iz+=)B$C<#{K^AAArZvGhLoP$m*8_p_8Dwpi;!4Qpgk;eq>4;3_T zfaI~~I`BLuz*7UxV?ii+EU-b`#p;g06lflEh2$}hgAZgd@>uu=zG9w()8WGz21ZVx zH4Ho8!x>2H@G$cjXiX(->;_bhBGsF} z1GO2WdR!S;4h`LQBl3DfNL_&5Zd`ISg2h1rboS>Ui966R0l~OK?&E?Efk3b0KxHM1 z$U)FJ2>ZeE-^c6!xYzcm3b4I3mIAM(zkBzi{Qt9#ALRdkynDCTm7%c^6vvLAS`m8- zVC|NLh<3|DP`kyU@t}^g0!w)ytlh#U%5x6fZiz-|w?s9FXIR}4*a2;~w7P@)1``f4 
zU=9f+2wXTUD~4;f_k?p3ZUMt0=RdJzyIKd zG$sHEFXVI}aCR}Hi-X`92JohW<@LX}*Mq9h#0z$d8Nr(~?%w_P_P)%Ke{bdQ-@O~b zA|L@uOOmdTLzE%go{#^9w}+1Z+HiW?%v^B30tT3%n{&LhGI zDzrfzeefB-2QMP6g@EN}K|6R~U(oJQLGuStOI(4)+!mgng?WyF^K&>#ehzIA53#x< zumYN&UERR>x!_;}W?w&wfxnoiQKF=n$59gA;cuLCpem)#EP@Ppz5vK9ifFM?b* zfzI+oiZ|#O-p}dvKaZCmaS()e{%@l8Pu|1xWBYqh{{%XHdcHx71=xASt!4$W|l)og! zEz;n`pv<@60SdzPDhY9m7_4Am7*uf!8nmElz6G^~p#vC$GH$_>UsR1-P+J&2pfQ;8 zEiq-XA5tG3GH;KV!-UR5Bic=%G8r+z@$Y;6fAj&4zxV&2b^LQ5F~IS+dm&dlV%-C* zKh%im4>f}NLkW=ajpdQB{!p73&lPYxD-NZd71JPIV0A~}2(+EWDscgP%2$@b4D^$W zWSk{fz8f4DSi&fI#*igP0ci^VO+U2%o&uhzmu!LT@%RAQ*>IlEh$RUWl1C+44k|PYv>aq(G{n`fMlLf!bH~VI zXc*~jkhar7w|0Tfi^JEKMjqQ4I&E}EwkXg6Ww135SlZ{1eir0>T+m%^M;w0shk_|` z4{he?vMip!_f~-Kc>v%31AOlV_?~>={$RlOnt?5afjx(TEkyuKDzGOwfRugSz*oh< z{zCx7eR+YeN`QR{1KZaJeD4+b-!3?FSpSZ*#S&)Qo@VO{%n~=8t-%bb1CCM-wF&$= z3H&(*{5cQ!e_!BxdI6;6=LLS3a~YtfF+;k*tj4od9*vR$49Ogwx-kk0qNcM2nVI>V z1bE)*2sZIBG+45!-e?7#!3;^O$`0+ihd(X_aiHh^)q~4M7x0;kkqitBD1Loub4P=v z5bjq4{x2W+J_YdYXW)OlfiJ;6G#f&*J2m@HyodvbyAp#lHv1?&Y4{Dll`PbToa zUBF)Gz?Bfd|Lz0drwe@jwi_C4HypIt&}je0*?Nkz^`2&%C(iZ)=MUd>GT?f7f$w1f z-_sA=Z$9unbRf!>XV`2xW%K8dZHu$jg~Jv*m~9&xZMT3K3m9dVFkWH*c7W~M0k+Qv z*#2$ce;dH}_5Q9Nf;P zO;Z$TF>F~bZls{##=~d9;b?HeSY4wnvMKS%fre%QmS)@Ru0$S3rgk2V4#O5l8NG%t zO^MGXdKSn)mA^37OqdectwVr*j|FSmJEma@capVf_tXuzA#TT z23=#r@D_4b(Sqqs^F4nj&8TBs)WC3QpTw$y4h@9_9%I3S1?4S&1;2|wh@Vj3_>c3q z`iJ=j-VSGk1Z)DFBAO&vk{CN!C$ycdaZ*%fV-sjfSdjcqz$BUDmL+Nq;t>Mppf-(! 
zc2EvtU;vfn&~Zh<9fzkK83Oeg_7C1~jz+oDLGfb`c z&AA+9;9+`(*>+E}&6LCX2M$}TVV0P{D6@f4VuB->Y?NtWwA|usapthq3r3j-&LHj* zXX_V^G8332t~7(P(w4)?*b9s!p9{7L^f*~f3a#X zgVP9`S@G|r9mf(O$z)T}0(dex@>lSO_>=er^#}iP{!#yeNG1hLQY=}FD_9E{;c4Wf zfK4*TTq|&H4d702@IIS1ZJ|MnA&Wr)C^rf=gUj7dYq+`+r#+A`Gyq-AlyH7Ul7zEF zbH#~<1_@`0XAhga^(D&s4syInJn{gXpY2XFxy@9yFacfM1ez;^q%UVs`g#aYUlTy- z%csdIM{-TR>^7bo$N8e$-e-fdH^{5t`@<~3>22`VSyzayvwR5GSq)&nLh7t2^g2s0 zfMv<&g2e(y9AU}*kj({9`9XeJWrL&6upzfZoWU$H!I>&0qF_K{1IH8c{fpHlIQ+}7 zTn1Ec@ic+k8@-TvYr_OaNz>VpZ4Ls*1Qg;y$=uKal=2nM&siXm#@PmKj(l6A*=E?H zDPyFda7>`Ba0kOB?zXwf228~g5akQbFKGx&<7DFnUFri$*u$#c+Di9&%csF|wBACi z9m%b?I6CN9anZKknlO=37nEBM2^6H{pgQoAQQsXf^jshg%|b+Jg(A>slFxbG;>UjJUp*Wt++ca7PQ?uLQNbQTtkr zNcC1cX1&EWg6geSLn#Lq__CN^%j=T`9TH{eRgp0;FkIIF)n(@)byhWLhW*tGi}B(7;i5IfD9J2S9zU3e-MV5I8Lifj(EO zA)WhNmTVdkkPa6q{jJ&H@PzcY5=Z-6_5l)$8C?!Qm+2`>^duNG9gJWJkYK0RAO`~j z!`tqILhYJJeXt8>4nq2179~3f_rW?EAFwgTFcdJxIGpHcWNcB8cp+iSa!GNy!UAn`fqK@PO+oSdYqPH57O_b(HA=xU>M28u3!_7#0{+Dgh!Fa z2?^9)94=qT?sJ{NHa>)-58^Ng`dnX;>aOx~NZpk%c>p+gB%4TTn@p~Gqp z#npCY6BHZfIJPAezy}P~9eN-GhG_jR$DdCUPlM~O2GqKXxbZ?Ka(l;E(*s%Kg_UOD z6ilD-LML7!a84q=-v!!p4;sJ$jTa)Mq6O3;Jat^-{u>b)cqZA8gFzj8cnEGbqFESmoH3umYB0H*0}1tTSkY zpMjXZ*D8OxZ9Jg*s|~IGg3MDw#tc=_`y+#3o(q&W!g)aLbau#GiH9!Q2)$qvk52>7 zo12jFc~EALX2a}vANv~SK`_S!ZZ|)?*{TLvQBd02^}G0-&@@VTl_%JAH{Sre2S;nP%;ITQ~!G|WvFV5)%EJJcE5gPfs*quv<= zV|$?R$JaM<;uUNH^@@<|9OUslR4U?xcns4FB$}6d35U+IlYDC9K!Vw z^7tKcpJ;Ua&e?v%jNieUfq|QkC2*v1!UylxNHsaK8Le36*yb<;mO(dbf-)$0mSYc>6>j$z7k0wfZM(eLA7K=^Kw=6HnT0Q1t}eBMTX`I}56; zplgyrb&@a(q)%jzJ~k;Bz(uFxc`(d3F|><-#z4CuW1!rSb$1iseU(cWlc0T-7n@Z< zt)oQPBA#FlMMd2**h156re;Vt1$|w(Tf%YySO z4k)B?f`_b#npasb3!PUXrF(Gen zpNzDzD_9O9Tqhy-Pe#|Dq1H(wX8dXZ*Pr2?V;LgjSfEZ4XxIw2?hJVhD-PV}A05N8 z`@&3M3=1|uYJ5}@d5*)oEde%q6)ook9lgRbw{a`+GO0fj%29&X@ za&S5x9lJt0FNAJmSFoJ1`Gs^-WD|ViyBU!)CLaJT8U~G85i_R%8nXh=DZECVQ-GXp z2bu52IP*r5+`TODGj9}VcIM3knZrU4m?Yr`-+1{#4!&tqeSoqb6LJp9r;DJ%8al^- zJO_@k?n=>?^0RM>2%mj3;S$AX-yqvU_SrXv_6@Q_h(G&ArdjySp#tVWt^@~n 
z=m9t^Cy)-nvDm5(+L{cT)d(pDZB6C^9l2rHk|?9s0XYEY0PFyqh^_DgaKP)a*i;pUcP+UTS&%-%F zzJIa01c!gIoQETGSop)C0_KAg7-d1nY=92MS%h>b&VwuSkPpQPI;+`c$l?V#6o;*7 zhXP~`eG}|ZoP?{(Kx^p1hvE#ox{UT`;$W@IK*vjs)@3h{s}OSQGL8;9R%Wz37YACG zEu6?`2+Fmf!*LEhKsp@9!c-g70)=(HLRNPr@-*^1fgFyrpq<+QvOR4FY1|eXly#?_qwK%eI7qLk)9fTf|lW=to=*$gl zby*^)Uv>f1FKa;Uml1c)z#!O{_zv^7xUn6%l9MLM$51(7^B1uOB`d250S9rGO95-Om>@)29%hvzUr=cN$ijfp=$fzI3hwjs3xwO%7` zywQ)`-ZcFD9OQH{NYBqfuh*hLsEF;O*?~(J2JjikbHI5GQn%sWgA+jZ037@b z9pv5PCHXcyD#`X@iUPC^68{-_EK`?g)$|4whMIXpC z3*AAUHv=66hd4|JRE0s$V8%ITHagbWz(KFPgn9Nh##p0~5$H4yXw8HH^ht3$sDGrEp38lfDngE?E~VFW!~hplP{!)5Sk`Ax9HbrP;_ z0Zk0xUGstHkKvpv!+ZYQkgB&JM~C35vueQYLP#GBW1Yvy8ds#-s0QZwI?#0<8{y-J zLQ1e>JuJ4O9I_LV%%P|XK4j+*e0I1PcE}F4e%32!KMQUA5c!Pl(K-sTLJCwzL2?Xf z_0u$PK7#bGaG$j^g6gNsl%KVOJZLpgXYI%|i?0E#<$%wB3Gr~;fcBC6dbv0d_ZcZ% zRdez%R7iL((V`#+ZD~?(d=PJaGzi8AiRwp1>B5c=ft+mso##Rx6GUFGfn}a+bWD(5 zohstyxuEI1!n6bV0G^QQwuA`_t7bX2Ib=XpL~KPIz=PJ0as-V99std8RiKUq61RtG z5R3)Vw;vV7D+E5i3iq9R(Ebzh_#blrX>|M#_Zd9&%3gFH|AS3wUDY#^Kw2vwvKf?7 zd33TSCt_TA4XCNtrM^Nfyn;k?J2zV(9_sD6Ot7=rZ!%KoL% z@h;Re5d=oZyEq&TM#sBQ)(wEhyP!voAlFC8<6Wa`;NbPq=o&a^pOUz~%!nNKg61Z| zbro`7W_0ZvYF#zD_6>Rpa0qSJz73l(FvzheD04Q*V_=|pn$a<^6VBMr^1&DbgU^UO zMLy6cBp7~dNocYH{6HVH@h?Zv_!oGtB!ir}k`X!n1x~#L_5q=ceGz>gHJy6UBW~;q zmP1TAdD;@DJ&-Uoc%Z;?P~qw#Md&<6!qsI4phe7(jW^C8{VAmL?m^`XXsxm%TVo{j zV($rCRRxR~7@kSYcpz@Z(E&OG0@43){`3~afwnbU!RuBa>%Tzfkl^rtDA^kW;0KW$ zB;p{FlCP8=M1pJ!xtq5=93si~F8m-8WQX9|2f@I=uw4NZpT13!0u0F<=Ro%t$S{k1 zIaI)0hFH*Ax^L(m2~3*_^wealXM+L*tmhx+2ISBIh19c|r~m zxdL4##vHSnsbwbOE;htE=7m3hB%Wrd{ktJ`haKoTSkO`q@Sc%yaN2;J{iB8RT>hvY zwty?iybcN%u8@0NQy~M}lL>rp7qAy29STL(aZOKp*cia$^sE}NJ3?0+f?m9}t`KqY z7S20DK^JeOG0s4{BlLrS4fO6U&^SG4O|v6g;v{IuPcYSl$f}b~LfOOs% zu4PrR6wlBi+d|e1t&0PzP{1{^y$hczMs^4;?}F0ILN0bqNSa|YEB>8y;?zROeQn!{ zHxRzBt)cM%TVpgs9%GEdiH=6b76pkHH*8ogN*p@QC(dDUT!5|7n#WvV2Lo(Bj)b#B za|I}?rf6))hMeQb9JRZpVTP&&n`94p(+hm>75IN%;CFFgWm2MBW%#z64t1f7q(JcInW-8)i9Crsk3d;?A`Ks&byBvapjXX;*Vfiz;*t>LT( 
zL1#1$jd~DrunN)j;4_J72cVrnQ1=hAHUhOTh+GGP&Obrv3xe+29*K2eVh@`DqcOvh zqcSY_Kg$3Act3(AK!TlamEv~;P@Ndq4DKIZ0@sPmqAiCDm<1;y&e78;l1Ncd%Y;>k zlUovIs7kO&a%I6Q#3c<9DHl{SCMXIlfM&4*UapR0taV@`s6Y6j=I@5o38?+SFmU-i zr22!215^h-lUVXV7StIer7yS;9KMkHFPU(Edj$0bL8qw+8ZbLVopJmS%gLrHF(tu3 zj8Wv6Bi#mS-gYx8w`(Ew4YE z8MB+|(F|4M`-AI1{lQz;`J>z3eFjZ6fVzX=u^%G)gVE%Ui^30Lp?F-h(hO9e5mgUf z0F8?#Bs|A4F8UW_5OiF$9h?^-{Xr~sAqS#ASW15Xs2JP07mohXVCWClBGrX=Wx;i! zz~HJ2FTF};m+bI_4U=A1QIr4;leRU@f)3VgFx6BDdY#NJ*`z6z!qEonM84XzJopw@?^j*ng=w?~gPoscy?I`1raeWBwgI*yNCFx3<=0_8el`-Gr7 zjX?zwXnd5^I`KN$1BURk(uf%_90cQ|SB_^e!0N=icc&%dysQM$+FB}~k{lNEW0{>r!!8bNqWCS|ni0~|-kTFQz zF#)!c9SP7=)k~rC>I~OcF(jsOf={BwII|DuJRx$wP?K=KWf060g64Y^U$v@3+Vj7Y z793E7O!nv$PJlP&an2DYFiEncFit?5BYZDlk<1}&1zo7o$Kh8huU*;QlJDd>`unBOgXp&;dVcfu4z^JIq#wO5| zups%9fI~9JJ#c%7S%T%Q!5xQ$6xe-OBFs!>6B;+Hg3d)`tQBek9i)_^p_dJt?pw{I zbhe>kZn8jA6}&E6*I)td#xW4nkHa;#Nn~9%2*x&v?8jX&RTThDHZUOHAL9H8c|5ZP zTz-v?XKp~csMKL_k7qizId-K`sxKx1daHf^w(Gfvs8!#NH_q z0=n;U4QRab251iNE$VnBk@Z|SxpR0}n;2wWra)vpw*fR>IU^S|Sc%+cYh?k=7C_Ep zN<*sWoG{mMav|pJ=#<_D!Mq(Pbci`4{ZbOUB*$`SUy9+n6tpk(s1Vkd+N!3o=@RVV znFFR85g^*NEupbrg0g7DcoHgXm zZpiu(7o6j;EK`^ea}{*z%@2ZoJ#^~h_#pLhbjv0*u7Dk@SI`UVG$6DV=rb zmW9k`FbQgV}Zgqs$Vjv`pcpzK#0 z9jn7N7d1LoH#$}aTB}W9%>`(z&d39!E<+xx8(n*dSwar-vAW1M2LZaSJtVasHA2Vj zpgE0j9fv%2H@fB!R~X?Fq7gc7 z2Ts`p=7~_o>}tW|)uUr}8yaml9JJYhdQc%<$LuB~E?5@Xc1Qwx_|$=|nLJFOMG3Go z1(rBKCt?|{YZwW%L2rG>zYYO1KLZ-41JBRAL7ks*0LQ(fjsOD#1L%Aml=b9a4VfKy zrZ7m_%ILGKoWS>1fbV$#-~I!9?*;gteBl0I!1tPgEro$Shk-3c08A>dCpdtVecr%V z#lZdpe%y-y`;rgb3l+FG32^OXVEg(2w59wlc_*@*%K#Na49Ohsx^s#$*vwuVa-=yh z#0Y}!&3JQ2#`%Lr%9R}GWP!$3R}RP-R17lUTmfC106A;nQzz1#O&2&EAZrsqYqGF~ zgQ9JW7E3ui91QrseBk>Oz_*`)|M3RC1P8W)18fNnY=vO5h=J|N1-^#{{I3_V7c}t0 zPu(bV;My<1wMl_{;RNo40RDF$_�W!?E3J!llC>ohER-yukOcfbZ!C?l&L!9y$tk#jWYlkI7fzgos0Ko4e909<v(rr57_96wkI?(0FDR{&7ggS;6oyuZva# zVgs^TA?Q#J$T;?cD+?BYHeCraGxIqK@C3cpY%^q84LZQ;m_S?c4hP7Rb0*l5^MtD_ zK=*ZmmYjp=8&fyzn*>gH(0LBfJ(a>NkhOR^IOjQP&oBru 
zt2(&xALn+|THv^eQCINI(wY<N*1&YO_>DTPp2d+<-^Iw%Vd>od%@ zdzx*g9M(T@*kTQ{!~{l}4U7^K9KmFxOar6k7H5kyhpk>P$~=Id66Yv0;gEhylgFYZI{@&z2v-1r^5_(cE&6TQGRt@x z;<)P6ByXH~+ns35-p$11@jL0orH6|e8Vt|4upGWC_`@Bta*q|Xa_>R?Ast3TZ-);K zoQiBs(kw-cCy*EKeHHLH&SG)gfc3q>ae}ce920DfVvNgFmcz09M z3{?wgx$j}RTH=_%xdU=W3JS*r*lKntpe;#HxVjCL;UP=XA@f{qiWcp<&4N1)Ph;e~ zmv|bS78iihBDntCkPq6A;~siNc{=w zJ7KFo1p`=Gz7(t!xZ?=REr)C_fJ!ndl!Z7}l{$ElTlB&gmQb(g6%1$uUr0}G2;g@T zjt~IXq279P7K7?gM{Z~x>MZ={kc_i|>0w?MXSR|nIcbtijuOrbjv;5MW>A)rfMuzz z%O#Ep=oN$NSZ9d~51Bk6r@&orlw?Zwv!chx(9Lhb{r<&(S&* zRGAX0LpeI=Ux_Aj(5VhBfiDg*WHFl2&co3G9g*>S%hiSGYbsn_lO*A6fjIWD$p@0* zVaGl;hu#Kdc+fE);Ql9>^(Sb|8@8W$DD*!;r@(-!f1>-J&m^Y74nPL47sS%{M6Ns2 z(bo$?`{05BY$LkvY&Dc}U^&AuL9AGy=6e0#>-EWk4v8{H$Zt-NS(zqt@ae}~0oA3x ze9*d-S)}8zjB|qHVO|y&L|wY$46J4hp3DR~N*`X4b_lRZZh9e^!qJirZM}w8H!Yq4 zJ{B1=7K-S9HiG(}4{B64rgjXI{wL(fxB;m~nG8Hb0jB=Q5KXrY{#E~h*ig2h(Se;Yj-dkSz$^(nmaB%0=L{nnZO$07 zq$Tj03&liN6kA2kL{~ zbQg+dd#B9_s)IlnbUqC+eNans$8h0C(ok;<_sLCAivculK*Shs251ZybaD^&KIlxO zG29+-K85r_ao3|9EMLj)gThY?!`dmt(E}S)eNfa3&LH(@!r-k(Pps~TE=_)Lbx8x# zRAP{>Qp*|eDLYM|Q+D8aY5{{GboUwb>=5L-)Gz5YxGtSQL0#%c?tlXRv_sZdF67ub zy4R&qps`$u8$^%gdV&fn=y(}Pb!h77GwHTXWbABe9rSkX9KK%d1Vf$ znF%}EBxtQ-%NmXwmTZa|?a&F5u(zCjNzf&$M32)VpU*s6H-gp(;jbGZxs9}Xk+^eC zplh*4^!lL7hnG4Pu)`M=esL_fNz$PAWrd|Qh)MvT+lE63iK+mScUkBnGgC(X8986=dME4srOkFv!ter(3cST-1 zJ38))z0Wwr#$BPy%pY7`l+A%S?<wbEI(-2&ciDhC z-imxyGr>A>Fpam;zrUEV)dQ0I2(6bz9%DsbFH3N~a&(LpI$v2n(#BX}QTj3#PBoXURK;wwelaP?>K;&`6(e;~z>%bv4j!2jFnS(!IWLs)M9A6eqvMITM4j{vS|3HH zbs329L|E?0m1FChyQB*l$*pZg7dEVp%HkJdD zjOPq>9QDr_vK%))F5tl<3qMdPWvw9iKqWSH3D}-iNWV(ep{qPRRW#$Qnf~ z;Sq3^?6pDgW8M_79rK23ZBQ)5vwT>sBWsq=#er2Q;2PNh0H1fn>LOeLz>v)0t=FSy zaLTqTEn#!q7Fc|nFj|~3WN|k+E)a6A!1*9of`iA|v}uY0Ezl#(pt)s%sisC-WK$yS zB9HE_L>@(F(18wyEsnB!3m~g~7r>VGA28Jht@Z`oI0~5u1EmvYSWyJ?_$5`;nfN7B^GS5mhekXVak35xKZUftewpZz0!<#O4MG25fa1gW!h^sVhKd zOhv>a&yB4>nj1?6&l7VBFs24dEDaQJ4G>}tVsj02Xmvn@dnozG^5HMba2(5b;VQXB zC;SvZ>J^=!a|u()4*>iw!Vv)Avihn;k77i~@1zr_8c#92So$D+Qsdb>`y_Ui?9fnX 
z*rMN-FKfWXA#vEp#o&(Y#QK(htiQ!S#9ydy`p@}a{fv;nW)>FyDFP`92OJZ`8)6y6 z1&ops7!*0!tjhQgmiRUl^MnZS7xOeKvla6+N`@Tfb=7ZRG&P+a*>*?(cC;C+AlRw{ zDhQIHDKGkPS0axi?+PTg4Cfu-OQ~m&3@>sBzTOt)w%R<+3uqj!z>mCN3 zeWnOH`wW~eH-OS*4CrpRE&i(8coz(bXZ8?0v29GYT6?Iis-TE2U=?VzrLkGYUC693sgM0r(+>Se=AxXD>rC zM~_~QqRDB;Zcy9m2&}HPa0J!0?Iyq2Iz63<`_b! z|DE*U!b9YK?tunb6D|&gx806BcM=!F5BUD+4mnAU6?Br?2gCv23)z}vS!x(RAaCOP zB@l6)#o~AY>lcIL0v4$T#|4%!n%xPgNpbK!n>KBsK?}HUg$^5MOAD4bwk1q}4mTYz)dZci z1R6+YfSs`h@82H$8IyDxT<0!8t#e5oOU@;CEE#@YA=a^Yvc{5O=M@fAof``pOJ*x0 zx_|2pDh#0eEJ&?)OUWNf#b2)B7)l*fW68Bh_3qufoNVe6J+2Imh4h~p0?p6Tz1}T= z*SjIjcF>(4%yGAocYbUz?G>2B3p#c&;WV3MlcF@FTg-5M6X@7Q&~+N%d0YH_+)d|I zxAB1L-8Qs(7x^4h$T>1t`gMb;j|*-ex$E{QDje5s2lZ{5VSO77M$nvXyYX=W7jC3E z+g`yY9-juDH#b37;4t9o-y+Y~j*cVazUyWPj3a+CN;FnbfYzLtGq#GR;NzQk5+mBV zH6YV81@LK_vs@CX;Nxb9ov%gi*J8huYVgk2f~qWP-KTL_qUjmRToHnCSS-68-Ji1^O<8_!*19hsy~Z!o)-Czjp19begjvkxa!mn za6X0fVR5d38u4SsbYB8R+#0Bd@OjxwN(s=u>H<>@l)h>;hoT`P$chZ3qiOA!>79|_N>zvs9W%UTJ zb4pU!;piaFfV|ErjlU7R&MBAyw9ZMGt(eDAGT<=pQ8(D!tB0`yJhN}r24!~m5~s}^ ziWeIi<|Yd;)j(&87_M&s%>#kDpv26x;vD-WuC5$JW51;KK}&iCo4~zK*j>n=vAe^c zkjHzG*D>LlUmYFqh2M2kK9t6Lp}o-uTRo+lkOq4vwuX&7lFR;()0J=+r0h zNH1FbxezqY`=LgAW9kIdabCjrBBS;}2h%w3K=wgnXSXDPdY^>qPUNv&uyc0FCS30<9B% zhdQ51T%9_I7L}7)r{;jhb<=Kxs#WCv=U${aR8amz-J^+RY+HbFDc#$6gJ}*G9PV6t zJ&HOfEj!Z^Hp?x6ji(zN1dXS&n;aJiI9uR+5#Ar%bSVkiA3U&C9n?gGZ-*)7P}GN= zU&S2L&D0FJU=ghzJNPpKI$w!akCECB989~J2C^QDp53C*e3Pht;CI{N#M^h;Lmx$|N8>p(+hlv^P~oJ9s7F-UGj$uNNKlqP0|6quZVe$xJ)Ro9%n4@b}iK#2)k?!jrQe&8OS+h!N zA8>Rm^FTwh01I<0s1MjeTs?~12OM3iNgmkM`VE3s*Z>;O;Bf)0d<}bc}x>@ zx6J66rUQQl1A8XJR~+k9!R=3y#x&uxZ_A~S=jXE95@7qBs&$+oqnQ}{FBXEvGr{w6 z4V2BxjgDtR>au9!=H*bvGFKz5X&D{MBz&gv0FPzDSCD8KOEg6`!55h~Gir8p8z7gS~~!gDDy^FsL#xFmT^nD0?GO_KKqHg@-a{7RsDRlsTd(bKs$P z%|h{tMDY?u@q&k9ISa)y62(#!#S$Kh#w-+#NE8iG6b*PN;XfQN-aP|DOf? 
zKN9%ADDZ!H!1rbW--`skCklKI9`N2-zetGvrb zZ#HRVO;UPMD3ul{^u&NGNq}j=b5OuT_X#P3)2y4000RR9Xxi$?rjHDaEDoGg7zG2E zpE5IqFbK4HHZ)Aw&+xD1r?eXnj{?g4orIx5g8&Qj zN3V9J8LApEb>|L$XclB<=5RC+SYK+<#LYKXRik1-vjCe*0(3D7=v<*_HiZt@MoiH8 zWBy<_LFNQN;;3#4c3_oo;O}P;U}O#a_y7NYc2|&hl-XDWni3YQVAo}6Nfc60c%|Q9 zfAAmZR#&j)-#k1Lr!9avvWS1F2+WC35;@a2yP%HL|B>LA2=!IB+^h#pZt0v{axWBG z7zJk}>|pFXkTC6_MH5u<`d=LpFvZ&POA>Y{PB@YUdJScW`34s(o72_p#f+$(eOhn%!a87 z%?ykA+4?v{jwW(7Dl_d(Xy88f0Os^0rH*D$RSgcqHq z4A5}gz;22)9KT2SAcbQS|1)?vzF3GHjz0^6U_o{a!%tt-u(<#p>7B+L0Yc0YJCp)+*tJ;tIKDJ7>KQakL&JShX9M3eiE|1Z zotAA0mN>(8*riPd>}k_@G;M+l4x-a0LnGF-slYp$HpwhEZB-s%EjFo@G#N%nxoNBN zfUZdsYq$=pa`Up%o&%S>nQR#!bKC7zydizSVLeCLjzo?l3A-4Tn0RaiX5?|Oq(R#e zr(KRL5QkOSAxS(;EjAJsN&!6Vn$S8(0A2?*bu#dQPrzot8ivEJP6uaiZzfyE$J~b; z@;Sb-8D7wClFgH7GMbU6uz-QtfT8han?u4R&`wH7!!u|J&jDClVYTWRg##Kq2@kp) zCdw=muu!_d&9-9!PaZVfv(=g`XCxamGpIb1XgbP+CtP#EZ6!}|8yK`_Ft9Q(Fle{T{Lw1PsgVwPwL;Aj}o zP7}CP7zEm+n>cyWBwG|bJekfqGo5o{I@i#6rm@jRv4eZTVTDr-g3K%+bCwD6%u=ps zjciI>;Kan7C}G9mWH6DzK_ZXi00U2&WQU@MACqrW;~oZ{M@(-GZWtvTVQ6F)U};{- z%B;#=q z6*M1yzzf%=$>D;%8t0^%X#U7Gaiy9gd9biX3dOSosdJVG)1x zmAjD%EZ)G#zyLejSPRiEmK0z#4RClG+yL%S1Q@6Wo1_MqwFX!$4X`>IVDmIUfHkOr zkyRt0fkid2fx|VhK_E4-L7_FU!C`4&Lj&W{z=j5GQ92^<}I;3TXLi5YE znjg&%n(S{V|8q#^Y-!|TvTvFpz^28a(!u$#Ex}Nrl^dohsyjinO`0i;p_zxH!`RC) zkVEf5Q;L3Mr>$B9PuN2ySUL7WX+i75wgqAb6quPF^@Ih8G;!}xxH&l`aXpt9bJUIo zXajuxK8~uy9f}`V4Nf#PSS%J3X??(?=kaY@^o`F=M;D0eB|!7)X%nVmi9C*Tj66mP z3g;fMx+?By;GSe_^r3aHF^3p)F~Y#>9QP7;C^oQz4NR7S8hC4g%(n*$5j^)0E-X+! 
z(EhMZK_2Wvo>Hg_H#3*rINh9Jc%U8bWv+e&l{RUni4ZS8f_nKq|FcAYk*1>xDsV@7 z+cHg+$m0lO0z1-z6XHnMkQ?k05_tg;j67+Q9Ez}jtm2<((fhD1K@DU-tfTC0m-WcB zO`2&oL-WhF=?T3Ln3CL~J?b~g>PbmE6fbZZ%*e~&yus1OQ3eegZ~H1GwKi#{`3%i3 z*`_Y&ebA8r)AUDKCoO4*q5wBo(;Z|@E~gfOG;IcH+V}ujlaGpSUeXT50Fb5yoF9-i zx!zJzZz8vWKHhB9)UFN2Wi@{09jLwipe36pLh*sCe3sk6cF1I zkTtzgF~0-y6G&49w*;~#|0-s^Hfg5&AWb_KAZz-gV)F##Cw_w&c{8|WkTr#zY6fZg z4AQjg0kS3^RogcpKY=vu;5I>Micz(@ki0{20!Y&XZWClp zVXu;H+oYL(gEZ|=K-QF_YJVqrhvEg0CIMaxWKH3}nC;r6nf`+`U0Q&wsYcc1N%9Uw z0YQTqc@Dfb$eLojT0okF8Jl0WU44M8>5QuDo8%pe0U%8UyaC9XVxyk8v`I6GgEZYx znEIe2A)T|Mk;_TK!)1qpTM1~71Z0i1LPw{rv#y5`BLirB4ODM%AHp@%)$mNrrT?-) z_aue(K!Iif21C#?U~oO~4_xQRg6n(3-3$RqO31tZJJYmfe5MR*PC!tUv(3D{CvV9WELx)Uo zli>ja&ob#N417&5*-Rst6Q?CetWbz>p3$DW!0A&<)eSZ+3HKH4Ync@|iX5035@#@T zZ25lsL%#xJ!vsb~qcf5M2Pf9I{geEi{$c*b`nLa)|I-^10$LIRIuZhU5&|Y91WZW? zn2``LCm~=#Lco%QfE5V=XM`k}T^TGzo;l25PULH`6o4>}=o~Z*V7$SsD8VeM&&Jgv zFR_`&L#TwI<;iT-3vHGiPDKY3Di1omWZ>YE2w-rSp_*aEz@*G3!XRLz;Lz6D;c#ld z+y7aAvwy6Ax&PRIDN&vokxgj}I2=Tp6vdJtNK%7Ig;ic+a)eKkVdqg5F%ZMEkwNmA z5|ieCnZL!~&OfwY{$J*Q@xS+u2Ni-2DpVb0n5Ej0#<_^EL18K*D?`=ncV#8s?P_3EJ za7HM_Cg2dq^rXP4?y{{1I~tim4XUei&M-!B6e)^BkhP)r0^TVCZwxjtAL0|5$UDX3 zg~1D^MeLL0QkYeFta;uvY-mnN+01g=fi*GXlL1f29*KyIBtxDMj}A8e!w#&?8D9*} z81dXONP2Lp�Ltf}yb|4}1iI5i@#c|f3l;N{PA9Ow z*pPYv)CQd5*uWRfX2NkH@w5Ruqg9TikD^1nF0(d^<;K(rtqa7r@$3+I1IjAkG6K|Y z;%;M5Fb3yDNV^f#h61%48^P_y4_Bp}Hh`L0;IaWt?x|FBLn8x|G&Z@Vc=TXBf;m3Y-Ka4Ft+r%3B2`4Ft?tc7m({`v+9^39~@P{~>yQ z@=x*SWeDf7x+B1oWNxR_nF++~3|i?w$EPly1MBijLx13-S11edoc_6RbYGB6zGZBby`%;PG+ zV=mAPwqgPsbE8WE(`FVIIUW|NgPbu11$sP3I1io`5?~hPF|;`3!0OD?SHO5!+@(sm z(4D2qdBbLwqYkXf0$B_>U_a@A?Sh>70E$cK`~j#q4zN4ozzFg@s5}Pkxr3C)N0^T= zGBBi8q$Y!Kd`>=tbkocm0vA}o^B>YpOgw3lOo|eQ8qXA14kqk(YrX?(2%kCp0ye{; zUux0xu^bvcO1zA&s*qMxp~I{Khmc5oS(vZ`2Nl3Cc~9h8mX?SPm*2NmJgf za9FzOR0G2dRf8ZN4umeVCa`tM3t9!Zj0LtZD!x#{6E{-evgM~6UfWh(JrU;%N1hw}-W^oG~ z(CBdF_yL-eg_WV8a2959WMetf;Rr696wSmP*;wG^52m~riae;C0M!SK;PN>j8dMX( 
z%m(E}WI0g54Jy;1dL}d`h(M>!*0h4$3z7rPxxva}kef>mavYFg{7a=iu$7ijc@{3F|>ng%ik+UkZ3(bDV#U@iO*Hh?EL6B{ZaM7l?6qQQ*9R zJrNN;pu{T7g43RZ3~Z3N1EoV)ToYkOKm&A)3e*|FX+E^R2hGb9Z8n>tAtn!w0daB#t4-bQe+fz%QgW((4`38XkM#00!3a6SMIHjrLO zJmRy5k&zARH`47%00kX5Ua`jw!avf`^$o0$_`EA3)1!o2rz-hi3JR1q;j;t#@@1R1p?utR_!Vhc^pE9 zGgJlGwBmf2S|0SVESF|-InltdBjM7+HV4r|;L{3V^Ao6U8^|zIF@RJ?gD=b)n81q( z7_f(#GPry~#3#6(q(?fti`=e4jYoGhH_p7e)aN z=0FMdNgCXjE%-$|g+n97r)5fCuauW)X_V+;l$_EiIfqedNu$&nM(Hh$(t8+Xjx@@g zVU)enD0_!d?oFfIlY>3F8DgrAe2W#>6dCIHK-myn{y@rZNP7d^WUx3Ubg;mA0=OWA zmOYqimSL!Yv>}kqX%hm~UZ64vavr@8dKtti!1y$n>uER-YY+!(APeT)z!Ffql8KdP zUEl*I*7qP{xpdnCi98N5qZz3JO%GaF(>dFu#bon1LO_GoTuKrv6xt4|H3>E|aX1=C z7|v}l=wOwQ$m8HLn2{>bmT+VNXqlH=kNSehrbiPz+NGPqL51644mTbyP=Dx1P0}n? z3%|~i9f}>S1~XDUCcw1t=q!tDdNL#W!o#)%u>%Ry5(D9uY&_8#+2qO$I+G7nXMy@c zox05`Cs`N}eRiHC0j2|Y!C_vgz`&5p%)ns8C!o~A)oHrpLPRsq6w@0UBAfXZnQ}~w zY8KjH>QNZoEVj?ILNTUU;-u*e#@J@5>!v3j#x~16HRU)I*R1r})MHV6vs$BBM^i?# zR=?Saq|9c$xn>;BSb z%{-3eHM@N_t0>8D_V{f!<4b=bNowl4z~K{#P8-7&+3~~TP-S% zbu?#Aw%D<(vpIL6#gDeG=EC(B7U|v1rF$(Z+4de- zyBTj;osfR8ws|k(BdZ_M4eOc@GQP92ka1Ywe3bE*RfP=0j^>k$ZPq(v4(w}tNqxXxNbHeh%2WyYP>9_; z-(|dS{X;fjU-Lu8*VYzt0{fevGJdztkbAJd`8i{gO^4is6U{FfC)lizD>&KwnsK(x z2|0yR&2Jf3+We6FaH{zu<5n9B`3r=CcJE3%{15U zgqFjr=Cw?#?OtdVyl!64wA+qDyWmapMyAVl8rlcmHE(8WwfE3|@UwX<({}p|ZHB+i z+nHY4cW5hcwCrS>?XW`IfxBfl(^-cT+6jCudzt<_ywGltY}wDW(UC)EflSLmrpJyN zItS!i4l_-4^3Zvp)pC^SxKoA>gI>#Vrk_q7ItrF8Cz)0|uh4O@X*tbw*ZG7_f_=+b zrhbz-{EGol z{Ay`s-tV7bq42k*o%w-(heg8wmQLojfE5-7T&>;AI|5EvT;OT#Wxf&c!XkjbwV$~w zki+tWWa~ucRe>6o2{NsdnQsSrSbmUeoyy!0RAG5QvvmgZ#-JIN0XnU-m@fqFur$zb zox|K8e8aN9s&yXoj^H1b8*Ez_FnD(P+}4fEAHy`PH&nE4 zW}X-BVck&Ox|R8Kc!qUAUF&w{zu_~i3p!hOF|Uf)VeQbiW{gFSc zFU)E^zmlZUkrp-wR<<5t-VzmIW3aaM81tK`3Y!HxT2C-9k6vMuu&4Dj z^Zn=(HW&7{o@JgABVjw?OzS1)-!T@p47XdaFh7couuXW`dX4#5T!rm{_pLXW*Tv7U zb@kZ4%Pa-@OrhnIkaM8gJ_-HnVA-VO2+ z4VzdtH8EOvD_BT0Y+*Uq%oyR_;33hljb&d8V}{CtxWw{oHlu~_hm(>GS6CY6GDi3s+?8y& z#&T{RV})Htc zWzHVP9sUVtr5paR+}z7}!~ei5>4twSC-yV`@K@lFX=q@5a)41HqCr}wp^0_NAx4Xc 
z2YNCMEv#=2Ge$%(_{cQ0vEDe!SP`L+DbvuwI{i4~jED_&G7VjSDGY3N~H za*FXrM8SHQhCbF$rx|}lD4dpQn83R8ETcrE!b_QkNvvDWGg?GC{FiB%!usa|V?<>D zn~XmqH>{RzSipMtHlsw;hLf@li&*#EWweNza9_4z3G1JGj1f@=e`On%vA%!6SP^wV zO0HoA>!wGHGol(SSYL})fVVSPQ7DI(4wN3-D>>#^xf z6>$qXG#g&9&X~zGBQ9aSX2UDi1+$rU#5wHKYzO1n7&x>W zn%GutWU|QEAgA5X!ghTVQ$&V=wRS@r+wLt)6&VNowHrFv+O{#x$WX}BZs=lb+rhLW zqo7B-p@;3pPNo|f4y&~r`q=L8X8MuAa7w#j0$cZ9CW*|1d)f_?*e361^2luXt=%w< zt@8j=Mdk-yorW1~8xAtf$V|}CX_&=!{SecR%nMdJ4RhH39cH?b>ENr=Fpurolxm z`+tgwBRgS-PQymF$)}ktvM=n?Y1qPc;xto5cEWz0h8=8k&M?i$Ryd>6u#4@*8KxK6 z6K?4=>}NZAmPsP#!X2H4!)#a2GDYMZxU17}jP21`rjDEm_jDRgv3)(ubRtLKg-*j+ zwn^uhe&jrOsnc+gZPhs@i`;)pk zcf)&~hTCkt=b29AHvH6SxXZTuJkyI@h2J_2_t_4fXX41)@JFZNA=}OKOd5F~{_8Y6 zW?OcF$s_Lok8ZC#4!XX0x>lqk8^T7fQ z3=BdH3=E*TM34+j9>fNz2hj@{AoHm(8g%{zhYkY+XpcE)t`kPX#1AqsfTm~}U^Gaa zQ-y)y4g&*&90P)eiT`3?U;xc+z-XAb2qOc70s{jBj0TAdsxUB^K*eD+OgscC4x?e> zB~Wn~4HKUN6^GF<@hyxD45ADS3@{ob4s!oVsCpO;5(l~GB~(3(28n~z|AVTB(I9b< zdO0SD`7jzJ4pMK-1o1D728n~z2Se4vXplHaeFjuLj0TB=)OSF|VKhv91ymeH!^BTO z#bGo|`~?#OgFFKR1B`}=vok~752HciAos{HL)61)kT}Ra)=>2@8YB)<9{^Pkqe0>z z_4!csFd8HdQr`kq52HciAoYu(>R~iU9Hf2+R6UFaiG$Q%XJ%j!V_;x_(I9b<`nOQ^ zFd8HdQqRKzQ4ga*;vn@(ED-;~XplHay(3gTj0TB=)Q3aW!)TB=NPQVpJ&Xp4gVgsx z)x&6zI7t0!76t}!1_lNg4H5^bKLk||qe0>z^$(%yVKhh_r2Yp~J&Xp4gVc+&Ld=KJ zAaRg-JywW+VKhh_q}~^*9!7)2LF!XjA?X=LgTz7VTcF}F8YaF3Dh{Jz;zyw3Fd8QQ z1S$@rVd5-okobbpAaRg;q}U+tfzcpwkbA74>R~iU9Hc$~svbsz#6ju{pz2{XNF1cT z4XPeSgTz7VmqFFTXplHa{T`@#7!48!slUSp34a(35(laO3{?-KLE<3w!t4s*w!^HPO#bGo|{4N)yT!PUs@jqOUdJ0B^#6jUH$PG~sqe0@J z@HF6tsE5%YagcgfsCpO;5(lYIfvShmAaRiTN~n4m4H5^bp8-`5qe0>z^=qN(VKhh_ zr2Y&yq#T3MAaRiTM^N=J8YB)<&&UH&52HciAob!r5dXqxkT^)a8B{%t28n~z`#{yh zXplHaeKu4*j0TB=)Hg!a!)TB=Nc{pHNcxA-AaRiT?NIeF8YB)fb`u z!)TB=NIefP#C#YH5(lYQ=7sndMuWsb>YbqKVKhh_q&|`tlAd8SNF1cT5-JX(Vd68P z;xHN}z7r}AqhaDVq2e$aCjOHb5??SHBo1;9KOe+BFd8Hda*sYAL_LfKiG$R;Le;}) zkT^(vDpWm;28n~zS3}jqXplHa{cNau7!48!sow}y52HciAoUmdAmI;CG)#Q003z^+iziFd8HdQr`hp z52HciAoa_k>R~iU9Hf4)FeIE|G)Nqz{w`D;M#IGaLd9V;Ok7q3Vh)UkiQ7WOVKhuU 
z7Ag*-VdAw=aTpB~p9>X-(J=A7P;nRy6Tb@;htV+czff@)4HK6Yg}NU~!^CYxA@K#H zVd62OkoE?Q28n~hvkvUwFBlCH2dUQ;gQQOw4H5^bcZRBm(I9b<`edkj7!48!sjq~p zhtVK$kouWW^)MPF4pP4!svbsz#6jxML)F7*kT^*FQ>c0v4H5^bXBCI|3r54lRiWZA z8Yb=v6^GF<@l>cdjE0G~Ld9V;OnfO+97e;$k3z*^G)(-dI3)eRXqY&Q1SI{yXplI_ zzmgIV^)MPF4)U)BR6UFaiG$SpLe;})kT^(v4pcpi28n~zH$v6JXplHa{Q{_Z7!48! zsoyFA31=7$5(lZjDgjA9Fd8HdQvVvN9!7)2LF&0BA?jf?NF1bIQ4(T4j0TB=)H_1e z!)TB=NPRd|J&Xp4gVdKp)x&6zI7odjR6UFaiG$Ryg^I&ynD|+!IE;pgzlDm!XqY&! z6vX{78YZp_6^GFjj~ zLE<3wtx$0o4HI7q6^GF<@uN_27!4DD3KfUZFmYBHh`lfxCawwPh}wC38P`+EV7XN0;56VpzxHEg{X(oAaPK5 zT0+&sXplHay+2evj0TB=)aOCf!)TB=NPP=bJ&Xp4gVZmEs)x}aagh3*vXF3w(I9b< z`kS(l^aGeb~S=EG=^I7q!aR6UFaiG$R~LDj=( zkT^(v9aKGx28n~zPlc+7(I9b<`mIoL7!4D@3KfUZF!8TYaTpB~7nO&&A4bE(O`+m2 z8YUhJ6^GF<@lvQbjE0F%m4~DU7!4EOA`eMFFd8Hd^6x3AdKe872l@9UR6UFaiG$St zhpLCu|Ns97sh3lLm=B|2>P-|N?t#%Dagh38sCpO;5(k-|sQ?Kd7!48!sqa*Pqz4!c z5(lYY096m8LE<3w2cYUYWM#IGIRUqzx(J=8`6$S>-q%Dkw ziO+(H!)TcJai}9QA_k)VVXqb34R2)Xb#8*JYVKhwqDpVXs z!^D}?Aojv&n7BSv97e;$W1!+N8YbQe6^GF<@oi9X7!4DD3>AmbFmVBOhSx|8p4HKUZ6^GF<@gq=i7!4Eu2o;CXFmX8zh6;N>)4HI9g z0ZCUd8YX@jDh{Jz;*6RQ^)MPHt`8N5(J=8?s5p#jZkqI4HI7n6^GF<@!L>w7!4B#t(gFo127sUZmI)u4~&M1CqczwG)%l7Dh{Jz z;(MUtFd8QQ5-JX(Vd7%C5PM-XOxzhN4x?e>1yFGq4HKUW6^GF<@l#N77!4Eu4i$&d zFmV+bx?5_4HI7t6^GF<@f%Qa7!4C=)n{M;?HPj6FmVli1_sc+ zcNh&54}q$O(J=9HsCpO;6JHEf52In?$DrzAG)(*xR6UG_iHjOQ+ykRw;*JIo_rqwI zcnVZKjE0F%fU1YlF!9Y$^)MPHejlnHM#IFJ3?c4;(J*m6Lx_7|G)z1Ysvbte#A~7I zVKhv90aQJVhKZkns)x}q@%K>mFd8N;!YS15(mYLj~T?BFd8PF4^08n0P!?J&cBl_dwOdXqfm0sCpO;6Tb^p52In?j8+i$ zz-XAbo)yGBFd8Nv1XT~CVdC{r^)MPHz8I<=M#ID}K-I%&nD`f{dKe88SG0z>2S&rh zU9BPU0;56Vpm>S3hWHCcgTz7c(gigKM#IF{LDj=(nD}jLNdAJ+F!6uZ3=E143=A+D zBo4Ay-iCoemVto*M#IEyY#10s7#J8}G)z3e29mE~G)Nre&Kette_=FCe1Q$bUoaXb zeio`8M#IEELe;})n7EuR1A`i9O)Zp$iM!Y`Fz7QdFu-V-c(E-5gE|8P1B`}=&$4A; zFkoO{fYC7VQ&9CV8Ycb`svbte#1-uz?t#%Tad$h2dtfw7yvz>b9vBT1pKAwk4~&M1 zpM|Q2(J=8ZQ1vhxCaz)+iFX(c6L+_V#5;@ziG$)j!yXdvFd8P_Y7dEb7!4DjX%C5a z7!48!#rsivNW8;nnD|S3NW8;nn7Eh&L_Lg#iCZ~9)Wc|)c)9~5oMALfyw?E|&M+D# 
zzQ+L)&M+D#{@4K$&M+D#F6;6PzIN4x>Thpm^Wt1c`ST4HG}- z1c`ST4HJLl1c`ST4H5^%JD)Qo9$_?0+{hUck1!f09uHLyqhaFBQ1vhxCceQL63#Fh zCVtHs63#FhCeG>t2{#xG6W4Kpgd2>8iN`?I!)TazGgLi{hKX->fw%`o!^Cg8K->eP zVd5OF5cj}nn7F@kn z#5;_JiCcI=;vGhV#6j^M9QAm-K=|k&ckS!xT-%y97e;$L;NB21&oG?S3$*LG)#PfKO|kj zXqfl~s5p#G92gA~zX}zH(J=8p zP;+23Ok5`rVlRw_iN^#&+z+E+;>}QT7!4C&6$lAu7!4D@2^ELYFma9`Nch8On7AHP z97e;$LxLdggwZhZ7N|IkhKX+pg18?>!^Cex#bGo|oGBP$K8%Km8$rckG)z1x7-Bw* zhKYAV#bGo|d_yp#e1Op~@jFm)7!4EW2!WIjFd8PV4HbvcFmb;SNdAJ+F!5@rIE;pg zuLyzUUl)PVdDFt z;xHN}{sw9ejE0E|ghA|u(J*n7Fi84@(J=93s5p#Zjq4ihtV+c z9H=;qhKWy#gt#9@!^96k#bGo|{8J<(9AGp|TrvtG4x?e>4p9*EVKhv<04ffnVd4v* z=D=u}_;IK>jE0H7i-MF7Fd8N<6%7%G(J*nhXh``0qhaEiP;nRy6Yq(JTNIHbkF!6;@aTpB~zYq&apD-FG{tYS)qhaDQ zagg)_qhaDcP;nRy6R(Mbq(c}D6JHD!htV+cQ&4kYG)(*#R2)Xb#I@of=EG>1cpy|9 zM#ID_;vw#b(J=89P;nRy6Tbm92S&rh|3k&$G<5xyS^^~eVKhvAC{!Fq!^B$>AmIvFAM#IGK zLCt~DFmcu-h`lfxCa#kN$zL!UCLReDhtV+cwj@aUhtV+cRZwvl4HG|?1nKv|XqfmP zs5p#O=%GGVKhv9 z6;vEX!^AH^&4JM{aprW0y)YUkZjuf$A4bE(W1!+N8YbS74her44HMr26^GF<@h9n! 
z@Q2YbaqbL=IE;pg8)ZP;38P`+$xv|^4HKV|0dYT!hKcWiio4?V8YUhO6^GF< z@fN5#Fd8Pl2`UbwVd4*<=D=u}IBO2XUKkA%*UEvUPZ$jokAjNBXqb3c4kR7IXqfm$ zs5p#ja{Vd8295OEj{6Zb2Cm=B|2 z;`LB*7!4EO05u0j!^Ceu#bGo|oS_ghuLPrE;ThAobaWknn`jAaRiTNrjN|1V+Qew-qulXfiM`z-XBGeW-dE4HIWBf~bel zFmatCNIwWh!^HiIAmI$7VdCXQknwgH4HKVQ1o0P)hKa8&g6s=`(J=ADMG$|%Xqfo( zBFOnwFd8P#P|UypzSa^-gTz7O|H8$Pc!AL{af4!rIWQU~?pe&hAj81G0HZz^UI*>VKhh_q<#`q97e;$*FnvJ(I9bzn7By^#2gq65(k;%1XT~CLE<2DqM_n28YW%> zH3vq6#6jjXLDj=(kT}Sk*(H#0hS4zb?In=(0HZ zmO{i~G)!C_Dh{Jz;-00DbID*dOgtGX4x>ThAb-`CLgtTPG)#O_DI^`jXqfoYQU>rj zX)qcj4sz##Qb;(!XqfmNs5vkiBo4CoeJLb8!)TB=$Q+(Bhz^?6Wn7!4Eef!YhBLE@nJT37~|e}~Z^agaHO${=UZ z!Dx^;$lfbZ^)MPF4pRRWsvbsz#6jxW%OU2&XplHay#`bqM#IEC%NZCz_hrCnkT}Sl zVNmrj8YB)fryQytMuWsb>RZYo;RB;V;vn^l${83ycV)n6kT}Sl>!9jkG)Nqz{tQ$- zj0TB=)ZZwFl=CneBo0#lzZ}v|gVWG{y>10WJ&cB_52=8thtV+crV2zU_&}Lv@fYC5R~iYd;(NGjE0GysA6C+W?*1| z(J=AfRSXPP3=9k~8YXU94Jl_}G)z3a8dC4WXplH4ezU6~=@v$V#6j`fQw_MRK8yy5gVY~{nh&Ev;vn@`t0DX8U^GY^r2Y%kd>9Q92c>8B8c4YTqe0>zb7Y|6 zFd8Oq2o;CXFmX4iIE;pgM?=M7G)%kzDh{Jz;;m3|7!4Dj1r>+UF!A+JaTpB~KLQnp z(J=9wP;nRy6MqL4htV)`=2}Sjz-XAb1XLVG!^Cx=;xHN}?gSNw(J=9Fs5p#ub|>E8Ycd~mVp6umlmA< z|NlQI{qWT>Fo5n}g3&Pba!_#?4HMU^gO~%OVdAz>aTpB~_ko%NqhaDPQ1vhxCZ1gf zX;;8#n0R9yWc@abhKbLpgXCWr4H5_Cmo-rJFd8O)7-}z!28n~h;Zhx>-3X&$;*X)` z!)TB=$o%h6b6_+`9Apk#JtTZ!G)!Ey9+Ey`G)!Ep9^zjZ4HI{OioLK9+qhaFBP;nRy6Q5NNsb^s{NF3y^{Yzz$X@yYpl#U*<2@rlL7sYNC6X^ELRdTF^O@$pqTr74N= z1tk^nnYjfy@$t_7z5$*-jv=1@e(}EkF0S!WQ9g#|@$ng%1@5UO0htx4IgTkQMVXnI zu9j~3MY)M3KA|Ca6j}HerKA>t)L>ZU85|$(>F44f9-Na?maA7%lvn^}J^tH7M17~m}XkU$LA+!=ftNK6lLa>q{SDPq@)%V#V4hvXXc?vB^DH<=B30JmlS2@ zrN`rtO)N?;#`Jn}SxJ0RUVNINS$vwY5f1q(0`j>8MML+v*+iH|ZgOU}Ara;EQzG$#D+~3zAQZvFpmir3;dK zap)?*r7I6yUO;qVNievyRpHf!&u5^x2m1`uHhf;gqYaVrZG{$ktL*o{wJTwxGjpCyWF=L}D6Gv9dg~UA$ZF#t~72wrY zfJ+;+(!p2hfg%~FJTww<%0nX&r#v(gamqs@5vM#f5^>5yBN3-OG!k*jLn9HVJT$g& z%0r_Cr#vM5OR-k^88}i1q;ZE;PZcgbka)tXCl8k%NZeu7Q-DhkBtF4Z_BZe94ig#&u^eKgRj!VD-TUGIQ2u*3{H7yn!za# zO&vJpq3HmpJT%5}%0uH8N9~-Ng}cZGg*FatRk*Z4YL`+R+VXH|gH$@Wv=!je2Ca*5 
z)>Y8*0H-`OZgI*(;})kpG;VRqL*o{wJTz``%0uH8r#v)namqt8C{B53Y~hrLMhi~) zJW$6HG_Hd*rjneISOn@?y5O#6iY>r{HJF;JaB9lMZBrgjO$E3$72wpA7Z2+EfINjU zum&y7@yq8D&|g46J}(~B0l{lOXix`d9hFN!e*ppcy!c`>6L9?C^gmwtTmt$F2*^Xy z6u1C^kJG`@R2t603Y4U<>Z!t|2a=+&>dC{U2a=$$>M6jb2b!M1n&54Q;*9(v@Td|l zT~&}&f?XFrKS2jsk=hI}yYTr6w=R7C0;NNo^-dKeo#B*+rc<2q&~%DZ9-2;Z%2z?s z8BTdjjuO{X~JA?Xx#z!sKHaShm3feR$8dhiS-rc<2q&~%DZ9-2;Z z%0tsBPWe1gKM+)%AQcIvu!M@U7OcXf3tuf=3ai@TNeE^azG@n`E`0SgsN(_h7_NE> zzkDvZABkIk0Rj2Ecu;c{xBe;u^0@@$3kb;P#h2o(hpPz4=Ms=FARrG(SEv;fEUr^= zS8JfC#;yrZt%h3@o=Oe3COmZ-IL(0F2k)7d!YWXFz5*3YIJDvO7anc+e1=CGKEHv= zL7ep&G`-@Kho)DY^3e2(Qy!XLamqu}D^7W6dc`RZO|LlRA?X#hqJpPa+|?T>6=Bze zr*^}w2~XvQTN9qT4V+%V?!%H^@%Rdqf^cZV=Px|k@c9gnHhg{qrB|Hw8Z^D)l!vBQ zobu50ic=n%UUAAp(<@GSXnMsd4^6Mw?@ExSKfDFn1HD8s=`|RKvo% zxU!f8cN3=?=5FFt!`w}rYM8sh=_DsVDKUrGWI{+W%+)iBb#;XHcXNbOy}LL@9>3nJC3DHxs28=4Ma~ z!s?flR4l8UaMmxlRl{6OwAt|FLD(fQcY`8^uxeOf6Q>&HZsJtKf}1$iFn5C@hj@1r zryAyN;#9-jO`K|&yTNe@Dfe(kJS@10QVer7QHo)XCQ32P&ES|J*3CpIhPjz2#V|J$ zr5NUBaI6sPW}+0s+)R{Wn45`G40AImQlRA?acP5~VwjtWH5p!j5MwfW{X*Eypco{^ zWN@pRD8;aFCQ30ZoI#O7&>65iNR(n&I1{B9=4L{QOHwQ14O8OFOu$1}py@1(Av%zf z%)FG$qSWLPV$>px!@7hVhjBR7LW~2s41|es9Y`GrV^xbV4yTQ1o&_0&Q!T_ekTggc zRvXcb!>bly97q|l#^F>8H4e1M4%=W0mZ$&?lH&9U!Z@5ZqI(uJoPf(}BNEab5o!@i z2)m95wGbtsumEBFQ9*=Sgc4$12eKBYT8MEVX^=8vT!&XJ!Z?sJVvWP87HS+U-GMi* zWacKOr^Xj2<`(3nX2cu0V9pPtBu6wO(D&$o)FPDNbO<~-5~0?ZgycwsT7(k9t|LM% zLLEX;Z=(;4y24&<8Z2l8iyx28WT#6XhvX5 zjtC_<9Rg2|M5s-Kl(wLmE5cz)jAA4;gdIqXVyGIB>p&QPG!dg1Ne$611lf#JG1Nei zJV+f;F2t)C$v}`gq7B5U7-k@zv}x#)NT94lHw0ViL{fv(G4SL`jN(K{kxFFxBtkKg zfke0vVIWS$Py<0R55fc@3t}KhAqZntjAS5AOA%p+Fc7C=sDU7PoR*>+h*vR^fgp8Q zeSytDoQh!v;z^%IgwrP^DDbu|ku1XL7z~%=YF!qW=A^_YfjTQm7V)4>wa^{1kTyti zMq*xGYEFDgVo73rNo7H5yrBzL1^J+TyXcZR`H3lDG4xypwG{41!<2Zq?Ksqelo*;p zU5G;sig|<-gA6n_BEdjHib0i-5kY?-yRnS0a;Tw%JcDj3A@xvW33&^THI$H-&`l+z9%?KhFQFStNWC$!841)O!I_cJB*7U8B!(k%z#WNfI}WuV zC4|BT#XLfaK?V{E6%+#rDF$_&NXSUUDTk^euWf&{yg 
zP#7Wn4l4ILaISVf)XDA&!8AegnF2`F2th-?m|3jz~&&k5K`LUO*h0V2b&HWh5)zG@fu1{71%6vtMQ}`ulj_3PQ#bRt7c=-F7@F1*>cEs0W)5bp{?~U}Fg@1Dl5KT0E%+Y%B@t!RAAqLC9Fb z%D|?f*bc53@Fp+fm4i(OIfAgE1XY2}LbsYw;vzvk*nFrf2pLOQ8Q3&*+X*Eu64Zmu zhdP6hv4oX@O@rGGYR?f{d*W9MHW$AVaPr5m1Z)b5>j=dUv5LVagDfXxAOSUCb5JZL z6eq+g2Ad4BosfY9)PT)Fv6N8RL9Al1$spSa8Aw14*c@a_NvS1vRlyP? zeyhP|p}Uk&3L-&0*nFs42^mXR8Q3&*+Xy2?3*Bl$DToC1VDq7_AY?3IWnk0LZ6}nPNKg+pALFLL zB%}yz5~{s~B8O#VMPQRq?Il!@5Um<)HrN4#j3lH8Y!ZsSq*SLM)A6eZ zn@?C7JaH0X8is2Lr6*D}ptzTi!7zW}Q3o~;!vTcS7AYFQHo#m$$Y3JWfz88k0HJh7 ziUzO^FqaTAmw!U3T1IHGG>{EERQgQj%}Row*CfXzX5A)%NeS~b{g zu>FLLB%}yz5~{s~VuxteV6(vvAY>#VMPQRq?Il#I5Um<)HrN4#j3lH8Y!ZsS`0F~f zY!4co!P_SVn-6jX5ylc$1~v`DwS>|WDH^~wz#KxzU?S9k&BJg2p|nMc2CxkSWb3ID@skprn)3K zBR(;&GQPMnFB$AOj4gj<&ocN4P@EP6^OP~Uo z*bYq0O-z9+MqdO9)dxB(7sVj30ES|SL0ILI(o(_anLtc|^U#d|OJH*(NCLbe6TGq& z#LL8Xhy_RjbOJv{m?x!WLLvjIA9BDYc4gS?PfRY!EKAKzEY1e!v7*$(l=!05^vvRt z)FQo1GZSp0*c@04%i?M}|gPtJxqHMuf5CpEdG2z=Z!MrB=v{)RbQok5^AYenB$0 zh(NI^B?Y^#yu`fx;+)h}@R8Brq6%KlLWJY zgj_cTZoDAe3Av*RO%QUi6q+E!c4(oE>UcCki0x>CQ0=Hrf;b-Bo}1MzoSZej)aN>@mpg18CfQXCEipLYRnNAK5)-egn@yTfq@~3$3Vp^a$j2Q?=A7%fek*43=9m4JPZtr;u#pYnG_kA7#JAD z7#JAX8O~EMbJpZfWHegTBB7Ly za496YF(_)Q%+OF&Rhgk7sm{~7WJ1EClo$n-tfq!F&o~5X850B)71#tD68ac;M4B2L zTbcs6nAimQl2{I~a86(GGOK|p|ESwaINr-VX-ngKU6kH7&2 zW*!3&A>bg|!*qs0;vo}{MzbP=a?A$C!UPr`GX?&h0!9XwCP4#+1{Z@vYzzXN#Viv9 z^$Z&Q44Bx|0=V369AJq$$RRG^pew=1%=Cg)ImdzFXhI9O!~)5l21e$PWvw+?c!#5mYWG&XX-U{_{nU}Q~X5jI$$*yF&!7SbXpaiF2kfQdmmK*-G? 
zkvZxR2TP8FuS8=b?+Z?)13WA)jtriL9S$sM3JqLLE4mz6k_?y`!v!27c$5WN_yihe zo48If_{sjMfc=8Wkx!O_%s#S;oGTR?7??N|7&J4xG!j4k|L-ubUTIxtSBD`}!>$9~ zvy{AM{OR1Grll0NNI_|lV8;^y2L{II0!Ic$mV^dIwm1QY0}6-qSq^hBCbH=s;AlQ# z&@9H&;+Vpt%rZlA3&UX^tHV4|1_^9S4SO6C(ju693_5s?*Bo?^c+w_$jo~m)(_x-$ zg9NsdhCL23X$zQp3|8QPM8?g5faFslz-a1_^9$4SN&>(k?Ld*qz`v&N%2G z&vL?`sbQugw*kvS$1@99nVSL>L5ADC5C9o2Kcz$R8iTVy)DfO+0|gG#BsQx9FPh~f zT@*N+F9>?7G)+)RThQES*CAwlfw4^?rBm`nf`h=JBRo+J3t2Oh*mNZ>G+7unizTda zoHxUfo9&uHz?ltf5>XR0(juA}bvs0i*DxNFO6iikmXN^fbd;y0VIk|vBn~U3&J^CH zqdX-B3s`r8?2KqmFl3hHS>w1#!in2}L!lwVutkhVz=7dr5{K>q1qS9rM+Kq`92nS^ zbSqwCXl7vQ5O-d4kb#k9f__^BGY^x2LITf5HfELtheHxgd>{il3|VAB21q7zSb+?1 zIwnv8HlRmy3&;Qtg*2aqtjsI|4h)vbTwptRIHZCz4l*!GBsg4}k;Dr!g<;V#k!%A8 zkVU4;z!n{7*kE{6ibtShqfv6kLRo`D3gu@m@G&O}*rahV_ZWD{8MhpC zkZ?I~(8Ms~pq#;>1=VK~_?Z&}><)7@NzO==H#nqFcV+`WbK(U1v=_`h1{?~;Uk*A* zs9Z2$Vw};aU~p(b(-{W==EMt*hdEj#XDBKf98zdKGeLkkQNTG(fxXA>gre~pK~H&> ziIOK07w}Iy!SlA^B70y;Tbaa*W*sAEiG(kX5;GNf*nTPWo=M=Bs1|qS?oY*)idZqsGie1-4%bQ_oBgkf;^#PV?w! zv^$|{Jb}4QjAgRqwGJ2VNhf)p7%X7Dn9^2u;6hW5k+U^VhNHzyMIM7K3Ukj0urWso z_@`}P=g~c(ZhYf#gBZ&c$#0C#+(M^#ej6-c6-;fjJ8+??$H-ZpC&STWW}||^7KNoC z-$n@pr+IMj=$_CtRyooj#xhm$1YF7komD?(C< z2Nca)j9er@iKQ@=qxyhC1H-3N0^h)~K27l(*Z>{pH(&!I+g>n(4A{(b5gg$el1&OA z1FjgcID!n=nA*_-GN9%=&0K2r|k5Q{F$Ye&A8Jb|Xa3~z~*~kxaJ%eCcmmS0?gWxZq+#ul4 zF%#k-hM+SdEMON1Bro&e0J*tA#@N*fV$@7UAqHihA`c^H6#-7hY=P7^kA_Br6N;yK zCNfAQZgHL=sl>>3D5KrP*o7&fMUbs9t%F5^Lz(B(83E1)!8Ti#S&Bjl3Ov;wCeA7h zZW6x*GQkFXQ@ji|;EUr5umLZcL%;?&3Ty-$(5z5&7G!{%6U%In0X;1qW?%!N1aiR! 
zY*DxkGN5*X^A1U+Mvq?+%_YVzOgvv4`97v~upD@?SoYId0ZxMp?$#`G6onWT_ZxUv zIIA3RkWUmSZ1Z5~v3sNVm}epbbL|c16Ou}d2ET5!PXQaSk>4;KYyf}JIj{lpEOS8y z@Hcr_fem00ECm}Nr1TnWfQrivumKmEwtx*-$XW<8;6jtkr*i_FE{l#zvCLBxN^syV z^00AMVQ3S3D^S_y(ZHyCOYt+v3sEhOFF;|ig`@cjC=4Y;K;!ER-kE_M*|}hM@GAdi7QirfP;g?kp-GU z2@MQPhZNX-Co)K|2qf@3W^}PgC^RrsdpJRidgCktF)BfR6V#|$fi{RyFPcptMlm=Z zSqL$TLx~?^)C7S>h*1qK9xf20G8|PPMmcaBU6cU3ie-YchLkecr3^(LuE}863tZsa zn9&1v2GcD?@m{dg92_KA78`<{%fZo{V&Vq!0t4eWf!4M+?kQPQi&#LlqQ_EXy*JxakrHr-4JntVRYl2?gD=;KtGP zHc&%ALP4MRAOk4>Cd_IA8P&ibbXf${eqv-1n90oOmTM{q*49`rdMh1oqUu6Ra zhOL<`EF25WdkYS-iY!;u+wq`@JHt;op@H#gCI?6>^RCMr(m4*5=LA`nftx8eCjT%w zJg-{Fp>9@VgDt0m!`Th$jMbACxG6L+C|%)U5!j$EnZ;(Mq}U)}$|5__rHN59tBu#} zMuUXu5h)PYGONQ>snbQE>54$Mi_l3am6d`VHzsSCGBH$IT z24#T_&lQ^VS(-UE3h*d9957%?Vr%AbbW{^yW?%@EU|^Wc$iN^y;lcm^|NpZ$@HB8t zFf%v=;_y32Gt@EsVN_tO-Own()WB~h(88c2{bRuokSt>avxCx|hqAXC}%wVtLXfKRtH)~+8tYELwXfIsQUOu7SOrX8WfW26Q zy)vV@(u2KLgPnoDfpKd?8j1USeGy!*hH>%jj>fbRnX|H}t_ z&o8jO72x~Qz{g;F!r68Tv%?u?+m^!?Yn-JIG~4W9ww-d=;=*C;6VB2d&bBj}EvGcw zY&mGNq}gr?a|8PUwqT2Y|NsB5XJAR-We8^Y!!Ut?>jm>6*?S9lUpUGgVw8KJD0}Cj z%&i5ohZ^NBFfL%PT+v>{&{4Udy+ES9G=sTfLVNjw_96-PstWeX4CcZW?8OG{MHAZ1 zX0(@FXfI;;z;MHX>jcOf{0F2MmNEQbRA3ajz}{lL$65M9vrP-L%>iet9fvKqI9j(b z+b&>s;QtiB|CB-CvjhJ_1HRW6*gh!mzjxq&YQXp91K+0$Y>xu?pC#};Rp8Gx;D5P* z|0%-*#tn@gOboXjG#C}wq&rv{Ht@1AGbC+r6qyCGRi43;!H4k!GXoz(XF+>;26NSf z_JRiX3J>;ji*~b&_JRfMB@8bd?j0FBZxiYm~d^D0j<2 z=G8%&GYe&&EaZKrDEnoh41*1WSwqujkon9C?BLk(psJ4!^3MYqF5h6v;KMk9qr9Ts zYyo>E2YW?Bdr3iiaYl0)2YZzRdxPAQg>t7D6`nQ99ZHnBwvhLMqujklxl@TUFCNN1 zS;%{2q1>5+GN&BnZY9WFa+E*Ccz`*fnUVDXQ;4*W;xCX5j1kNSL>TND{?Q=b8$3WZ z*fR(@FxGBx6j=u1L45Rq*@5X-1689#;R?ujP#P1uz%K2u5aGK94k`zrW(3G8dvIcA zU_T%a_MpR#!`53EZBIB`9ALIt(ri7)S$d1J#RX<7hA#^IUjz6U4EetZ@IBtZm&?Ha z=>y-F0KUf#{O=X`a~1f$Ch$EA0Hy8E6Zn1w@G;0SNc%iQxE~Tb4b;w+2?-Ei2nFOa zw7vnc*&P_JF&MB?xe)971~QxZ0W_dW8NM83OHyVqyx~y5q`)Tau<$oXp3#8y0Dpr$ zg9=qko`edJ+4c;eBr)YZh|QkB%fPvy!S2R_hJzqJq)7L0kk4^Aq`)AQ!1yA8S&5Bl 
z0t0tIuEMbfh93#dj%*->_6*q!9?~5~kfMHqnMRrb!x90OfC+5*8yqc}85LR;8k$Th znt55IAcgjtgKSE?3=JX-3=EzU3=BF93=G0e35OJTjlPNq#h0zIis1qQ+_Ei)A)&a^C;DPSSo!ahsDMz|@%Nx_$6 z87HHu;tDmEYb_6E2_y)&xX%kL%mm~50-A(7Ygp+LAPujv7=a8o`L zBU_>ZTe5-Uk4c<|miq|oOKtjKD9mIuLtx55mV-R!5}X>?gjtMksPUvsVB=x{xpz@P z278f3dtpJl8Ap4W0)%8QnbBTwLqPoL1-1_h*vlN)s}k5tF0g$N05iZMxeWY83)qVl z*sBcKi!QKzFaR^aBDoCwC15pBT>&UkWneWBT`v>ZAu4kd_zM%*s|47~F0g%500|Z= zu;(W5!_EKZz+M41GdF?160EYQ0i>XC0(;Q{_M!l&#?l1#Dh2k^1dvpz07wc%USRts zz+M10wvd6n$N?l<#K2zC08(7wz+Q2I?VSNgahU^qt^qg5Y5}g&3v9~;*mEE7yoJ*Z#y+I>d)$JXaJ6&WK9rbY%aFbFU(FtA8&a6WZ} z!Gb}7fyIG=fq}y)p?wQ`YJg9gnFs>|3j+g#Biow^2O4?$UY==VVrXDsU{E;GAS>iq zaDb74Q2{g(=P-fMS!;ub2$%zkB?g8D9*Ll(tj|CSAOZ{wDF-HKnV4~KG%zwSfh=NR zQebWi+4IoXfq{oX0Avn>0E48#l1uz+nV1q78o;h=U^vmBDB9cL$i^Vd$N(AzWME(f zS!c!A%)_vZ39O9)V$>EUHc18_RF% zVN~QQbfXpuOH?oz2qL@6Nz>p1LxL8A0@&>e3`Pkatziid7!QOnfP;_00TiTMTMryy zZdeJ97X}7Ma8=bNFfd$3alG*A0}LDtEXW~m^R1DQ!G|3=xW27nVpd=fK=$LoTkLEM z2C~QwtJ==R)4<>WN-&_r0CJdXk-P-Mff8hgWxdfdU|?ckM2^RMwMpm>yS@1cy2IXn z??iW4guw<3Wl0h>7^VgA=oq3|dR3QgKDygZ$uWDQd*SG$gYM`)jXM262|YkM(>_c@ zciti69VgIz@k6pE1>LjKyn04x&YL=$OY*qDAh~!;BJ3 z(7pV{V#6hLFK>{+G>~f^rh%!Kuo!s83e&(E*@lA{F5KRnjqXCz`-~E6&|R2jv*Q}N z3$x@f4fMK-X`tvkECxE+VH#*5-{2_CaKHsQ<#WAfHppORn8ATu>^!VvV9RAdE5!cr zA6UT4kf4N;$D5fX_AoN|upmeDhX96Va|X0*uE&+|feo!FJLJyDbB_V7V6@O|aFjuJ zq&}-b2QymP_;6XlQAPmUPPWN5ZVcceXcLIFEkfWOLQ4(bb&b#@nU?RHnZWaGHf$lsVtC$ot=dCi7wKGC<-qaO*;*w~grxng_V8P5V zLjpP0p1x*aOJ`sZW(KzyAnle_b7T)Z;9*EeLQV!#w=;vwb{^y`UsTS}Y=UNW=s&(N zDKx8JwXz#{pnG}m$AqQmUiPZQVxT2IhJji0u^9OF3#NgOsu-G0(OsCse;wV0TzlCK zBG6s9@JGQ~bQjjuU>bOJKBj@Eu3<6o=5I^`qv{x%&Cql4ZT^G{Y-mMI)?Y@RTj)j2 zTYJp1eHAOGK?QnIvsICiEg!w82~#=nfET@}>0<$x?I>;3s7aUwY-qF)MgiMp!iia8 zPgF0&D6zjy#x(G1AEtp`epn1VsfB6arKwm->{CLRCAL>I6K09M(i^kH)~e;iEU}jw zFk+V2CTf@^_S91>m?ie5nOI6}FJa6Q+bf$1v&3HMi&1YCg`y^9wz{*hV%u%A++iR_Sva{6NnyU(*=b z>e1RltN!U6c)*X=7P`8aRpJkNTj z6TNM7hugq{6|FHjwT6*xIeIK_(m(J(1g%sI{lzLVhY76#c545RAoN&P$p~4A9?LQ& z`$Et{TU1UeR0}f);42ywxS7RC&b&a7p3_X@_OC?<3La+N`nR%kn 
zWBF}%gQGTjEVuF+RG`Q5){Ts8>(OI5%Y^E_NaSP!cS|6n98)W#!{rzl1sIX~AX@u-K_zw*azWSn_!y|fc0n$& ztBN0kN^Aw>l5^*-LQsh4XjJ3T5mXeT7$};dbRFHmS7ulYysM6BV2lW+fm3H=8n`M%&>#anmZxenv8_dqpw z7ND1hq1!klwxGwdmpEf{IC?DqR7&`O9?M#ZEIe`Ov7Fk|a1f)=o+4;Ift6u~8gf~8 z*7y_EJG<3)%$le>M$HIK`9mAVR2-yJr+kw_+W8ldncwNZy9DbN1-`# zs-IeJ0JJtS*=Z!JI%$<}S2(pS(! zGRqu`BRSMD9a;4lizD~U!Q#lJlg$pGJMw1fCAJ;tj{N9%i$NSMx>+B4N&elFoHKo7|!^S3bSp@-x@EROuS0E;6-LoglL+V6$w$WuY!G}(!g zCUdmFX)+KwO>zYu1*J(fF65dze-$`Qx}c=Va8Yo^(Ll~PYuA9&q%bRT#@Q1JPLqp} zGfwHgE^wL@LC!c?5m+3lp^fRtu5>Jpv|o$Gkz1#N(hVSn2tO; z-5b-9mx{q@64jBtdf+sP;z+5hOF?N8)sY`|fzu?aBh#mYGY(2fu6zehlc*v2I~JTK zQA6^WH#kkAgk(}B7Dw9aV>&YGH5Nzi*o(!HN2VJJurtiiLQazle;#6c!oV;YIZbLs z#UFbh!;r8FB~8BPmR`cl;3I~dCXdxgG8Zy5tVGT@Q(x-mIPftTL?UOLN&i!M&M`79 zL&-R%CpI`*F&s!i&Nx~kGYu2a9XV0?FxxwHN6tt&#vq60NYm*&(kIXzSvOg-xeDEp z9Y#3?{Ai9W3Qptsgzm_Vr#2k4Lv!S+_?dh*Rdzeju5iKOIr5;OAL38BSeLT`r zSkOYUY=$IrJ-Q?1jdK`;&>R_*p2p+Ggcg#zXE!){qB&A){Y*m(PPFL$Xm*%Q6Wx&_ z>BkaO(Hyz;8ISZ9bVugRm294l?#O+{(|s5i4Uh+yw4O#zS+F!+cfr?h(;5}D^QG@L|EX;<~kk{<{&7=$9Hv`@#| zcn>f!sBt3~(OEZp8vPg!gdnH1Ri@Hr56~U?G~JlZ4&9M*xi=Ve&>Y#i>a0WuE1Dy( zZB1m}gzm_*X2}U+XpX#eyN&k(x+9<6?rDrdbEH=5>~7r#1_|Wpa@J$K(mPnu(oe)r zN#-4BQFGPWJV!wsE&bej*~aq%J^j?*-O?C~mVUbSN*f*EMoT{jD=xEDp{1WycKIh7 z^w83e=sREO8|aRV*{8_72hEXF@0jNgA#VoPH@nj^XRWQ`bj(Hwbk z(Pg$p=#HFFa-zWm&5@zg_|$~a7LdI1Ue4>ojFx_0y_~?9g%&lZT<4kv@S-L1w=135 z)}W=IRq>^_4w#~)pQD@jBoDBmrJo1aGMR6prJvCERw)Oh(9#cA;&R>?bVugBnZTHX z=18w;bIm58JM!;lXSOZqj@(#w>wp=WBUx|rNj^Y#}- zH9wlwA0Ij|Wfx#zMjoO`_?X!I70oZA_I4=`k(He!fU%`;HdiT0yn<6ztIg+;shU$q(#ksiNdtBlCYWM^1fZm(HMs=E$!5?0haP zXh}Em&kV;-G)KOgGT+=n5Y3S{KNquUF`<^vtLD``XmCYy$qP0D|ybya)1GO)NWF}h6E$(Qm)W*_Bjg5Xa$y=Y~{DKf}GfXtHB>FL9%|IAo+t6EkWkU zS1|`LqBa;pIo;9~w9s-*(qRrh4_35XvqxfX<4QC~W~nG!IEbP-@@#4f+bVQN-s!p5 z5Q^r=)zm2oXvg6x{U*l2?&T7qmXdBU~>Jwa~oJLM3LmLOekPn7<^ zg_a;)Y^<2$(Q-}bEcXHiJ+xeN>MIBD5mvOUttfw#aVMH1z1}IC7>J`e@@1U}`wMhO zicUG_5R2wW)pjA74cussJQC={T#n|*sa{@%4Ekt}yyeZwi_xHZta%i(LFJ}uGy&a_ z4_B(Np%;Bxu~YYaWAYZNxVf%)jAn#AT!w`>_Ag>+@ 
zl6=99mLQ)*6fxJKB}h>V@AL=yXt~BJlatSZ4K3G{>n?0OftDa&ZC`4kAc^M4gF9N- z)R$XpT&XYhs>;?#LJ384rxm9O+cA$(MlcNK1=_jM-?8 z)Dl^4QGnh|I{Day%?Qnrt7gs7nuS&?O^x!)NiarB6`Rg>@)WS4CEZ_^TN}@zCEZs( z>P8PF(UR`kS6A5Vm{Dsbt(v)K7;?~(?$gber8{`hl5SMF74s&vq$~Q@uOPt;&5=rv zU-Dc)cjRiDgN?V)9GNv=-8exS&5^BNzp&+@JMzZ7a}0TCjx4<|EOP?gk>_fxn0KK$ za_VaTf&_CkM}GRA$@2l-k%{)9Rm`Xj2Cd!mZ#3kir61LQ!jeCD(bA94ge>L@XzAyw zOi+q~6lAt9Z_Ae2JERymVKZJIJCX@~3L0Y^%`HkCwpF2MwiYj$~ba zMRErpnj`b3HZk8ocjU>SGzDukM`lI8;>|#Jvz52f8C~&02MU9jy{sHz zf}s{I{g}QBkzT=%mdF$488LrDPd|5q^B>rur5~;OS3D2c(Q=|r#KFctXpZdSUv0bq z-I2Y?tnA;=9XWH&8HRc^M^2r7Rr&?GBQI>WV*ZEb$gAC91rO}e9Qo_=D;@_9G)F#* zI@rj;h&n!YDoxu+K@lw^U*@L1ZpUcRP=E$sX+7=%$9J!#CEdkAut4y~%a_B*G)gBiDq9u{T1hUq^;APITKW-PHBEYlAX*~7cU6no zf(f;MA$m3{Pr)56{RqXr=6QjheqLrDY?MVyKU(d&CI^(!(ogFqQT8fyM^3Zb!ZifzF~tZ!we(j0m-=!TQwaRGQb;T zK!l{hl3#~k^Q3SxEaO2QJbYF$kx_^7z!c=cLoe0!CKJ#NJp0s@Z37EKFdOpx&h0&y z9HucOoI)PqWLt;7k3@3vcGqPW*t5bJjgyCMbXtX_3SAWLcWkIW6h4yXYLM!TRYYO+E znYPMipHdu}X;Zc0=jfwF@u@lbebdo=p?WF&h&Ngna@Nc4&#Gu;&fc$64+^2>lz`tw z37lxo+g6+UfQ3Pr9XUF_?=y-(%gdr&@q9Xrs3X{?j;-gJ!;tU@Io7^D3s>`FFpxwZ zZFc&4x=(S6Ag-2@5Uz`@M18CMYi|UUvD0efheQgzw0|0wHXhTAg7j9?1s5>84`XW zd!)2|1{<1xwZt~An%lr&fIQBe+IQoMp96yg^4h0U?m5xw3^T-#^U+0>V{3&Oe7KSQ z`#@;+Elv&wX5=kni#tnSvoKs{LW&Np4@Xv&F`@0At4)&8W<0>chV0+2z?>$u0SDJ* zGuY9U35A}iR!7U`joM-8*?d*F4$mV_v?|Z6+w(Jen%{Et(1LkrsdVbD872N`sq|K? z4$mj_y!NNN6;y7ZMo0TbwrLFv9PkJRZ7N)8C$OB z)Xg_#8bla;Qcyzm`q?iGOBfQAP(pQSff}01fQ?q)FyOyA(_OHhyB|1bHd{U9k z)G9cqHVLh;y=s%5ln}&VFcT$6v#b_-pqJj6b8FwTFkD8lnrrLkmvb8!98l8d+p@4! 
zv~r;7{hCu~#YO0&$f=B|ySluNZ$5>#H}>l6^r`GD4dBhc43J$dT0!Tv_|eLWo%2HT z8PJrKpI>FpfLc~uJ&}G@8f_~X*YnM*_|V*T^;={pJDL}S{;pZYj8=5ttqZ%0zJ=|E z{pxoNXwFky5n7J!Jn1W#&XdZ*be_o8SG;J>3+;}=a9)u6S`6p?T^x25-FX>7n9j>w ziRrxkS5{f0hh9#Wzb$%{*_f`Fhvq!h`BEYN=%E+*cBML6=p9}0?TIkj2EYR$&7aUK zf`V1HKUq*Cca{BBohfK3IdpB-bT71esw>v?)YJxs1E697jExdJSU25z>gB+&1m55S z?cL4`tIk1h52USj-GNrZuiAZ8BbNE+W}^95R3ls52hG2)rfv0JhE^0jJvYlW7~Q`fQJZtP zI2ueqUI6104T@9!vU{}|83owD0w6-tV2M?>?rBX0hASX07%MQVm~n{Ad(Omgf{Bp< z+)gl(o1q~o=rf^*Q_cMti!+zQ-32Dsr@pcZ$Xv%Pc>3Fy9d~9eKbaa(VW)rk<;(5T zEP7}C6^bp+>P?R2(n`?b;AlwTXky@CQ4nBi9p2jDa6=MvyU&liVTD!9+4?2CQIZ&C+L>K?eu*Fx#G~=qV{+ujXK{ z^k^?AV6Q4*E~{uSP-vGtBgpq=0^dUe{yYQz#~=7U1n?OP9Fe`JD04$m_E4f+nn9=a zo@VO{&X!x4ZT5gp^|3zlL8L5!y*h*4JfpoRgWbHLy~Z327B2L*0PHB@`ml`|T3-hJSE_ki!~0lqf|%tDlPGgaQRYaa?5RYyYZII; zPdHo70FyfoTTF4b=wP;nqIG8xHdv&ulMjwj1AI>d z_&ysbAH3u!_vE4Yjfb-L80FGDB&|0z+iq!=ZaHkx<7|26u+5HxmM0EdUtzX6eqU;ex*&B+oFBIj|6y{p*akkyTY&oUb@=U~@f&%vP3ic`v z_9~C|@`85BbM1Uj1^7OIUHjgE@ACxE2{xeoP^rMcuz`Vrp-9d{!MTG+lC#LjLqWN* zXX3<*E(-)SQfv&3ocbmxE`DL~k4e?grav`t)-qA=P)0{z7lq`U#JqHcvecsD%=|nB zLvuX?Jp%=ejFOUqVk>?9^vsfs(j>j){9OH@#FW(hJpY2!Jm>t}g3^*y{hXY#T-}1A z{H)aE5{07tWL;A|b3J1PBV%I=BZV~MR6}!7cP z!xWRGWP@Z&Lle!+ypsIfyv&09lGMBsh2)IHqTtNjf}B(ZXKihT5GO}RkTrTa`S}HU zxuwM=1x5MkMXANb@lhechGr$H6(yxbsX?h}sYR)I$*Evbi;T>I5ST<_if?`jSlq|} zA?}u$Q<7SQBIBHqn3tEDLOwDP^96qyp z6%4Y>nhvbF$-#6OBrv;I!6479>A;?w98ApV${ap(dKCr6V#L*fJRSL8O9mUISa6f^sA0sgB;W>&`BH+q71 zc|^gS!xmc@t(P?0PC3k!EptOr=FUTzI|?#S5<%n(@abI_7RsD}WKPD50t^fip!1f` zfKQTQww}UlyP(--inGOp!;A$CcMn^iIBYei*=CQk?Ez=$J>?INHVN#0dN2{U+aC1dk?X`Jw-Fpp3~Y8{IiTEy5>7BV8$5DWc;qZ_$|HX)w}I^!9=RWQ(=9M>qtq*CG6a=EeZ_T0C+!2sxM?IKrWT zy&9nw&5i>0Dm-$PSmX{u%b)FtFoK&2liP|%ZZjUajX32L*w*8bTZ>0-H6FQ@IOQDJ zmgA9IibrlS9=U}GIar*DUqLH>h~%S%`2@CVgxMH!m3ZXJ@yM0pl-s~oj7P2zk6b<; zxm<)CrQrk9F98itc$mP-m# z1JjQqOaj!E(7A=6HlyQ3HboA$ z$`3&YA$bT*`~nW~4LHOnz{FvGP`dyzms~yzntoFGpv6C+_8rWAV)>}yuyBJv*6>+4 z0f%@24)Fl&;)@M%hzsBl|KNw!J&P~E#L?14fPcO~iUUI~w*EqZe;yvWTs(3)c;xa0 
zQVu}vLl1ilx8)+#V#wtner2)*ZXyO7m#6Ngr4YQROu#2zWfJ1x&4)FpU;sG#m zG`~&o&qjnFER1038$)dtLM?_|CLXzLMEJq%faL)UbFvU>G2}84a`5ne2rfG|c%g?Y zM0^4c@d6y;0WfixJJrshl^rDVSLLNL1+88=xpC;`3b6Wz-=e0)DNTOCw0SfwEU!Qc#f8z)D6$k@{_vZIa+>F zH#|qnPwIx}X!(gXJVD)T7YAMj^uGII_C^O5#^h-V5-Su~5;*k~5{?{Ta1!8&S;^f| z*vul? zI>#TK?OQ-+`y3SL9DfRQjz2ovw}8&}jh3I(t&c{_PwIwe1D(sW(ejhZ@ki(KY_$BO za{SS`Jvdr^QaS!c%TG{%g4P%ykI$3e|D$tx){sz~$UWc!bZ@>9`}fq@gWFXvPO z&k+Tl0}no1v!lG2MpskA_$Ck9+e0!$8PKp_vAI~M}G%@I6T4{=*0!{6@2 z1x-9Qv0RH3k~xk}byBfOD%j)5(ZnjTq+OE5i7o048>1luWG+9NO)&yAFAp*i6qle2 zjfGhbN;nxj6=+Qp;Ov)>$ror!U}Sq~$aZi-qu6dnh`pe@zJ*x`*n5XlfK9vEqv8ld zWwQVaw?V;5V+kfDX5F&p2GE`#(AolY`=M(U;P#g@u_T^S5Rx-=xTnB!kclUzi>GbD z35V{x&^{EzG&c;3)B%61SM zJ|KT^w=s0U90AHxWQ9$DriBC(?y%tlhc_ahqK8e$D&DpQ(6E8JX#%pFAZ0cr|ANBK z#UWCFS=FHlRJ8o9g`N$PUdKV=j{ocJ3RZy57U_uvlSh1hAE^?gFQ?g z)GnlPbn$}gFa>a#3Mmso=?79K2C%do%s9Zou-1_MVtVbh^g6b`-Ha!DVC5s%bWrhi zKtnPiA(`XcBu2Jr?kaaU1q2LECCkmC>Jjw247B8-U!QU**8*=HO-NY4KlIUkhPk~!GGg>wU_a8Bmn0$J`WGRqNU ze~VtTM@3(v=aGayMTZ5JZ4VmUV1?k!D&}SmP$39QFJegk2BjmAzuA--jyOm%Fd)?h zi1hMYLga}AtmIF5#lrzB`LUM=&@>Hhqk__W%RvqX!vHo0l=Sgjf^RXx%r2;zASaH- z4YZv-8aD?Nns}K&#UIvuBZ-ubX_arxp#2U}B)iCr7jTIR$`9!IAGvP@k|#31fLnW@ z_4kN+p#h~{KrY`v>OlFNSv7zq)gc?y>|^-b{qR5+q+EmK^%LN7TS6aHZXZlEI&k2a zzydW8$FT@ph|fC=swk3YC3AFT_9-eXgOzX%x>ufMLShQ!esupK#}g>*K;@gGx}Z1*HxkadZmdg++JEL9NaA;=-mBxWUp>{bTZozbVLun1xIIgs6;f&)|+ zl3qV=yD*c?x}-rj4_=pm`nRzDG02ZD4w0bt?hyx0P?>sg2ef@%nUJ6fYVjt5f_n17 z1J5L;Jer-%!2+)s8#GM~IzWxy<^~CDVTs(o0m-YJ;9)QfU}CULNI(fnW02Vb%^yYvlZ&l7<0Ht_**>9dAP0_&XC4 zb|xfPf{N+J4@Mm=#)l3(laOczRX@!$lR4PZV8zpfmF5;*c*<(zvs7y$xOi0)XD-A&dE|)>2;S^=in2R&WPDC7QnnCP@mf|_p!ls-j?39vfcVcC@j8sHuh zI8Tqp8Ah5MjWbA5fwf#iE}!wGMUwLww7iq1T^(-CARysS*%o(LXf0<|5{!;^@50O7aKpbBvcmU1$odm^;P#TJKTmXQmd zNnj}>8+09>X*7YRY`|qCsP81q;tn2T0qs3@aflRgk`!QOl?q|{@{)(4)sTtdZ@1#b zK6w8wnWM)%DF8Gtc(C$8w*j1a+Tf0YNrJ&?0gi)$yk~`YJPj1~HYi>_p|GH#q0nQ( zW|qSatP5G1CU7tp1viNMSltm=!zgjaNW`V!sOx4HH~WN+t!5e;3JEeoPjng$0~{4v 
z0(#(+h>b=e6Amu7J|U2E3p@=49>)aL_077@!Yv1g5Wj%haM5k59L>6iMO#1vbkMN^d~w6_YEZ=u z;|j{-X2Bqd8?avp#f=eC8UvMu$oU485>dxQk~z5GdFBQp&)fjz8E85agr~EISB`+w z8ET$^rn4oe>8xTa?sS%D6k^aOaKMo*5jt-Vawo1lqle@lPaQ*?6AA=-*8pSBDZ=%jHfbx&SK*tZ*FOE~Sg@DVs9eQRx;1i8<~kI1o&p!@NzbY@2x+1!NL0>KvTF!hDHa zB4sTv6Bma>JBy{a0Z)j4mjXwpfx>at3dZG!c^PFJ4(d2NvV=Du+#Y#qMVU z9N8AUQ*d@*OE5U0FojWK$%W&6iY)aL4%%6t5a@A~;4ug?UV@;OGF^64@E$$4-4e}3H3<8m`Xh693m2Dj98os*jzWWI1BN3 z=qFT|nsR6;B+L@r}xL-{FI084oD2pRJePjB^ z5!gQj;s|-{0+cR^FUwjGd8h@HhoEVWTK(fQh9w7)(wvfhs6m^+5qKvVnI;Mu{+?Pgwq2=M;TY%F-7tNq`iea-UsQ2;qNEY zKmULmUzFva`H;RCw!Sj`^AFfR1o98QcmlO0LE{h47lRM8Y!)EGcYh9$A`-S(0++bL-vp9_21GLn29lp z90QF4LVBm5^{)#*PD7f9o*)4q+}WgrU>fn5a|lAld0bpz<2)IrCZKU1(Bv{=oac_{ z1J>z$<}CTpah{Ekah@z)^l_eIp6CGfW*$b#9N36Yu0II#4v^)O5jD;B7GmR|QQJz3Bpkq1;Afu`LVk~zGmgZE{?=cxA}`V@OW zeF_`syc<7k-p%pqDe&AO>Kt`wgLsJ59f1w7xkESbeES4bbF?|?GlnURGCW2si4%HZ zgA7GNdeA`z$ZQDJ;uYSb9gOh`9?7G2yuyYi22Z?#YasmbYKfF~L3UbDjG0ekL?5tD z_<9(|tH-e9Pw0A>70~<%T@TY>ioPD^jA0BTD1Y8Jg?0Q-oLnCQ+RV!in<1xK{)G3K z2V?#uuH3aiO1qG{2Hq~lUhX1>KL$@81vi-S=TVaDRCq6YFvcx$WiN?wi#%j9c;c3b z@|V=OHDGqA?NMa_uOj=~J=oey3y#vDy=2UA<#+^>0BBxmkhGV;0}@ouv-8g}z9wUZj22Y+PqC5^js&_!^kqNb#AT!?3`R)d(l~bcuqR)3XM6H?{weqavl&F=B zQLCWy-G93m8Z~Po)&(#vgRcu{Uk4x0XkW*3nAeF7vMyjEd|f~mFCXT*fbXmo2bUk_ zJqTMD&;wZ)u<-gP^mPHp4cHvzd5lA&tqt9xkV)`V(D+4@WQQYH?HdUv2QJAs zo11wYC3_AkI480t8=O#JN!~zs4%r7I9P!rQpfX+%y1?Sd{9dV*6VVq~9GSmRYURXR zGAt`6-kQG~B+ z!zjUH#F4@P85DAa4hkiM7FV!Jwt&VwTJVi|AlHpUw#}Zh;AozJ05iURHS(IZA=F-n z50c<&t0VVAAmh1+Il3#yc^GP~m>m*(KyA0ghZEraEZ91r_IWG_CdXl3CuT_2zj6V# zBq(Gp4=C%qvm!4EGH0zgxa2S|qcn6$Pz)sLMiyfxU9l4fWt;`rZX?F()&#Li*6eEL zVcedJ5@*bywfT(5Yx9xE;~;*6=jqxr3>TIgGl(1m#pO!yelM6uj?abr>G)iZ!@P%> zzZcH}pW4EsG5mB`bk9K<=Le~7Jt=VK2IR-T-NUc!Tg(XR$T%_Td?^f$_#&eG zBNShmhKRv~!4+Q_8!2yFOb~+&6by>|1s)2)UtbZ5FHk!$s>hXq<T$MVx_$Iq=7sH&UM!eeD9M4g-~mEIkY&2SH;S?21{7-LjUr)%K_g zu)Q^wa$uRlkZH)Sn6<(!Yl#`VW7ZP2tQBTrj4a0(|8_TOH6!)_bU1>h$=RM8Y8^x{ z4;3^gG#=D(R$!SPY2t05a~RNsT@Y{;B8X8Xk*Bg~M~fxwEzj4lT_90bpRCrTHpWi2+#0#%fW7wi@@x;Sux 
zrcW1^fkcZ17Mo=)EXzt3bV!h4V6b!r9SaI;_pq;k_aWF_prq*#%T92OR!WJ92c0=i0INAC~3pG^q|%M6An%X z2h<-&a6l=5c2WdDI{S>!fC7b8G?sn_a$XqjLoX>u{&5Mo~Q=T&lA-S6*MzI^0T`m zEI+e_dxP_HB1(RaWQVh7%g<8a?K}ssJ;4~ZTfkaoa9p6L(fqi;jDu$IX++NX z5sqy0-zhj>C`8}8gPC8E$Cn^}hUZuKyxIb`tOa6OM;rtZU3!L>D1G|&nehDBK9l1x zZxdvkBIXIa9};#J+z$yy$&c==6^u&{^E$%DDH6c>u`RM1Ge3%*IVj`Iz;;K**?{dP zY_LK>y)p?hSaEO}Xs`mdRs~C*BQb7aUHw5Bw+mHt!4$Wk#y!EfMPAnc8gD}G zYhuZ_pz};WBGxp(>XhS%G<+N>4bMTO;fS~3G@Ont4L72uVYaAdENS=xc&zP?jI#rB z8m>xWP3B;fOA$B*8YG3D>x-*?MINJq_!UonWnf@9iIPv*5q@L``H=~dPwyOt=hL8G zupiA){HV@a(YWL=??KquM+79FMr337v03y8XzVSsn8#5f1~zEWIOk3$XaJ-XI%t5a z{Ix-9J3!|636#H(E-9$G8I<{yMrAOhtLnfonDQxTz?fh@B{6!B0UdIwmaNl+c$XA)FDRM6Z2=`$Tqg!h@EyTN^?Mn{yByW$$eE3EDaJbLE>@_Gl*nQut_Va$FNcvu8JC=!;nEDdu|WEn`bSYTON*1~1b zL6J<9F_HG!h(6eCP#+93CK8hd9}`JC3my}>j?%V+9v|TdTP6ZMJ|gk@JB3BhOz#{E4Sb8BFbX=pYKNHaxPw(8snwdnhsIW$~Rs zXaLz0A;<&U6M_8q= zk;1<|1$oR4ZG0P^pX=GO7O-W3)-Iy2CBK6>d*Ke~>;atuL&nZLH+{A6_zszN8@Vf#1-=V5v068l#40Hco+l?m>_5Dw9H!3 zIco)Ij^QO@j3b%DAF*%aH)7w$Z>;+^61L*mw~+^34+q`1k->|;zee5Bb*n2?;1puhnVMlS|RoQLHC*=)t70YlY*2PV23S5t!R~kUxd>GS{b-24xVE;bIZRT-ogKo8QFwRI4 zY-9r+a)>MMA@3s_l;xyB+lNd|0fAzkXa{yn5bg9Jb;Gef$X+rBGlncK5-CWY&Dx|2@+>iN3GpW)Z(cc` z0m_>JOyK0}vZC|WieAv&PM~$)Q{e0Uk~xs}KmI}F&p)92DF9io%7UI=QTIR25q-cq z4Qc=5Ldd=<%>9p=Y_&%eoG+wGuGt6~pp&@3Gb_er0iz+lxWp`f;BmspJZ$qL zXm&&`3(FdBTF0aNfw&D1IcJ|UOmPIo;Nj^=75oCvGEhYbn;*bZ7b54!!5@dsJV^%< zks|O6rDbdrq>NRC)FBN~pgKelb|(VM%85}cr%Hj2RbV;B2&yyvB~nn!*uRJ}_AjW6 z6@sMMHh7vX*b3gWi(1CYh&^DP!Dr4=0IgFFLegxGFM6F~$=38n!TAGf8JkI{jI~Fq zQy}Y85&06-7BviDNd=WTsOw`uH3)nI@-IYQ`~}L3d=P)N;NRmYBl>`K3eq0OMu@*K z_c$hVBp9$uf=btz0QO{#AX#^F+pb@Df4uVs_`;3w~&QAA7*$4RsQU1OGmA?+q@>c*> z{x)7chh-mRSc72Co0pwaXYmqD{f>Gv#@VA}N`zGMB87YS8U%y@loh*4r zqRD|>y`-Y4-Mpge=<;Ho`4iZSc^D00F?^5_Ifi>?gJT$1{9>+=MZ_;8KQo?0++mi? 
z;SE2p`wrqfvO8GMBZKwBu%6cq>xV(lg~#lNrEEtukP?G9K~aYN?SV8~7nmDQVr>Gq%kjkv@*FFAeu6b~aQF2)5b@LjiYKW5 zXw}z0uE55M@Sjl=)PJLK$G|Y$;tsiuie9ggI0oc#g=k&p~;XcH;uYJZTOO;Mq1=IRcz*@#b61vI!oy zM2-*8I$p7j4;)WmV~3sOw!qwM5v1}YCZ7)e_=JrS6!WASz|LZ0s|u#945oCP;02-# zegP_jX*f=B)^HBvM0jpJOP~yPqUs< zvq>B~pm7@u$JoI+!vzNy!jnC3Uk)U~@zu-7>$cF_%!J1eXkBh!Cw}~(m?!7L4|u(G z7#7>O;u?AXIw*XQ*A3$zN9aI|BXrPa9O0Z{jT7v8fCc9INsyWea%V7PO#x=x8UJ~b z=;H~05a*ly0iAC~{qclzh6PQqjc64fGa+b(;;Lp0wV8$#%l!)qzsrqH-@xC zfhLT4T^U+IS0RAcfZ&)*K|f29%5A1(j-&v{_-$MPyX2aU#XPYSL1#-!l|bA64gQ@O z2SIIrQ2N3ZZ^-R`NV@`F-bS5afX|Y#tZ0;4(RwR_B|w6i0kW<;nd2;cY->*!{EYfN zrwr~WK-xNsef zU(`D+M@sx6w;OoD^Sy}ti)DO5ka001=%%EV6Qx$dk~MgpD6<-TFO@`-3ma^M2K!q0 zIY{hlLFYQNfDaClQG_pWi}M99a6`QxshzcgaoJ&BN7$yn0?6it=-FtS7hESbN?M-~ zsJSC^*zCzc1?L9Xp(tz*f|Vi+GVq@Zi`=(`_#K{y(?EF`b^U^%ffT45>j6y$uYi|h zkUdo>Yc)0^+LRkXZAy&t3+p(|9OzsyY^}yZ$ovzowHg;Pi+OG)u!Bx@f}XW0(a_(K z04}$1mkKr}}qXr}+1VFTkK_GTW?4i{)+@ju$j zEWuJ~&{n{f1a7Yyu%sBYEnrKAa#9W2F0iFSIqBe*YyumfCXW$IaRT)CLx~3do*qX? z;g7Eza6t+uNF4|-2SDw1X4e2V`1Vu9tVN*14nW7?L-&Nxvz|}pNOXYqQxe#dL8lRd z7o9jOuqAV(3$Q^r25iY3Sq4x}0BAnGm?z~y%OM442KE%_^;C>Mww#b)X$DWB;w&qW z?}LQQ;ULlvmN67W2M5#=oC+%;L3>07s*V7c7jdA4a)%{b4o(ME7zQi};KcGlw$9*$ zz?OsX3S$L_IHcQycYGYP&V;8G&R+1{&khFD6nH>8hXgp7;XPe&3lVfDCa9-N|2hLy zh7^L=rKEf)1=G%;RiNh#OODI&7_nqsfG&7tuv0(83QiTc+6n7^z)32s#1f7OY_;tP~C|6!4V^$ZP5$W z+QXB`!>|+D4(NgR<98tX@jF2Mc<8zFg79!&0J29a6MKIY5SBq7&JRc^oAxplzEMXRD!Y8$`Teo`ZqBzXLJ{3y(MO{a7G* z{O1-mLC)TbSq%3V)^jY`St}Tk&#?>uH&d~kV<~ay8>B3b5P|VfoH|K72o*`+U$j zm(a5wzQEIG6fbzVq#R{GU^r`qtpaUh zSrA*ybKQZxnCGAbY)JQoZ*>e(UkOXTL!S4ClqHC|4%B`g*s?O2WBxQIM#JM9SW71` zGa4ROV2J~Dj6lNxGR_TbiO`l_eR%0sK_*1?Ha`!L>ZG@rjl%QTw_rv*6_qWZV*# zFOScImpjKn=j&4WEGXAH(Ds!LvEX402}o661FI6xRzpTDhf91Ko={+u0Y~NpUhsKz zjIjOc3(P;lnruk@4wA~*!4aQF6%H?t1V`re0ML?fM7yn*P&w;_)c=OeTO;x&sGJ3l zQ-DuR2Gx-Zv7GtDj;JHqL3JcFty1mGCkZBr3y>iR@f!yv4oOr%dfE(Z$sB1PQbFU- zJdP4Qic1_NQox*pkiqB)jlx{TJd83+;B(oF<&y-!y-a+07kP{hvfl-{EEWKbM}Us4 zgw7qp@~b-{zq*6+E3|!1wKJh4oW!B+a|8Bdj)VgcZ#SB^8L-8IMr#fW)F6jO%7$a- 
z3_BVhgWB-YbKn6`3=IHK;fn1{MdZ0-NV$bb3s}Y+0tT$zbX37vA}0zwv>;Iu4WyHON&8u;GHL&2nMj=)+q+6FN6@CTg1( zao&47(s}PPli_W8A6@YIZR{v*dgyuYjIi_DAm_cag$JXz>9at?9;)E$6B1!{DsNQ; zBw8S216bNf`0^g897Y?>5@dD-mB$YmnF*hn`2loNs{)I=E&Ti!tY>D1GR6fUpP4C% z>&(nH0k$&m*zX0_3h;2=0nsYZIj@cqXAF^tvmP#FZ2=E*;>&xOeKmOA1C9T}b~m?x z`Y^pxh5;b6P}@rA=Vn6tFfoWe4Ayfq%^~-Dz|YNu-0y+q+)Rlkb4b}69RL|ah$;lv zu{Fg!2VwW0N;LH6XD}{gG#riJgVRv%HyJ7Mi#axps5?={93ZDHK-X=wteiP(B`jGp z{Owj0LYnu1o#(U>aR%;2&>6S_kn^0M=7K=8?Zcn5Ru0Zg7UZmB#)PS z!t!`qL^3#!=c45CM8>#=mj%uSuskjU&f|*LU!mvm6!4hFeQ-B;gJ6xpae*ZVLDhB6 z0eCilJP{J>$m?VA$2%kp;PH;y7w)*VVxrVaoy*0BD3B z_s#gD!Ced#@wnn0U)esmq8>WLH)ym z2vCGGn>_%JRwjTv?+B0ZKWyL#hwVGV(jG^?g8_Za5>&>6$3@_?kxRp}mZoJv=35!L zu#8zQMC9*74tC4u7Gu$A8eLO764@l zy!|fZu_G*X0BEcPJ}vE-wZtuJDI0Xn7Hr&v*5`#w9CC-W-D5yYHj%}?WuAxqu_(6JdZ3*dp$3=I@qLMsb=Z3N766JA$f8CQY# za6wBPXRQRc3K{5ISAfe6$as=OMGRzIsRTT(bmt&wEKO z`#BNm0iH1+i<&@pVKDsdKKK)H-W)8Cptpsg_2iXLg z=a|3=z6Qe)bPWcgUi@-Q6jTV{i(_KeYAh7XTBruP`+@;|+{_(umcBdES^Csk*8m<< zI;;x0_To0^+KU^os9P)@4vIRQ{dHyVxVW2+00RR9Xq>ENrg&av1^yYcWSupB%v^-_~W8fTXvGc$*yL4oR4jSd*o+@LA4De1@o z1}6cYm`+{3WGIu9Z^DBn=X6dkxfcp8jDj-~nj0h-KnJ!*vngbN>;cs)i3$u1{$MwG zfZYTtZ_(Wp?7%AFP~FEMz{nc-@Bjb*?5+$9%4{qGO$iJ3uo^P7Bnl}g{LtB9yzqP* z%uPE3CL!FEsV9~Sb<<0}841%CNEjM42(U1Ruj+1-gfdSttM6EFF|w(N-HId8fkjOs z&q2jtMyf$y0#v7`{*nwR^CbVWgdK_#jwDQbAhAN>!~y~4+VQ|y$&ffvn< z%1lR%XE;6RaL|`{&}6s+=A~$kC*TldfP`TiY8Zwhg&}BNW(!)pVuqnZ?HL9EW>p4w zSRP@u#2S{n5~d--a;2VB8Z;O*`R5>o<*x4Iu&}(vtg+((YFKKZgk`7xicF{>oFc0d zb|@}@hUJ9?2H>zfvp}KevmPice{1b=0EMODgD-9Uu&{i8C=kAlFgG~GMtgu z(80jW&$3LKsS#HHMfDmAG()THrQr(A42t}0eHY2J;Ciw z$l4Ij9tHvCr>-;jN?Di=%#kck+wz=)!7#uLk;1kjs(pSVF4- zdoqKuAmhdKr4xEsZZS)+ut*vjJYZlsc!1MXMZ(bG5d+IXh2}q3ZTfnTq!o$hX@~~|IQ*$JnBFCgj4xcA9Y@X0yoS;ygpy1E(i0Oqvhf%^2 zhDv4umgdf@vy?Zaake#baq_ezzF^=u=)%O1xI^?uz>|2 z0}Z+q*x5ccnRalWdSGDC%p+mwaF2oIU;@{67OrdHChvqF2PFFV8Uz+Ff<5to??A#d zM$0A{i4)xK6!Z$34?p0AyWR7ykTJ)rgd+_M4gx$P<&PUA7#KihY;>DSL9^~*rM;ke zwL1x?7kCzINHu6az`KnnL*eMbX^MPI<~NMaG^&6ih=G9t)K38Q*MwPMmw;?@VLQl% 
zMSO<|TO$)#ypfTCL72rA9M9Se3=GgVv7`W_sR2`f!_(jfaAzXGKsCU~HJB|mn4>kA zYiTgg(O^E-Km}HbfCeU3j{qUojsOu>jzBS1jvxtEju0tU4v7F6Rt}8-IaUsjfQAMR z)u4t30oS0028Gn1h6ab$poWG9#-$*V=_rU~c?u#qSUEU20vj3{6hMrS29P2UOCtcp zWC1Zd0va0{8bDn$aJmDP37~Wb>MMO=2c?$>{09=IDM~cK3YSkmnC)TBT`%?kjyCBg zdxhqgZ2=6e510aOC_6Z&bG9^cF{w7q5Ma~dQ0d@&*p^@@(aH_e)YZN~v`v~RoS~VA zqr=$8F_1&=K~svpW2>!N1W&|6CRjH7qPU^;VcP?-0}9Mck9xuaM4GsFDBPT!mbjiv ztT}2&1I$4aIqDL3C^E1coM>pUSS%*e`hZE#XFPrV@!fj!TR@ zMhXh&9HD7j9-QyK%ZX!SF;o+{;$o2`X*UOp_p9egyUMd;VvM{vu6B9aP|s zH-wgqY+`(d47PW`+l z)i!CSISkD&*=8*0e880C4((0(_ltk2Im}(K8`Y2F=kSyq}C?Qv;m}P z>VwV)9SJZ^9?3dsNjnr3xWSq>AZs!`w+N(Z3q$kEwv7tiNSY#&b@P&TC?@X{IY6O`8;Yk^FQc z+4L01PasVO+*gn_*~L8uX}ST@v?T#q(~D%YZy-N`G(~XVK-OfR*R0zn&2$G85Ze|Y zYxX`M~q?tZ|H0^kRtjQzA<_XA8{01}fW^jK%*5rDw8Kmh8 zNYgHbek4Cdq}aXz`3aXy?_1mPGet1xI9VT zp{O8eFeA@_SAY{K!TS2OfHaAKG+kAgh~%dmDXwplcPJ)+G!^iQAZzlAV{vVhW|9DD zx{(0Vl+M}F$mJyA;j$yatpqey1S=N# z7=3o&pUhTOo}EWk38O(;W% z=Lu_$fcT2WJ_&_v)WcjE>4>; zD=@2VrR4Io1yK{TT33m#4%-lQF{^d8==!h&QG(g6YeYA*9CHv%;A6ECc;T>sxsi_r z!Z7qM;0a;)<-l>w;iz3F6C>9#9p*_OpD;E@GUd)_)=-I#&hvfAz~Lnkz~C@LHN%R5 z$(T)qLBL4Cp{=vSforSVm05w=9&0tX9=jqHnViNst5Jy|kfSZ4qZN!&7jR4vsZ9t; z3hZ=M6`L9XVyLn*rOfE&SQ1d|a#qr`{mPu6VxO~?TiLHEMRw=$&Qk42<6OkopfHn> zm7#@)V?v^m(gY`6p{*HLl!Cgwbd9!pTwBzr$G|6NUf{7vv9nD@VN!z!lM-hKgdz1J z>CK~!+{g8#c4w3@USjnSSBPzZFcw5IWiDW4Xg-oD)6sDtK{6xpi_*nzH+==AOWjKu z6=Dt+9A@B7FgU%y*HMV&NNGNUI7h=egO&rFjeMIKIG1H#nHyS{_V?ygM&^r*0@oB4 zFf>eHWH$7kkb2OegrUWp!$_c1)4(z1fWsM~l$8bovJ8!UO+1StxHu#v4Foo`MB4GV zusfV+R%i~K&ct>?$MG1mYEOZtSZL*yK#3j+3sCY1czRrBO49`XpAh80x0&UZ1Lr|L zW^08v3W7HsI$QXf^%SZUUOJiu9NKXB03V07hnt!A2Hp^XHZ}gkpjd2m6FcnC+LH0X zfG5PD!;Sy2&fx>R%53MPTNETh6ooc8d$hPf(H18*mIG{zh76#*5#6Q)Y6mKwJveQl z;){gS39NY=QV)RIep4JDutl?(aNJ2aZNT0rog?X^SkSD?tj$ukA$3Bl0?#&{9Rd%) z=hB191yEawyN$sD)XoEq9uZ-u4Pf#1UVnw1stBBa7S(1+Z}OWWMlyK-9YNm0U3 zey=?;fHCheaJES|IrH>c6zo#eXyM^#h-^wcvS329084X8QK!^wRS7mN4kdvW&~P#+ ztU>h`sQiYdNziyZER2xcO^^8(hUpJj^b#sSH7m4Vj5ZIWf|O1Mz-{0TJGL+zKzBWu 
zq|nD%(A*$_&0Warq@&Et-_TXkf4}%D!VE_|D3xhzLbQ3F2nq-rb zgrUMc1(t&hTAe%`4cub3i3hpGjwUoTBnl}Qz43Xm`Qf<^P_MmNiPuSh=T@fb4B@6z z4Gc3>4K^hxxbbwEa5x%dET7)s$B}kqfd?q%m34K#YFS{Uz>vn-6C=?f2-Ue#Z4Hch zQf&)2i>1;G7++I;6SvVG#TB3g%%ps>p+Uk>d*wYN?z9Lw81WM=NFaV97FfcG!3J55(xk|MtNNfqVXOR?O2mu8G6AMq9^aK{3 zWztQ~CmI?oln;Q62ZwK)B6wa9RGx#(;ubid(c#GP5427NmVZEHjW7#n-%N)ixO{qB z!tV&GPoQH2nDQYg@}P7LsuLK&Wpsc|iUR|@jE0Rbz~n#@pmGhWXF_9w2yEK18RTA& z9LR0R{x3PmaX^BR1GGedL7DA@bc;en!$F(9hZR^Lbql)s7Kr*Lg@23)^|o8#>iNNW z5>jt~+_wd$o})>DID4elne9D`<&YggocZ1}Pt5?t-)>AnpRCDNy_@JI2f+z>w;|a^(Pr!_N!zlo`NHAy~Rxaa5B7 znl2N9lLP{oKpjFMgBhtDEwHgUPCcO%=ulKA-;{(MiXKN2ra4HgxS`G|&}1z%Cyzrw zk(ucMlUKVmlLmN{9h5erp=CGBZ37r)GY$;qFl*w%8D>i0vK|tjbWLaQ_6=(Kf}HgQ zanAsxFLTfY4szN8wF^LD#Qo?2@0|s_HxhWSDDYl*z;k8+&xr({BMLkRaE%Kk925J( zD8RuSD8W8SgZr`tzlf)BXr%bGOzG>D@)9kL5Og^oh1fHvEwD(r z^svo=sX4OkFozot7pP-3_r$_ksuq5oB|8)sfF?5>n*^JgI2;Ws)Qe}NaZXCskj>-J zF_@7WF(u*10??AOdz`XsBAZ;9lfZQxs6W-L+pMye5i%YK@&`9hk^qy!K2XuaP^iGb zu#JglsfDXicSpdDCY~PM8wNL<_-5&H2;OQETA}Ol@m7=AHrdt7q z(ET(^rA_tUGK!B7fpIo^*9c{Y%*G`XHopB z$z-)&#OGH{W}Edo7Qb$?I;eM|`Aw7Ec|DGlw@prW^*mhOHMzaktN8M+$>X!$jFR_F zUcdEre0ksG)24sp%8w?$Df&NJel`Wp*SARh)fBo`KST6)Q}|Z>j;X(!BKPXA5dG5> zeM0}l*FR0M*Ysa3{o54(P@kjiUsK|HeT%gJO{stNE8IAmGrJ9TsBt#u&NTSJ#?@T7 z+`!^FS99rBgNoza&6S4@R+RHJSD!XG@tvo+_Oii`<-E;}j1LVgBpUddTN&RQR7fQ7 zH+M4rHP|8HAko~**lqYi;)6tUKjUOW4#@(^=82564K*Y`NH$MqTxRGYc|odqD&rQz z3ds%9%`+Jf8SaprAk#dT@q*zCNe9{H`HVLVIiwEAHZNp+V5lMGAlJN@@wK6cl!AQo zQpTT#8B!PIo0l`T7oaM8PX55o3}F_GU|{{&}rVuc*`nj5keQ$Y1bn{?GW( zghM01uep)wrHO^c2fyZ4rmrRy8XNqZJDD0ycW4wuH1{&~n*Pu*h-{w7G}BZ=lOd{k zGSd=M56ulx%~P2+nPzA{h;E+FbjY+r^Fd7WOr~3=J2V;Mn&&b-GyS1?Ag*~K(i6UTA&DYF^K@*^EQ`Lw55< zrqgB`+68&do0*!;J+wbmHg9FxZJwcBP}{tn>9cu<_J@wq6jnGRcd=zLh)e3a?8RfbN%&gSDx zo2@%^KI~~e$@JQKg-*eV=F?1zZBFQXINf}f>9)-aor3et=b0wka_D}z)qIiZw5^72 z!M*0oOwD#4x*uLPUuD{Dm!Vtmw)r~KXS)vF4?mi3GOe~>p?omS76cdnrX4~3B3>2EpM4_JHOB?uy1+KG}(ni|ASY{N2b#*8u|r(EuWd1 zT|M+aM7Dfo+U=U5Ul7~!o$0e{hyI6*mY+=9+*ar>$Z7e_bj9t2{(`)gzf9lUUg$5V 
zX!*}H#ht@qK~+m5^EP)4iwQL?&CEC4JuEKNwX`z-bI-64=xk|cp6}6N;n3aE$$Z3P zg#|-jOE>dZj}sOPX0-G&PxX9ZAuzk8pZS<4hvkO3Efblad1_cHtZ13cJk86)a>1&W zsm!~*Dl7}uw#;CD?KQ*l!}^w4%$?plEH~_GnZtbC`-bI*y)E;YKY0JJR5;nPfO(yd zgjK?smPO3heJrddoNrmeJjFM{YQwFTWy}YCJFE=uwX9_R=exp6;bqHe<~e>RtP)>n`T&0XwW2_*?fdHwE6X7LaP)$Gj`>hjoK&>jCEXff6oMlZ!4)sjUQRoi#w zE6F?T1w7mSFrP}fVP6o@_K*2M>JR%3@ofz(o6;maKIFAEv8+tD@JOg@YhhWC5#e#6 zx2=t3T4sfZ!Q8eEmfoxx9v{}Wb+N3?-r-TOudRn=ZVrd1!`ZfpEFHNTo*!)|=!RohgS+j$wD20z=TvmDRw@ci(d!m!+rZh35p_ z_W3M}iaERrY}*&Ibd*GR1^BctWBFfF;dLOoeFe+6(ivU?+3l-XUX<q+%xvGla-mYf`@xF#O)Mv?EW9`DY~RB2vO2=s;8goImYX#l-XCtX z?_@bryTW_H{r24~>+4Q<7rblV%d)ush4+EK?fY4#H867ceBhC2ILOl8$f)7NAS2Om znB`9+qleE0ZHb1XEWerbvhBGV&TN!Wo zOo*3gILES~jq!)iggl9c3oOgp86|uf>LeO2vCQdUwD2wHm1wxaGNF?(!gs+OiH2(| z?Olu&z6;h!G~8h6>}H(do3KZs;TB6v591EshSL%acUWHZFuw3jxFykWpXEU>BZuFE zml6#RS#I<(YWNlWlxTR&a=D+;!!LkSvf(Mqr3s7~ehQM34bNH5Ph{-yd!QlN@RH@! zB*qnf4wjM)uUT$QW<25d!Beu~EzA8Wj4%8qL`pWiXE{HWQNq6AItaIj6eJXPD?g4uzr}sC=n5GOR}Mf_1auUj|hjCk`1k_=jJhHL>%}b z+0f2@%xWIPeUpdr=J%er?F4w#;&6^l+L@m&eZdk`UaT6m)bc3dJ!$#J{n;0#k7idX0 zY+>ELi7_KuKv%k9JL{WGj4PrSSV}kSW<9u>@kDfiopi%q*4{0Q5-|^aq#F*gzS_d* z5u@NQ-EfTc*A~W(7=Z}shLf!ATN!u6Jcy8PILkU~E8~xtged8T3#{9>GHS#c#7H+> zVLiWR-i8F2wU(haXz`*$*)h&wP-y5Sw`%AJfH@eH%U;}M`nZYmE0!$%j z^Q;bJF))Doj6Mtu450Co2#`Vs1_ltV!py+1!y6)SEQ6^GFiVu)`VKhh_q&@?x9!7)2LFzl8;xHN}z5*%^qhaDFpyDtZCjNqn zfkB>ufdNLt#Mzl4?uXGJagcjtm?7$6G)Nre9&4z27!48!sSkjvhtVK$kotV6dKe87 z2dQs?s)x}aagh4OQ1vhxBo0!)1F9ZIgTz7VuQM|+h%qoQz-W*-Nc~%=dKe872dU>_ zfvAVkAaRg-B^HQ(VKhh_q}~y#9!7)2LF&Vy>R~iU9HhPssvbsz#6jwNpz2{XNF1bo zH46iSI0FL%j0TB=)E|PXhtVK$kot#E^)MPF4pRREsvbsz#6jxCSs~`bXplHay&fyX zzc3mk4pQ$6RS%;<;vn@YtdR5!qe0>z^(|0w7!4C&0u_hRF!3W$aTpB~e*zVU(J*lq zHb{KIXplI_JyL8C_rPe7ILJL#Q1vhxBo0y^096m8LE<3w1yJ=c8YB)<-v(6=qe0>z z^~<2@VKhh_q<#-nJ&Xp4gVf()gM>ef28n~ze}<}u(I9bKmczVKhh_q<#ieJ&Xp4gVgVUios*w!^HPO#bGo|{4P`+M#IGaLd9V;Ok9==;(izn6Sswm!)TazEL0pu!^CT$ z;xHN}J{KwuqhaEEq2e$aCVrO-QZB)0nD`$qNIeCkLE@nB6y%1ehtVK$PV2TR~iU9Hf2$4R~iU9HgFy 
z7h*n)28n~zEAvA93!_2eAoWgA^)MPF4pJY<3rWu~8YB)G)NqzJ{77S zMuWsb>Z_sZVKhh_q<%J3J&Xp4gVb+?s)x}aagh3pe30;m(I9b<`WI03Fd8HdQqRE; zQ4ga*;vn_%{1E@bXplHay**Stj0TB=)Q3UU!)TB=NPQVpJ&Xp4gVgsz)x&6zI7t0k zs5p#?bVdA<_aTpB~_l1hXXqb2|R2)Xb#CxIQFd8Pl zRsa&7Fd8Hd^6wF-dKe872l@92R6UFaiG$SthN_3rAaRg-X+enjFd8HdQg0*(@h^-9 ziG$P!LDj=(kT^(vCR9C)28n~zcS6NsG)#OYR2)Xb#7{!SVKhwqB~%f1%qLBE4(J=8CQAm3O zMuWsb;aLb(52HcipzxdkRS%;<;vn@)q3U5YNF1d82vj|c28n~z--N1%(I9b<`X5mB zFd8HdQqL;}@fVB+iG$SZib2vRj0TB=)H_4f!)TB=NPRL?J&Xp4gVa|-)x&6zI7t0W zsCpO;5(lYY4^KmczVKhh_q<#TZJ&Xp4gVb-8fP^!Q z28n~zUzLEQ9~cc12dRGzRS%;<;vn_hk`VPU8YB)R~iU9Hc%R zsvbsz#6jxIq3U5YNF1cT7pfjcgTz7V*Fwc%G)(*~R2)Xb#NR^2VKhvfR|?{O7!4EG zg^I&yn7A)g97e;$bD`oe8YbQ=1xXJu8YaF*3R1trXplI_zlWjfVKhh_-aZq?l$wJh_XplH4JT0N>VKhh_ zq~0H@9!7)2LF)6M>R~iU9HhPlsvbsz#6jv8L)F7*kT^*FPFYAe!)TB=Nc~M&Ncw@% zAaRiT4^Z_m8YB)Zd~0!)TB=Nc~o*IE;pgUxkXpXqfm{s5p#7!4B-g^I&y zn0P5v97e;$r^-Xp1B`}=Z;^+j9~cc12l@9DR6UFaiG%$65~?0XgTz7V|3lTo>Hq)# zgVf6@K+K2HF!d%15cj}nkT^(vFjPH^28n~r&s2bf4~zzhgVc8_K+*$@28n~zFMz6t z(I9b<`U6n)Fd8HdQhyz)9!7)2LF&Il)x&6zI7mI8BE&r~8YB)qhaE#N)Y$MXqdPvR2)Xb z#9fsb81xtz7+^F^ycjADqhaC;q2e$aCVn0&4x?e>f1%V!9A}VKhwK87dB=Vd4c)aTpB~p9>X-(J=8-P;nRy6aNkshtV)`6+MW1 zU^GlT5GoF%Vd8aAaTpB~Ukw$9(J=8FP;nRy6KB)W#BI$X=EG>1cp_9ijE0H#LDj=(nD{2BdKe88zYkRpqhaFA77+Kq zXqdQx1;jlt8YUhBRS%_z5k`Z=LGiK|Y7UGBiG$+h71SIU4HM_H zgt!w%!^EvEA?}3HF!6Y(dKe88?}4g^(J=81Q1vhxCVm&H9!A5&8Lc4hfzdE=Ju8TN zU^GlT2&x`N!^G>M>R~iYd@)o#jE0F{fU1YlF!3)?^)MPHu4oN$4~&M1yIMoy1xAC! 
zLGco64e=L@28n~>r3-2fjE0G?gQ|zoF!9^gko*OsVdDR+85k597#LtQNE~FZybS|` zECT}rjE0HZ*f21NFfcH{Xqb3_4J2Q~XplI_oi#QP|H5dP_yQY>{47*GjE0GS zgsO+pFmX9s1_m|Inp!9g6L+y?U;y122%}-*#kLF#>I@7FFd8O4%a(z`fPsMlM#IEU zLDj=(nD|GidKe88SG0q;2S&rh-R&UmfzdGWGCPQSU^Gm8t{ucZFd8O)7OEab!^FQp z)x&6*xQaa_-eELM+}$1$?=Tu94vO~-dq}*)Xqb4bJtW>?G)#P^JtW>?G)Np2??>$+ z@eZS5;xFwX@eZS5;$jXE^)MPHZsh<`52In?=?;)^hS4zbUI$1x!)TcJ9tTJ`!)TcJ zV+Tk$!)Tbeup>l0jE0F@IYQLKXqb4GBg8#08YVu$5#k;g4HMrFH6KR9#GgUUhtV)` zF(-(7U^GnJ)(PSs7!4E8b%Mk@jE0F%aDv1;j0TB=;(enNB;H{(O#GY^B;H{(O#G1( zB;H{(NE{UJe9n+~gwZf@BWFlF!f2RyJXAf5hKV;r)x&6*_y%W4IKya|_%&xpIKya| zII9aJ++Z|JT*n0xZZH}q9s^YmqhaFBQ1vhxCcfDP;vN_c6Tj&KaSx1!iF3F@+ykRw z;`*)-_rPeFcsx`+jE0G~LDj=(nD{nVNch8OnD{MM1_sc6aTpB}2gUmzSIB+@7!4Db za)ZpP!f2Sdz8fSSVKhh_6z?%^3=E(%7+^F^ywMFZ?g67=;_IR6VKhwq3RFFehKVz| zL&6P4!^Ab+A>jt2Vd9bQ3=A3!3=A+DCf?}Iz+lM0zyPCR;+vrAVKhwq7F0cqhKX}~ zK->ePVd6#}5cj}nn0S&0#62(?Cf?-%aSx1!iSLH0htV+c$58b!8YV8{35jJ<1aj?=TuBUf~IecNh&5@AZVlJB$X2gW`RcCnVlsG)(-yCnVlsG)$b= z3!)xI!^DleAnIW>OgzC063#FhCf@D^31=7$6W{Iy31=7$6Tj~T31=7$6BqD?sE5%o zaSLyVdKe88&-8}42S&rhCwN2L1EXQ$2cYJ|Xqfm5sQEA&CNAj%aSx1!i97m0+ykRw z;zd4?avny*#Hac|;t@uJ#6j`C$_G+z!)TB=C?2o*K++G4hKc|3fy6tEhKZ~DLe#@( zn7FquL_Lg#iC6eS%1Iav6QAb`DJNkxO#G}bB%EP1O#F*4B%EP1OkBkeq8>)W#J&9> z>R~iYyxI@q9vBT1U+4#M4~&M1Ux1nqqhaFTq2|MAn7Fz>#62(?Chq4CaSx1!iP!l< z;t@u}#25NQ;t@uJ#6j`6%^wnvFd8HdipNL(ka&U7Fmbj3NW8#kn7DBOL_Lg#iHAeg z!)TazO8_L?U^Gm8RRAR1U^GnpW&k7{U^GmeArKM{Fd8Oq5C~BZqhaFVQ1vhxCf*(h zaSx1!iLVWWxCchV#P2}OhtV)`<{*fBU^GnJI0)h%7!4DTf~tqnF!8P+NIb%5nE2Wt zNIb%5kT@tF4+TNu5k`Z=LGk!52of(a8YV6j42ee=4HLHyhNy?pF!5xldKe88pAZZQ zHy8~Q-xdrBHy8~Qe;N!42N(?#=Lvy?8;pjD+k`;W!)TazGE_Z`hKWxOfw%`o!^C%n zK->ePVd5{K=EG>1IDaU_Jun(3ZXXJ94~&M1r$N=jXqfo4P)K;fXqfn}P{@2ej0TB= z;_*T#Bwk=NNE{T8e?uYl9gK#FD}_PoI~WZU4+w*(htV+cQmA?u4HI7+2Jsh+hKZjF zgMR~iYd`&pSJun(3emNZC9vBT1 zXO4iF52In?+7S@R~iYd=^wajE0Gyj)J%cM#IEEMM2yHqhaDo(Gd5*XqdQH zG{ikH8YW&1RS%;<9lJ3^EK13@{ob4hm<>ILKan z7!48!nePQv52HciAoXcbaTpB~Z-SZwqe0>zb0$O8!)TB=$ei_1cfx3x_=z|M2GE(0 
zFd8HdGUpmpJ&Xp4gUopk6^GFgI2q`d^AVd7~CkaP&6Vd9ku4B+z?U^GY^ z*28o00 zT>(`Oqe0>z^(Ud~VKhh_r2aNkJ&Xp4gVg_lioWiV~!)TB= zNc~i(`7jzJ4pP4|4YDp5MuWsb>Q6w;htVK$Pq97e;$L6@+A&U=B;F!5%ndKe88p8*wz(J=9~ zP;nRy6F&qMhtV+c>rinR4HJI@6^GF`=H`58YX@@gMk5b?m3Kxi9gC< zU;v$q4WnV=pP=F}8Ya$^2{8vo!^8!l;xHN}u8;{aA4bE(4WQ~_G)&w+6Vk4L(J=AQ zOvpMA7!4E8$%N!z7!48!<(CGidKe88pANMbMuWsb`Dj@tq}>RkVdC4N=EG=^ILQ3d zP;+23NE~F&EvPt*hKaw-grqkZ4HN$d^)HNuiHl@G!U0CZ#I>N}Fd8QAkOc`J7!4B- zhl;~!n0Ox492gA}2l=Z7Dh{Jz;@=5DgLs9dHF= zgXWK5G)SC_g@M7E0n#sq(I9b9!c1U*^eASXWF+21$7 z)5kHy)88-N*Wbl8J}Sz`&^$gqBeTFgwIm?3A~nY`C8a1cGt<@5Ex#x?vBW1d1dk#M z|Du%CB9Iykt2~3_!#(|6{KJEDa>{b`N{SK-z>J*CwA7N!+*G~dlEk7Cy#muri}?8b zBX2{PcAEo zFUpHgGc=1&Gd98@UqwJZmwOh!rKt)WNZ2*u^O3O;Bor`g z!sjKNn(+C_%miYS1JTx8P zl!wMRPI+kD;*^I*qOnnYlp$tpRAu7GYPpcO$Du6`m$m}D+6r)KgH}5DDm_pn4C&2SPy17j!P3HlCf*T=ObgIcb84~;FH^3Z6(DW3=GSc1lNkj7M!GZKqH zT}v0-)l9Jkc(4XjQx#54xwvi0!>OqNx26J|n)2d7oga{=Fb3A3r8$22Tmt$F2*~Hf zgE}C1?FS9&;H;x^3Ft2%AfFdsY-R$EADsTjE1ye1e*pn`NSXo{An%`%yf#!N1(KWT^Bw-;ns!E zSGaZI^A{+c;;h4<=@h3tG@asjlU2kHlcsuQFl zp%j)-an^!Wcy!^brAuK|J3I-&?7~+~n|W6pBE2m zuHx2TML<56fP4V~`Mmg2y!CJu0r^}4@&yFsA?XUWf`Y|$D(-3x6xG-@;i=VdYr<2h z;nswwP6MYIu>0UW(^6OkiqBV|f(eHP&tURK7*!Lobu50ic=n% zUUAAp(<@GSXnMsd4^6K)<)P^nr#vLRqE=M!^oqNB1EnJDn()+axHaLa+;D5cQ@4TB zE7*Nl(kmWcfl?3-ZTS3!M;kt$;n9ZAZ=m#wvtEOySDfNO5SvU0DTcY4Sd$Tngb0%nX@rnsSkeYZBO%2wHxs287SKc~hJ`aY zRtPx*=4PT4!`w`iVwjtWQVer5Co#Vc`sl6oSrxxtS=%FgFvW80Kc86vNyMia}WYl9GyLl@reT1-ELL ztBE!no;(P<1m06QvmDW}+0s+zgHtV%WDPtm{D5;#3PU4kQgyMvUw5szn$FQbw$CIMqUpgQYw0#+A(6#Prno;>6s7oYahX zBNxp1VU*;EW(4{k9gtdt5}Xc!Cr2XG8k3M5iBOABLfCagsD&s2MK%cIk0Bz|B9sv8 zI*_$E)k2H|NrRLT<2t-*5ypX(5o;VywNT^mBu8UH$q~&6Y{?O!1gAsb$&m=PiICD3 zG;>8bOo>s9q=v8qiBSwy19BY*Bg%z%6(bo4Qb)9b zI2FST#FI7+T@neDmFR|GOPxq+a5@H_Jc&`92q{vDOrJz3Mlz5H7a|PAsTgV?DCR+! 
zKx9D-1SteztcsBg#Azua3=sz6R17r`B#+ZlbOZ4!Mlukj4y!M)8HiIc%s@Qp(}-~T zgaifNwk489I30uGa$K#;;?kUy_#{wgCCMTlw5b-lLl)8oNzO>j%S+9PPf09Ej4!Dy zNR2mi!Kxr1v~L$(GABPV1uTZ1%b=FR9ch>n54RnMT96V$GpGx3s6jE0kYbR5#zrI< zNJufL5;7v_4`er%5mpX0l#pl8O(morYAhiyp&Ls`J;+!S(t?1na;Tw%yoBObllUY; z>Y>IG@)EkSgwz`$C1s?>JZ44$HS4h`fMg^zNpMC2i4jWp$hPB93sOQTY~Y?IMlnbY zp-@3FkdR_f5lA>^AR9?UMuHkjiqAo&5>gK}mXMdw9ZN_($XLQT1I1Lr%Atl5@)EkK zgw#WgCFCV^V+pA@CN?91IwUwV5}G79BZ0(lWDd9^k!{DJ7Nmqw*r1q4NHNGjLZO0U zAR)z|t`i9vi8$p@RfIf)>`-Dd64Y2iUP3pPkb01@gmVUpsf3k74J9cEh^UdE#uD-p zx?>5c2lY_ktt%`U1-WgPh_`WvDhtj_SbJRP%{vqq;K)s2HAIIfQL4d;2!#$jG>K9T zQUprPu%5j?E+GvtgGmYnkimpBfDI;`Q&0nfuzHxWg#3kJE+Gvt zg9-Ty!(c)hK%Ft-au>Mshb8~PT6|b?7g!6yGzMyb5lE0=R}u;%gx^6X5>gE|l2GWN z8c9es$VgD)Bj6bnLy1riGnSB#Fw7;S0cJ2Ee_*b&mcP#WGW%`P-6*s3Efyi>OschP2gZZp_ocoIn+=> zUP3pOkb0=GguH}qEFtxvLK&8nk*X0$Ucf#$fEp;s2H?myASHyn2X{9yia}}!c?`us zLW)7XO%n0~amt~pNbxx_c>!uHAul2OA7m^c^&n#jCvFr|2`h&hN>UKu@6&)2AJkYv zUP5;)A@!iHGS0k!y?+mO4%{AO190R8kP@PUgBZmiHH18d;y^-*K^;aC@&a+np{hvn zIVjN*QV%tjke87C4>FdJdXTY%6E}*fgq1@LCFCV^Qwgbu8cWDa=*ALK59-=r>w!Qj z1{@u9)ZBn#1di+gRzt{(aJLhs8mx$r_fU-_q#9%-DBZvc2OOS3F_Z}PFk?ybKge7{ z8ej$!@)w4K326WuOgO2dnoC$c%veJH!Z4SR2AIKw{DomKAq}9u3GVEHqlb-_JwUA( z0x1e?4Ixh=91k*)kZQ1zguI7pBq7xxBSFcMfM-w)B|<&SSVBI+Fqe=9n8Aeng<&uu z4Pb)_CwEkH39E-0OUPds<`U8XGnkOSFbu}6p(r&yJ|{6RH8TZ#j&@>hN_ujhUT#uy zUWr~#KIS@_;*7MM_|k&-B+zlaIr-(ty2=yFQuUG&i;6N+i}33#0BzSQ&dD#wp)4i8 zJP&RKAvJ^yOG?YBBxD#|4I#raQz{6V1yh2@DA=LupflAmy_cJqlar5Sw^CYBYAQC> zCCM4_iFuXr#g%!xBV=03kf(|(n9m+vI3LPqh{Umf$N$dxq=a%NgXJmrUrGZ!i z70AT42{<<~1+EyqZieau9RPu15Lf_1F~lIOa!F~a;5GUXQ{X&wBft{a90`&DFERpm zvq8K}YzwnN5};G1Fv2`3EfW$MQ2mfi+t`(1vp+GpB(p3vH?cSyoX3h%6I0@gQqwbw zOHzyUGR;h|iDGkLF)WLNjEc{}a^NAVigYY#3{^!AE)^x2DbT5-^wg60oc!d(ocNr~ zywt>^cq}r-nN_J6%0cSTqX=w!9@ucKQpqKWAZ_4q23ee5lwVo^wh^oHJaqMWrMdB7 zJy=eF2HTySlb@Uob!u{Da!zV;NfG!65scVGD90A1u(A=9QNhs%EpCf(mztn+*@_cO zz=xKB)`LM(b8c!%W@4T(SQNtza4n!S20$qS958S#AQAL%folOByMoQ-#TjYwIq}Ik 
ziMa)!L=AR)T5(2>9@aboQeA|px(K`Klme4Pa83m+jfD8GG$p?T%gO&>m7qKc!Vs0k z*j1M0m!v|w0s zk%AyU4_oZw3kHHZV16_7Z5}NDynG0MnIUgsCaNC?2n#g8YJH za1nuGQ%VYUU3rOl`NcV@so-N!z(p0joP`L-=O^dp#3z@P6qP`ZQ3bD{MF>L9UPTjx z9GHqG2ss56d<_YTc2Lm^%4rBe$a$w|Dk0jTr5Vggkds%@1R>hd1fkkdErM!C6@+R> z6@(n23U0h0+zB~%6-^LwSSp$z#CB+*jp}$bL5S^Wf>7#BpH+xmF6WQ@zZkh^NW!9$*Gw+NL+|+xLcsA;e3c{2sb+? z9!STCs{wJ5i=1lC|m$&Clq7RgWp zQ*z@|Qo#bq+CWXloWwk^p4^=HwA@Tc_(1%fmYY}szNP?@ryy +#include #include #define HIP_NO_HALF #include @@ -155,6 +156,399 @@ static __device__ float4::Native_vec_ __pack_to_float4(const T &t) return result; } +typedef uint32_t uint8 __attribute__((ext_vector_type(8))); +typedef uint32_t zluda_uint3 __attribute__((ext_vector_type(3))); +typedef uint8 CONSTANT_SPACE *surface_ptr; + +template +static __device__ To transmute(From f) +{ + if constexpr (sizeof(To) == sizeof(From)) + { + return std::bit_cast(f); + } + else if constexpr (sizeof(To) > sizeof(From)) + { + union + { + To t; + From f; + } u = {To{0}}; + u.f = f; + return u.t; + } + else if constexpr (sizeof(To) < sizeof(From)) + { + union + { + From f; + To t; + } u = {From{f}}; + return u.t; + } + else + { + static_assert(sizeof(To) == 0); + } +} + +enum class ImageGeometry +{ + _1D, + _2D, + _3D, + A1D, + A2D +}; + +// clang-format off +template struct Coordinates; +template <> struct Coordinates { using type = uint1::Native_vec_; }; +template <> struct Coordinates { using type = uint2::Native_vec_; }; +template <> struct Coordinates { using type = uint4::Native_vec_; }; +template <> struct Coordinates +{ + using type = uint2::Native_vec_; using arg_type = uint1::Native_vec_; + static __device__ type pack_layer(uint32_t layer, arg_type coord) + { + return type { coord.x, layer }; + } +}; +template <> struct Coordinates +{ + using type = zluda_uint3; using arg_type = uint2::Native_vec_; + static __device__ type pack_layer(uint32_t layer, arg_type coord) + { + return type { coord.x, coord.y, layer }; + } +}; +// clang-format on + 
+template +static __device__ void image_store_pck(T value, typename Coordinates::type coord, surface_ptr surface) +{ + if constexpr (sizeof(T) <= sizeof(uint)) + { + uint value_dword = transmute(value); + if constexpr (geo == ImageGeometry::_1D) + { + asm volatile("image_store_pck %0, %1, %2 dmask:0x1 dim:1D unorm" : : "v"(value_dword), "v"(coord.x), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_2D) + { + asm volatile("image_store_pck %0, %1, %2 dmask:0x1 dim:2D unorm" : : "v"(value_dword), "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_3D) + { + asm volatile("image_store_pck %0, %1, %2 dmask:0x1 dim:3D unorm" : : "v"(value_dword), "v"(transmute(coord)), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A1D) + { + asm volatile("image_store_pck %0, %1, %2 dmask:0x1 dim:1D_ARRAY unorm" : : "v"(value_dword), "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A2D) + { + asm volatile("image_store_pck %0, %1, %2 dmask:0x1 dim:2D_ARRAY unorm" : : "v"(value_dword), "v"(coord), "s"(*surface) : "memory"); + } + else + { + static_assert(sizeof(T) == 0, "Invalid geometry"); + } + } + else if constexpr (sizeof(T) == sizeof(uint2::Native_vec_)) + { + uint2::Native_vec_ value_dword2 = transmute(value); + if constexpr (geo == ImageGeometry::_1D) + { + asm volatile("image_store_pck %0, %1, %2 dmask:0x3 dim:1D unorm" : : "v"(value_dword2), "v"(coord.x), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_2D) + { + asm volatile("image_store_pck %0, %1, %2 dmask:0x3 dim:2D unorm" : : "v"(value_dword2), "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_3D) + { + asm volatile("image_store_pck %0, %1, %2 dmask:0x3 dim:3D unorm" : : "v"(value_dword2), "v"(transmute(coord)), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A1D) + { + asm volatile("image_store_pck %0, %1, %2 
dmask:0x3 dim:1D_ARRAY unorm" : : "v"(value_dword2), "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A2D) + { + asm volatile("image_store_pck %0, %1, %2 dmask:0x3 dim:2D_ARRAY unorm" : : "v"(value_dword2), "v"(coord), "s"(*surface) : "memory"); + } + else + { + static_assert(sizeof(T) == 0, "Invalid geometry"); + } + } + else if constexpr (sizeof(T) == sizeof(uint4::Native_vec_)) + { + uint4::Native_vec_ value_dword4 = transmute(value); + if constexpr (geo == ImageGeometry::_1D) + { + asm volatile("image_store_pck %0, %1, %2 dmask:0xf dim:1D unorm" : : "v"(value_dword4), "v"(coord.x), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_2D) + { + asm volatile("image_store_pck %0, %1, %2 dmask:0xf dim:2D unorm" : : "v"(value_dword4), "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_3D) + { + asm volatile("image_store_pck %0, %1, %2 dmask:0xf dim:3D unorm" : : "v"(value_dword4), "v"(transmute(coord)), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A1D) + { + asm volatile("image_store_pck %0, %1, %2 dmask:0xf dim:1D_ARRAY unorm" : : "v"(value_dword4), "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A2D) + { + asm volatile("image_store_pck %0, %1, %2 dmask:0xf dim:2D_ARRAY unorm" : : "v"(value_dword4), "v"(coord), "s"(*surface) : "memory"); + } + else + { + static_assert(sizeof(T) == 0, "Invalid geometry"); + } + } + else + { + static_assert(sizeof(T) == 0, "Invalid vector size"); + } +} + +template +static __device__ T image_load_pck(typename Coordinates::type coord, surface_ptr surface) +{ + if constexpr (sizeof(T) <= sizeof(uint)) + { + uint data; + if constexpr (geo == ImageGeometry::_1D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_2D) + { + asm 
volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_3D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute(coord)), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A1D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A2D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else + { + static_assert(sizeof(ImageGeometry) == 0, "Invalid geometry"); + } + return transmute(data); + } + else if constexpr (sizeof(T) == sizeof(uint2::Native_vec_)) + { + uint2::Native_vec_ data; + if constexpr (geo == ImageGeometry::_1D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_2D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_3D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute(coord)), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A1D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A2D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else + { + 
static_assert(sizeof(ImageGeometry) == 0, "Invalid geometry"); + } + return transmute(data); + } + else if constexpr (sizeof(T) == sizeof(uint4::Native_vec_)) + { + uint4::Native_vec_ data; + if constexpr (geo == ImageGeometry::_1D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_2D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_3D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute(coord)), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A1D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A2D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else + { + static_assert(sizeof(ImageGeometry) == 0, "Invalid geometry"); + } + return transmute(data); + } + else + { + static_assert(sizeof(T) == 0, "Invalid vector size"); + } +} + +template +static __device__ uint4::Native_vec_ image_load_pck_full(typename Coordinates::type coord, surface_ptr surface) +{ + uint4::Native_vec_ data; + if constexpr (geo == ImageGeometry::_1D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_2D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_3D) + { + asm 
volatile("image_load_pck %0, %1, %2 dmask:0xf dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute(coord)), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A1D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A2D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else + { + static_assert(sizeof(ImageGeometry) == 0, "Invalid geometry"); + } + return data; +} + +template +static __device__ void image_store_pck_full_with(uint4::Native_vec_ data, T value, typename Coordinates::type coord, surface_ptr surface) +{ + // We avoid unions for types smaller than sizeof(uint32_t), + // because in those cases we get this garbage: + // ds_write_b128 v2, v[5:8] + // ds_write_b16 v2, v9 + // ds_read_b128 v[5:8], v2 + // tested with ROCm 5.7.1 on gfx1030 + if constexpr (sizeof(T) == sizeof(uint8_t)) + { + uint32_t x = uint32_t(std::bit_cast(value)); + uint32_t data_0 = ((data[0]) >> 8) << 8; + data[0] = data_0 | x; + } + else if constexpr (sizeof(T) == sizeof(uint16_t)) + { + uint32_t x = uint32_t(std::bit_cast(value)); + uint32_t data_0 = ((data[0]) >> 16) << 16; + data[0] = data_0 | x; + } + else + { + union + { + uint4::Native_vec_ full_vec; + T value; + } u = {0}; + u.full_vec = data; + u.value = value; + data = u.full_vec; + } + image_store_pck(data, coord, surface); +} + +constexpr auto IMAGE_RESERVED_TOP_BITS = 3; + +static __device__ surface_ptr get_surface_pointer(uint64_t s) +{ + return (surface_ptr)((s << IMAGE_RESERVED_TOP_BITS) >> IMAGE_RESERVED_TOP_BITS); +} + +static __device__ surface_ptr get_surface_pointer(struct textureReference GLOBAL_SPACE *surf_ref) +{ + return (surface_ptr)(surf_ref->textureObject); +} + +static __device__ uint32_t x_coordinate_shift(uint64_t s) +{ + return 
uint32_t(s >> (64 - IMAGE_RESERVED_TOP_BITS)); +} + +static __device__ uint32_t x_coordinate_shift(struct textureReference GLOBAL_SPACE *ptr) +{ + uint32_t channels = uint32_t(ptr->numChannels); + uint32_t format_width = 0; + hipArray_Format format = ptr->format; + switch (format) + { + case hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8: + case hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8: + format_width = 1; + break; + case hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16: + case hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16: + case hipArray_Format::HIP_AD_FORMAT_HALF: + format_width = 2; + break; + case hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32: + case hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32: + case hipArray_Format::HIP_AD_FORMAT_FLOAT: + format_width = 4; + break; + default: + __builtin_unreachable(); + } + return uint32_t(__builtin_ctz(format_width * channels)); +} + +template +static __device__ T suld_b_zero(Surface surf_arg, typename Coordinates::type coord) +{ + surface_ptr surface = get_surface_pointer(surf_arg); + uint32_t shift_x = x_coordinate_shift(surf_arg); + coord.x = coord.x >> shift_x; + return image_load_pck(coord, surface); +} + +template +static __device__ void sust_b_zero(Surface surf_arg, typename Coordinates::type coord, T data) +{ + surface_ptr surface = get_surface_pointer(surf_arg); + uint32_t shift_x = x_coordinate_shift(surf_arg); + coord.x = coord.x >> shift_x; + if (shift_x <= __builtin_ctz(sizeof(T))) [[likely]] + { + image_store_pck(data, coord, surface); + } + else + { + uint4::Native_vec_ pixel = image_load_pck_full(coord, surface); + image_store_pck_full_with(pixel, data, coord, surface); + } +} + extern "C" { #define atomic_inc(NAME, SUCCESS, FAILURE, SCOPE, SPACE) \ @@ -620,179 +1014,101 @@ extern "C" suld_b_a2d_vec(_v4, b32, uint4); // suld_b_a2d_vec(_v4, b64, ulong4); -#define sust_b_1d_vec(VEC, TYPE, HIP_TYPE) \ - void FUNC(sust_b_1d##VEC##_##TYPE##_trap)(struct textureReference GLOBAL_SPACE * ptr, int1::Native_vec_ coord, 
HIP_TYPE::Native_vec_ data) \ - { \ - hipTextureObject_t textureObject = ptr->textureObject; \ - TEXTURE_OBJECT_PARAMETERS_INIT; \ - (void)s; \ - int byte_coord = __hipGetPixelAddr(coord.x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_1D(i, byte_coord, tmp); \ - } \ - void FUNC(sust_b_indirect_1d##VEC##_##TYPE##_trap)(uint64_t serf_arg, int1::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \ - { \ - hipSurfaceObject_t surfObj = (hipSurfaceObject_t)serf_arg; \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - surf1Dwrite(hip_data, surfObj, coord.x); \ +#define SUST_B_ZERO(TYPE, GEOMETRY, HIP_TYPE) \ + HIP_TYPE::Native_vec_ FUNC(suld_b_indirect_##TYPE##_zero)(uint64_t surf_arg, typename Coordinates::type coord) \ + { \ + return suld_b_zero(surf_arg, coord); \ + } \ + void FUNC(sust_b_indirect_##TYPE##_zero)(uint64_t surf_arg, typename Coordinates::type coord, HIP_TYPE::Native_vec_ data) \ + { \ + sust_b_zero(surf_arg, coord, data); \ + } \ + HIP_TYPE::Native_vec_ FUNC(suld_b_##TYPE##_zero)(struct textureReference GLOBAL_SPACE * ptr, typename Coordinates::type coord) \ + { \ + return suld_b_zero(ptr, coord); \ + } \ + void FUNC(sust_b_##TYPE##_zero)(struct textureReference GLOBAL_SPACE * ptr, typename Coordinates::type coord, HIP_TYPE::Native_vec_ data) \ + { \ + sust_b_zero(ptr, coord, data); \ } - sust_b_1d_vec(, b8, uchar1); - sust_b_1d_vec(, b16, ushort1); - sust_b_1d_vec(, b32, uint1); - // sust_b_1d_vec(, b64, ulong1); - sust_b_1d_vec(_v2, b8, uchar2); - sust_b_1d_vec(_v2, b16, ushort2); - sust_b_1d_vec(_v2, b32, uint2); - // sust_b_1d_vec(_v2, b64, ulong2); - sust_b_1d_vec(_v4, b8, uchar4); - sust_b_1d_vec(_v4, b16, ushort4); - sust_b_1d_vec(_v4, b32, uint4); - // sust_b_1d_vec(_v4, b64, ulong4); - -#define sust_b_2d_vec(VEC, TYPE, HIP_TYPE) \ - void FUNC(sust_b_2d##VEC##_##TYPE##_trap)(struct textureReference 
GLOBAL_SPACE * ptr, int2::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \ - { \ - hipTextureObject_t textureObject = ptr->textureObject; \ - TEXTURE_OBJECT_PARAMETERS_INIT; \ - (void)s; \ - int byte_coord = __hipGetPixelAddr(coord.x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_2D(i, int2(byte_coord, coord.y).data, tmp); \ - } \ - void FUNC(sust_b_indirect_2d##VEC##_##TYPE##_trap)(uint64_t serf_arg, int2::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \ - { \ - hipSurfaceObject_t surfObj = (hipSurfaceObject_t)serf_arg; \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - surf2Dwrite(hip_data, surfObj, coord.x, coord.y); \ +#define SUST_B_ZERO_ARRAY(TYPE, GEOMETRY, HIP_TYPE) \ + HIP_TYPE::Native_vec_ FUNC(suld_b_indirect_##TYPE##_zero)(uint64_t surf_arg, uint32_t layer, typename Coordinates::arg_type coord) \ + { \ + auto coord_array = Coordinates::pack_layer(layer, coord); \ + return suld_b_zero(surf_arg, coord_array); \ + } \ + void FUNC(sust_b_indirect_##TYPE##_zero)(uint64_t surf_arg, uint32_t layer, typename Coordinates::arg_type coord, HIP_TYPE::Native_vec_ data) \ + { \ + auto coord_array = Coordinates::pack_layer(layer, coord); \ + sust_b_zero(surf_arg, coord_array, data); \ + } \ + HIP_TYPE::Native_vec_ FUNC(suld_b_##TYPE##_zero)(struct textureReference GLOBAL_SPACE * ptr, uint32_t layer, typename Coordinates::arg_type coord) \ + { \ + auto coord_array = Coordinates::pack_layer(layer, coord); \ + return suld_b_zero(ptr, coord_array); \ + } \ + void FUNC(sust_b_##TYPE##_zero)(struct textureReference GLOBAL_SPACE * ptr, uint32_t layer, typename Coordinates::arg_type coord, HIP_TYPE::Native_vec_ data) \ + { \ + auto coord_array = Coordinates::pack_layer(layer, coord); \ + sust_b_zero(ptr, coord_array, data); \ } - sust_b_2d_vec(, b8, uchar1); - sust_b_2d_vec(, b16, ushort1); - sust_b_2d_vec(, b32, uint1); - // 
sust_b_2d_vec(, b64, ulong1); - sust_b_2d_vec(_v2, b8, uchar2); - sust_b_2d_vec(_v2, b16, ushort2); - sust_b_2d_vec(_v2, b32, uint2); - // sust_b_2d_vec(_v2, b64, ulong2); - sust_b_2d_vec(_v4, b8, uchar4); - sust_b_2d_vec(_v4, b16, ushort4); - sust_b_2d_vec(_v4, b32, uint4); - // sust_b_2d_vec(_v4, b64, ulong4); - -#define sust_b_3d_vec(VEC, TYPE, HIP_TYPE) \ - void FUNC(sust_b_3d##VEC##_##TYPE##_trap)(struct textureReference GLOBAL_SPACE * ptr, int4::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \ - { \ - hipTextureObject_t textureObject = ptr->textureObject; \ - TEXTURE_OBJECT_PARAMETERS_INIT; \ - (void)s; \ - int byte_coord = __hipGetPixelAddr(coord.x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_3D(i, int4(byte_coord, coord.y, coord.z, 0).data, tmp); \ - } \ - void FUNC(sust_b_indirect_3d##VEC##_##TYPE##_trap)(uint64_t serf_arg, int4::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \ - { \ - hipSurfaceObject_t surfObj = (hipSurfaceObject_t)serf_arg; \ - __HIP_SURFACE_OBJECT_PARAMETERS_INIT; \ - int byte_coord = __hipGetPixelAddr(coord.x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_3D(i, int4(byte_coord, coord.y, coord.z, 0).data, tmp); \ - } - - sust_b_3d_vec(, b8, uchar1); - sust_b_3d_vec(, b16, ushort1); - sust_b_3d_vec(, b32, uint1); - // sust_b_3d_vec(, b64, ulong1); - sust_b_3d_vec(_v2, b8, uchar2); - sust_b_3d_vec(_v2, b16, ushort2); - sust_b_3d_vec(_v2, b32, uint2); - // sust_b_3d_vec(_v2, b64, ulong2); - sust_b_3d_vec(_v4, b8, uchar4); - sust_b_3d_vec(_v4, b16, ushort4); - sust_b_3d_vec(_v4, b32, uint4); - // sust_b_3d_vec(_v4, b64, ulong4); - -#define sust_b_a1d_vec(VEC, TYPE, HIP_TYPE) \ - void FUNC(sust_b_a1d##VEC##_##TYPE##_trap)(struct textureReference GLOBAL_SPACE * 
ptr, uint layer, int x, HIP_TYPE::Native_vec_ data) \ - { \ - hipTextureObject_t textureObject = ptr->textureObject; \ - TEXTURE_OBJECT_PARAMETERS_INIT; \ - (void)s; \ - int byte_coord = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1Da(i), __ockl_image_channel_order_1Da(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_1Da(i, int2(byte_coord, int(layer)).data, tmp); \ - } \ - void FUNC(sust_b_indirect_a1d##VEC##_##TYPE##_trap)(uint64_t serf_arg, uint layer, int x, HIP_TYPE::Native_vec_ data) \ - { \ - hipSurfaceObject_t surfObj = (hipSurfaceObject_t)serf_arg; \ - __HIP_SURFACE_OBJECT_PARAMETERS_INIT; \ - int byte_coord = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1Da(i), __ockl_image_channel_order_1Da(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_1Da(i, int2(byte_coord, int(layer)).data, tmp); \ - } - - sust_b_a1d_vec(, b8, uchar1); - sust_b_a1d_vec(, b16, ushort1); - sust_b_a1d_vec(, b32, uint1); - // sust_b_a1d_vec(, b64, ulong1); - sust_b_a1d_vec(_v2, b8, uchar2); - sust_b_a1d_vec(_v2, b16, ushort2); - sust_b_a1d_vec(_v2, b32, uint2); - // sust_b_a1d_vec(_v2, b64, ulong2); - sust_b_a1d_vec(_v4, b8, uchar4); - sust_b_a1d_vec(_v4, b16, ushort4); - sust_b_a1d_vec(_v4, b32, uint4); - // sust_b_a1d_vec(_v4, b64, ulong4); - -#define sust_b_a2d_vec(VEC, TYPE, HIP_TYPE) \ - void FUNC(sust_b_a2d##VEC##_##TYPE##_trap)(struct textureReference GLOBAL_SPACE * ptr, uint layer, int x, int y, HIP_TYPE::Native_vec_ data) \ - { \ - hipTextureObject_t textureObject = ptr->textureObject; \ - TEXTURE_OBJECT_PARAMETERS_INIT; \ - (void)s; \ - int byte_coord = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2Da(i), __ockl_image_channel_order_2Da(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_2Da(i, int4(byte_coord, y, int(layer), 0).data, tmp); \ - } \ 
- void FUNC(sust_b_indirect_a2d##VEC##_##TYPE##_trap)(uint64_t serf_arg, uint layer, int x, int y, HIP_TYPE::Native_vec_ data) \ - { \ - hipSurfaceObject_t surfObj = (hipSurfaceObject_t)serf_arg; \ - __HIP_SURFACE_OBJECT_PARAMETERS_INIT; \ - int byte_coord = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2Da(i), __ockl_image_channel_order_2Da(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_2Da(i, int4(byte_coord, y, int(layer), 0).data, tmp); \ - } - - sust_b_a2d_vec(, b8, uchar1); - sust_b_a2d_vec(, b16, ushort1); - sust_b_a2d_vec(, b32, uint1); - // sust_b_a2d_vec(, b64, ulong1); - sust_b_a2d_vec(_v2, b8, uchar2); - sust_b_a2d_vec(_v2, b16, ushort2); - sust_b_a2d_vec(_v2, b32, uint2); - // sust_b_a2d_vec(_v2, b64, ulong2); - sust_b_a2d_vec(_v4, b8, uchar4); - sust_b_a2d_vec(_v4, b16, ushort4); - sust_b_a2d_vec(_v4, b32, uint4); - // sust_b_a2d_vec(_v4, b64, ulong4); + SUST_B_ZERO(1d_b8, ImageGeometry::_1D, uchar1); + SUST_B_ZERO(1d_b16, ImageGeometry::_1D, ushort1); + SUST_B_ZERO(1d_b32, ImageGeometry::_1D, uint1); + SUST_B_ZERO(1d_b64, ImageGeometry::_1D, ulong1); + SUST_B_ZERO(1d_v2_b8, ImageGeometry::_1D, uchar2); + SUST_B_ZERO(1d_v2_b16, ImageGeometry::_1D, ushort2); + SUST_B_ZERO(1d_v2_b32, ImageGeometry::_1D, uint2); + SUST_B_ZERO(1d_v2_b64, ImageGeometry::_1D, ulong2); + SUST_B_ZERO(1d_v4_b8, ImageGeometry::_1D, uchar4); + SUST_B_ZERO(1d_v4_b16, ImageGeometry::_1D, ushort4); + SUST_B_ZERO(1d_v4_b32, ImageGeometry::_1D, uint4); + SUST_B_ZERO(2d_b8, ImageGeometry::_2D, uchar1); + SUST_B_ZERO(2d_b16, ImageGeometry::_2D, ushort1); + SUST_B_ZERO(2d_b32, ImageGeometry::_2D, uint1); + SUST_B_ZERO(2d_b64, ImageGeometry::_2D, ulong1); + SUST_B_ZERO(2d_v2_b8, ImageGeometry::_2D, uchar2); + SUST_B_ZERO(2d_v2_b16, ImageGeometry::_2D, ushort2); + SUST_B_ZERO(2d_v2_b32, ImageGeometry::_2D, uint2); + SUST_B_ZERO(2d_v2_b64, ImageGeometry::_2D, ulong2); + SUST_B_ZERO(2d_v4_b8, ImageGeometry::_2D, 
uchar4); + SUST_B_ZERO(2d_v4_b16, ImageGeometry::_2D, ushort4); + SUST_B_ZERO(2d_v4_b32, ImageGeometry::_2D, uint4); + SUST_B_ZERO(3d_b8, ImageGeometry::_3D, uchar1); + SUST_B_ZERO(3d_b16, ImageGeometry::_3D, ushort1); + SUST_B_ZERO(3d_b32, ImageGeometry::_3D, uint1); + SUST_B_ZERO(3d_b64, ImageGeometry::_3D, ulong1); + SUST_B_ZERO(3d_v2_b8, ImageGeometry::_3D, uchar2); + SUST_B_ZERO(3d_v2_b16, ImageGeometry::_3D, ushort2); + SUST_B_ZERO(3d_v2_b32, ImageGeometry::_3D, uint2); + SUST_B_ZERO(3d_v2_b64, ImageGeometry::_3D, ulong2); + SUST_B_ZERO(3d_v4_b8, ImageGeometry::_3D, uchar4); + SUST_B_ZERO(3d_v4_b16, ImageGeometry::_3D, ushort4); + SUST_B_ZERO(3d_v4_b32, ImageGeometry::_3D, uint4); + SUST_B_ZERO_ARRAY(a1d_b8, ImageGeometry::A1D, uchar1); + SUST_B_ZERO_ARRAY(a1d_b16, ImageGeometry::A1D, ushort1); + SUST_B_ZERO_ARRAY(a1d_b32, ImageGeometry::A1D, uint1); + SUST_B_ZERO_ARRAY(a1d_b64, ImageGeometry::A1D, ulong1); + SUST_B_ZERO_ARRAY(a1d_v2_b8, ImageGeometry::A1D, uchar2); + SUST_B_ZERO_ARRAY(a1d_v2_b16, ImageGeometry::A1D, ushort2); + SUST_B_ZERO_ARRAY(a1d_v2_b32, ImageGeometry::A1D, uint2); + SUST_B_ZERO_ARRAY(a1d_v2_b64, ImageGeometry::A1D, ulong2); + SUST_B_ZERO_ARRAY(a1d_v4_b8, ImageGeometry::A1D, uchar4); + SUST_B_ZERO_ARRAY(a1d_v4_b16, ImageGeometry::A1D, ushort4); + SUST_B_ZERO_ARRAY(a1d_v4_b32, ImageGeometry::A1D, uint4); + SUST_B_ZERO_ARRAY(a2d_b8, ImageGeometry::A2D, uchar1); + SUST_B_ZERO_ARRAY(a2d_b16, ImageGeometry::A2D, ushort1); + SUST_B_ZERO_ARRAY(a2d_b32, ImageGeometry::A2D, uint1); + SUST_B_ZERO_ARRAY(a2d_b64, ImageGeometry::A2D, ulong1); + SUST_B_ZERO_ARRAY(a2d_v2_b8, ImageGeometry::A2D, uchar2); + SUST_B_ZERO_ARRAY(a2d_v2_b16, ImageGeometry::A2D, ushort2); + SUST_B_ZERO_ARRAY(a2d_v2_b32, ImageGeometry::A2D, uint2); + SUST_B_ZERO_ARRAY(a2d_v2_b64, ImageGeometry::A2D, ulong2); + SUST_B_ZERO_ARRAY(a2d_v4_b8, ImageGeometry::A2D, uchar4); + SUST_B_ZERO_ARRAY(a2d_v4_b16, ImageGeometry::A2D, ushort4); + SUST_B_ZERO_ARRAY(a2d_v4_b32, ImageGeometry::A2D, 
uint4); __device__ static inline bool is_upper_warp() { diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 61a74c9..1085258 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -2934,7 +2934,7 @@ fn replace_instructions_with_builtins_impl<'input>( vector, "_", suld.type_.to_ptx_name(), - "_trap", + "_zero", ] .concat(); statements.push(instruction_to_fn_call( @@ -2955,7 +2955,7 @@ fn replace_instructions_with_builtins_impl<'input>( vector, "_", sust.type_.to_ptx_name(), - "_trap", + "_zero", ] .concat(); statements.push(instruction_to_fn_call( diff --git a/zluda/src/cuda.rs b/zluda/src/cuda.rs index 1d054c3..1f37dbf 100644 --- a/zluda/src/cuda.rs +++ b/zluda/src/cuda.rs @@ -1245,7 +1245,7 @@ mod definitions { pub(crate) unsafe fn cuSurfObjectDestroy( surfObject: hipSurfaceObject_t, ) -> hipError_t { - hipDestroySurfaceObject(surfObject) + surface::destroy(surfObject) } pub(crate) unsafe fn cuTexObjectCreate( diff --git a/zluda/src/impl/surface.rs b/zluda/src/impl/surface.rs index fcf9a52..0f24fa3 100644 --- a/zluda/src/impl/surface.rs +++ b/zluda/src/impl/surface.rs @@ -1,23 +1,65 @@ +use super::hipfix; +use crate::hip_call_cuda; use cuda_types::*; use hip_runtime_sys::*; use std::{mem, ptr}; -use crate::hip_call_cuda; - -use super::{hipfix, FromCuda}; +// Same as in zluda_ptx_impl.cpp +const IMAGE_RESERVED_TOP_BITS: u32 = 3; pub(crate) unsafe fn create( - p_surf_object: *mut hipSurfaceObject_t, + result: *mut hipSurfaceObject_t, p_res_desc: *const CUDA_RESOURCE_DESC, ) -> Result<(), CUresult> { if p_res_desc == ptr::null() { return Err(CUresult::CUDA_ERROR_INVALID_VALUE); } let desc = to_surface_desc(*p_res_desc)?; - hip_call_cuda!(hipCreateSurfaceObject(p_surf_object, &desc)); + // We need to check array format and channel count to set top bits of the surface object. 
+ // HIP does not support non-Array sources anyway + if desc.resType != hipResourceType::hipResourceTypeArray { + return Err(CUresult::CUDA_ERROR_NOT_SUPPORTED); + } + let mut surf_obj = mem::zeroed(); + hip_call_cuda!(hipCreateSurfaceObject(&mut surf_obj, &desc)); + let top_reserved_bits = surf_obj as usize >> (usize::BITS - IMAGE_RESERVED_TOP_BITS); + if top_reserved_bits != 0 { + #[allow(unused_must_use)] + { + hipDestroySurfaceObject(surf_obj); + } + return Err(CUresult::CUDA_ERROR_UNKNOWN); + } + let format_size = format_size((&*desc.res.array.array).Format)?; + let channels = (&*desc.res.array.array).NumChannels; + let pixel_size = format_size * channels as usize; + let shift_amount = + (pixel_size.trailing_zeros() as usize) << (usize::BITS - IMAGE_RESERVED_TOP_BITS); + surf_obj = (surf_obj as usize | shift_amount) as _; + *result = surf_obj; Ok(()) } +pub(crate) unsafe fn destroy(surf_object: hipSurfaceObject_t) -> hipError_t { + hipDestroySurfaceObject( + (((surf_object as usize) << IMAGE_RESERVED_TOP_BITS) >> IMAGE_RESERVED_TOP_BITS) as _, + ) +} + +pub(crate) fn format_size(f: hipArray_Format) -> Result<usize, CUresult> { + Ok(match f { + hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8 + | hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8 => 1, + hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16 + | hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16 + | hipArray_Format::HIP_AD_FORMAT_HALF => 2, + hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32 + | hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32 + | hipArray_Format::HIP_AD_FORMAT_FLOAT => 4, + _ => return Err(CUresult::CUDA_ERROR_NOT_SUPPORTED), + }) +} + unsafe fn to_surface_desc(res_desc: CUDA_RESOURCE_DESC) -> Result<hipResourceDesc, CUresult> { let res_type = mem::transmute(res_desc.resType); let res: hipResourceDesc__bindgen_ty_1 = match res_desc.resType { @@ -26,92 +68,10 @@ unsafe fn to_surface_desc(res_desc: CUDA_RESOURCE_DESC) -> Result hipResourceDesc__bindgen_ty_1 { - mipmap: hipResourceDesc__bindgen_ty_1__bindgen_ty_2 { - mipmap: 
mem::transmute(res_desc.res.mipmap.hMipmappedArray), - }, - }, - CUresourcetype::CU_RESOURCE_TYPE_LINEAR => hipResourceDesc__bindgen_ty_1 { - linear: hipResourceDesc__bindgen_ty_1__bindgen_ty_3 { - devPtr: res_desc.res.linear.devPtr.0, - desc: channel_format_desc( - FromCuda::from_cuda(res_desc.res.linear.format), - res_desc.res.linear.numChannels, - )?, - sizeInBytes: res_desc.res.linear.sizeInBytes, - }, - }, - CUresourcetype::CU_RESOURCE_TYPE_PITCH2D => hipResourceDesc__bindgen_ty_1 { - pitch2D: hipResourceDesc__bindgen_ty_1__bindgen_ty_4 { - devPtr: res_desc.res.pitch2D.devPtr.0, - desc: channel_format_desc( - FromCuda::from_cuda(res_desc.res.pitch2D.format), - res_desc.res.pitch2D.numChannels, - )?, - width: res_desc.res.pitch2D.width, - height: res_desc.res.pitch2D.height, - pitchInBytes: res_desc.res.pitch2D.pitchInBytes, - }, - }, - _ => todo!(), + _ => return Err(CUresult::CUDA_ERROR_NOT_SUPPORTED), }; Ok(hipResourceDesc { resType: res_type, res, }) } - -fn channel_format_desc( - format: hipArray_Format, - num_channels: u32, -) -> Result { - let mut bits = match num_channels { - 1 => (1, 0, 0, 0), - 2 => (1, 1, 0, 0), - 3 => (1, 1, 1, 0), - 4 => (1, 1, 1, 1), - _ => return Err(CUresult::CUDA_ERROR_INVALID_VALUE), - }; - let (kind, bit_width) = match format { - hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8 => { - (hipChannelFormatKind::hipChannelFormatKindUnsigned, u8::BITS) - } - hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16 => ( - hipChannelFormatKind::hipChannelFormatKindUnsigned, - u16::BITS, - ), - hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32 => ( - hipChannelFormatKind::hipChannelFormatKindUnsigned, - u32::BITS, - ), - hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8 => { - (hipChannelFormatKind::hipChannelFormatKindSigned, i8::BITS) - } - hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16 => { - (hipChannelFormatKind::hipChannelFormatKindSigned, i16::BITS) - } - hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32 => { - (hipChannelFormatKind::hipChannelFormatKindSigned, 
i32::BITS) - } - hipArray_Format::HIP_AD_FORMAT_HALF => ( - hipChannelFormatKind::hipChannelFormatKindFloat, - mem::size_of::() as u32 * u8::BITS, - ), - hipArray_Format::HIP_AD_FORMAT_FLOAT => ( - hipChannelFormatKind::hipChannelFormatKindFloat, - mem::size_of::() as u32 * u8::BITS, - ), - _ => return Err(CUresult::CUDA_ERROR_INVALID_VALUE), - }; - bits.0 *= bit_width; - bits.1 *= bit_width; - bits.2 *= bit_width; - bits.3 *= bit_width; - Ok(hipChannelFormatDesc { - x: bits.0 as i32, - y: bits.0 as i32, - z: bits.0 as i32, - w: bits.0 as i32, - f: kind, - }) -} diff --git a/zluda/tests/kernel_suld.rs b/zluda/tests/kernel_suld.rs index ad6e964..07fc560 100644 --- a/zluda/tests/kernel_suld.rs +++ b/zluda/tests/kernel_suld.rs @@ -340,10 +340,6 @@ unsafe fn kernel_suld_impl< if mem::size_of::() * CHANNELS < mem::size_of::() * SULD_N { return; } - // TODO: reenable those tests - if mem::size_of::() != mem::size_of::() || CHANNELS != SULD_N { - return; - } let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(seed); let size = 4usize; let random_size = rand::distributions::Uniform::::new(1, size as u32); diff --git a/zluda/tests/kernel_sust.rs b/zluda/tests/kernel_sust.rs index 831e467..5057b56 100644 --- a/zluda/tests/kernel_sust.rs +++ b/zluda/tests/kernel_sust.rs @@ -312,7 +312,9 @@ unsafe fn byte_fill(vec: &mut Vec, value: u8) { fn extend_bytes_with(slice: &[u8], elm: u8, desired_length: usize) -> Vec { let mut result = slice.to_vec(); - result.extend(std::iter::repeat(elm).take(desired_length - slice.len())); + if desired_length > slice.len() { + result.extend(std::iter::repeat(elm).take(desired_length - slice.len())); + } result } @@ -337,10 +339,6 @@ unsafe fn kernel_sust_impl< if mem::size_of::() * CHANNELS < mem::size_of::() * SUST_N { return; } - // TODO: reenable those tests - if mem::size_of::() != mem::size_of::() || CHANNELS != SUST_N { - return; - } let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(seed); let size = 4usize; let random_size = 
rand::distributions::Uniform::::new(1, size as u32); @@ -461,4 +459,8 @@ unsafe fn kernel_sust_impl< assert_eq!(expected, &*observed); let mut unused = mem::zeroed(); assert_eq!(cuda.cuCtxPopCurrent(&mut unused), CUresult::CUDA_SUCCESS); + assert_eq!( + cuda.cuDevicePrimaryCtxRelease_v2(CUdevice_v1(0)), + CUresult::CUDA_SUCCESS + ); } From 995bc951745db30e39a1ff0c909f9a95b12c4ec7 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Sun, 28 Apr 2024 01:22:43 +0200 Subject: [PATCH 10/14] Build improvements (#206) * Allow to create .zip package on Windows * Allow to create .tar.gz package on Linux * Add configuration for post-build Github CI --- .github/workflows/rust.yml | 67 + .gitignore | 7 +- Cargo.lock | 2561 ++++++++++++++++++++++++++++++ Cargo.toml | 2 +- Makefile.toml | 57 - TROUBLESHOOTING.md | 9 +- process_address_table/Cargo.toml | 1 + xtask/Cargo.toml | 10 +- xtask/src/main.rs | 314 +++- zluda_api/Cargo.toml | 1 + zluda_dump/Cargo.toml | 1 + zluda_ml/Cargo.toml | 1 - 12 files changed, 2891 insertions(+), 140 deletions(-) create mode 100644 .github/workflows/rust.yml create mode 100644 Cargo.lock delete mode 100644 Makefile.toml diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 0000000..a9037fd --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,67 @@ +name: Rust +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +env: + CARGO_TERM_COLOR: always + ROCM_VERSION: "5.7.3" + +jobs: + build_lin: + name: Build and publish (Linux) + runs-on: ubuntu-20.04 + steps: + - uses: jlumbroso/free-disk-space@main + - name: Install ROCm + run: | + sudo mkdir --parents --mode=0755 /etc/apt/keyrings + sudo sh -c 'wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null' + sudo sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ env.ROCM_VERSION }} focal main > /etc/apt/sources.list.d/rocm.list' + 
sudo apt-get update + sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends rocm-smi-lib hip-runtime-amd comgr hipblaslt-dev hipfft-dev rocblas-dev rocsolver-dev rocsparse-dev miopen-hip-dev rocm-device-libs + echo 'export PATH="$PATH:/opt/rocm/bin"' | sudo tee /etc/profile.d/rocm.sh + echo '/opt/rocm/lib' | sudo tee /etc/ld.so.conf.d/rocm.conf + sudo ldconfig + - uses: actions/checkout@v4 + with: + submodules: true + - uses: Swatinem/rust-cache@v2 + - name: Build + # We use tar to unpack .tar.gz we've created because Github actions/upload-artifact + # is broken and will _always_ zip your artifact (even if it is a single file). + # See here: https://github.com/actions/upload-artifact/issues/39 + # and here: https://github.com/actions/upload-artifact/issues/109 + run: | + cargo xtask zip -r + tar -xzf target/release/zluda.tar.gz -C target/release + # https://stackoverflow.com/a/64195658 + - name: Set revision hash + run: echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_ENV + - name: Upload + uses: actions/upload-artifact@v4 + with: + name: zluda-linux-${{ env.SHORT_SHA }} + path: target/release/zluda + build_win: + name: Build and publish (Windows) + runs-on: windows-2019 + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - uses: Swatinem/rust-cache@v2 + - name: Build + run: | + cargo xtask zip -r + Expand-Archive -Path target/release/zluda.zip -DestinationPath target/release + # https://stackoverflow.com/a/74033027 + - name: Set revision hash + run: echo "SHORT_SHA=$("${{ github.sha }}".SubString(0, 7))" >> $env:GITHUB_ENV + - name: Upload + uses: actions/upload-artifact@v4 + with: + name: zluda-windows-${{ env.SHORT_SHA }} + path: target/release/zluda diff --git a/.gitignore b/.gitignore index 76550e8..7fd074b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ -target/ -Cargo.lock - -.vscode/ +target/ + +.vscode/ .idea/ \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 
index 0000000..ddeb7af --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,2561 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" + +[[package]] +name = "argh" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7af5ba06967ff7214ce4c7419c7d185be7ecd6cc4965a8f6e1d8ce0398aad219" +dependencies = [ + "argh_derive", + "argh_shared", +] + 
+[[package]] +name = "argh_derive" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56df0aeedf6b7a2fc67d06db35b09684c3e8da0c95f8f27685cb17e08413d87a" +dependencies = [ + "argh_shared", + "proc-macro2", + "quote", + "syn 2.0.41", +] + +[[package]] +name = "argh_shared" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5693f39141bda5760ecc4111ab08da40565d1771038c4a0250f03457ec707531" +dependencies = [ + "serde", +] + +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + +[[package]] +name = "ascii-canvas" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8824ecca2e851cec16968d54a01dd372ef8f95b244fb84b84e70128be347c3c6" +dependencies = [ + "term", +] + +[[package]] +name = "atiadlxx-sys" +version = "0.0.0" + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" + +[[package]] +name = "blake3" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ae2468a89544a466886840aa467a25b766499f4f04bf7d9fcd10ecee9fccef" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "digest", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bumpalo" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "camino" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" +dependencies = [ + "serde", +] + +[[package]] +name = "capnp" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95e65021d89250bbfe7c2791789ced2c4bdc21b0e8bb59c64f3fd6145a5fd678" + +[[package]] +name = "capnpc" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fbbc3763fb3e6635188e9cc51ee11a26f8777c553ca377430818dbebaaf6042b" +dependencies = [ + "capnp", +] + +[[package]] +name = "cargo-platform" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e34637b3140142bdf929fb439e8aa4ebad7651ebf7b1080b3930aa16ac1459ff" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo_metadata" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7daec1a2a2129eeba1644b220b4647ec537b0b5d4bfd6876fcc5a540056b592" +dependencies = [ + "camino", + "cargo-platform", + "semver", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "jobserver", + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-targets 0.48.5", +] + +[[package]] +name = "cloudflare-zlib" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40fa160a8670a2607111b0d6474261ad2992f3b4651982e14f902859086ecb91" +dependencies = [ + "cloudflare-zlib-sys", +] + +[[package]] +name = "cloudflare-zlib-sys" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3185ff8c69c53ab346d5ac89f418e194b997d48393cae321cb611dd05f83c90" +dependencies = [ + "cc", +] + +[[package]] +name = "cmake" +version = "0.1.50" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +dependencies = [ + "cc", +] + +[[package]] +name = "comgr" +version = "0.0.0" +dependencies = [ + "hip_common", + "itertools", + "libloading", +] + +[[package]] +name = "const_format" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a214c7af3d04997541b18d432afaff4c455e79e2029079647e72fc2bd27673" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f6ff08fd20f4f299298a28e2dfa8a8ba1036e6cd2460ac1de7b425d76f2500" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "constant_time_eq" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21a53c0a4d288377e7415b53dcfc3c04da5cdc2cc95c8d5ac178b58f0b861ad6" + +[[package]] +name = "convert_case" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb4a24b1aaf0fd0ce8b45161144d6f42cd91677fd5940fd431183eb023b3a2b8" + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "cpp_demangle" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eeaa953eaad386a53111e47172c2fedba671e5684c8dd601a5f474f4f118710f" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "cpufeatures" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c3242926edf34aec4ac3a77108ad4854bffaa2e4ddc1824124ce59231302d5" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d96137f14f244c37f989d9fff8f95e6c18b918e71f36638f8c49112e4c78f" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "cuda_base" +version = "0.0.0" +dependencies = [ + "proc-macro2", + "quote", + "rustc-hash", + "syn 1.0.109", +] + +[[package]] +name = "cuda_types" +version = "0.0.0" +dependencies = [ + "cuda_base", +] + +[[package]] +name = "darling" +version = "0.20.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc5d6b04b3fd0ba9926f945895de7d806260a2d7431ba82e7edaecb043c4c6b8" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04e48a959bcd5c761246f5d090ebc2fbf7b9cd527a492b07a67510c108f1e7e3" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.41", +] + +[[package]] +name = "darling_macro" +version = "0.20.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1545d67a2149e1d93b7e5c7752dce5a7426eb5d1357ddcfd89336b94444f77" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.41", +] + +[[package]] +name = "data-encoding" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5" + +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "detours-sys" +version = "0.1.2" +dependencies = [ + "cc", + "winapi", +] + +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "dirs" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + +[[package]] +name = 
"dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + +[[package]] +name = "dynasm" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "add9a102807b524ec050363f09e06f1504214b0e1c7797f64261c891022dce8b" +dependencies = [ + "bitflags 1.3.2", + "byteorder", + "lazy_static", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "dynasmrt" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64fba5a42bd76a17cad4bfa00de168ee1cbfa06a5e8ce992ae880218c05641a9" +dependencies = [ + "byteorder", + "dynasm", + "memmap2", +] + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "embed-manifest" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41cd446c890d6bed1d8b53acef5f240069ebef91d6fae7c5f52efe61fe8b5eae" + +[[package]] +name = "ena" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c533630cf40e9caa44bd91aadc88a75d75a4c3a12b4cfde353cbed41daa1e1f1" +dependencies = [ + "log", +] + +[[package]] +name = "enum-iterator" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fd242f399be1da0a5354aa462d57b4ab2b4ee0683cc552f7c007d2d12d36e94" +dependencies = [ + "enum-iterator-derive", +] + +[[package]] +name = "enum-iterator-derive" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03cdc46ec28bd728e67540c528013c6a10eb69a02eb31078a1bda695438cbfb8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.41", +] + +[[package]] 
+name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + +[[package]] +name = "filedescriptor" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7199d965852c3bac31f779ef99cbb4537f80e952e2d6aa0ffeb30cce00f4f46e" +dependencies = [ + "libc", + "thiserror", + "winapi", +] + +[[package]] +name = "filetime" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "windows-sys 0.52.0", +] + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "flate2" +version = "1.0.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4556222738635b7a3417ae6130d8f52201e45a0c4d1a907f0826383adb5f85e7" +dependencies = [ + "cloudflare-zlib-sys", + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "float-cmp" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +dependencies = [ + "num-traits", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "gag" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a713bee13966e9fbffdf7193af71d54a6b35a0bb34997cd6c9519ebeb5005972" +dependencies = [ + "filedescriptor", + "tempfile", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getset" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e45727250e75cc04ff2846a66397da8ef2b3db8e40e0cef4df67950a07621eb9" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "git2" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ccf7f68c2995f392c49fffb4f95ae2c873297830eb25c6bc4c114ce8f4562acc" +dependencies = [ + "bitflags 1.3.2", + "libc", + "libgit2-sys", + "log", + "url", +] + +[[package]] +name = "glam" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12f597d56c1bd55a811a1be189459e8fad2bbc272616375602443bdfb37fa774" + +[[package]] +name = "goblin" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7666983ed0dd8d21a6f6576ee00053ca0926fb281a5522577a4dbd0f1b54143" +dependencies = [ + "log", + "plain", + "scroll", +] + +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +dependencies = [ + "num-traits", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "hashbrown" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashlink" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown 0.14.3", +] + +[[package]] +name = "hermit-abi" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hip_common" +version = "0.0.0" +dependencies = [ + "capnp", + 
"capnpc", + "const_format", + "cuda_types", + "goblin", + "hip_runtime-sys", + "itertools", + "libloading", + "memchr", + "rusqlite", + "rustc-hash", + "sha2", +] + +[[package]] +name = "hip_runtime-sys" +version = "0.0.0" +dependencies = [ + "rustc-hash", +] + +[[package]] +name = "hipblaslt-sys" +version = "0.0.0" + +[[package]] +name = "hipfft-sys" +version = "0.0.0" + +[[package]] +name = "hiprt-sys" +version = "0.0.0" +dependencies = [ + "libloading", + "widestring 1.0.2", + "winapi", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", + "serde", +] + +[[package]] +name = "indexmap" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" +dependencies = [ + "equivalent", + "hashbrown 0.14.3", +] + +[[package]] +name = "is-terminal" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" +dependencies = [ + "hermit-abi", + "rustix", + "windows-sys 0.48.0", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" + +[[package]] +name = "jobserver" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lalrpop" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da4081d44f4611b66c6dd725e6de3169f9f63905421e8626fcb86b6a898998b8" +dependencies = [ + "ascii-canvas", + "bit-set", + "diff", + "ena", + "is-terminal", + "itertools", + "lalrpop-util", + "petgraph", + "pico-args", + "regex", + "regex-syntax 0.7.5", + "string_cache", + "term", + "tiny-keccak", + "unicode-xid", +] + +[[package]] +name = "lalrpop-util" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f35c735096c0293d313e8f2a641627472b83d01b937177fe76e5e2708d31e0d" +dependencies = [ + "regex", +] + +[[package]] +name = 
"lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.151" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" + +[[package]] +name = "libgit2-sys" +version = "0.14.2+1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f3d95f6b51075fe9810a7ae22c7095f12b98005ab364d8544797a825ce946a4" +dependencies = [ + "cc", + "libc", + "libz-sys", + "pkg-config", +] + +[[package]] +name = "libloading" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "libredox" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" +dependencies = [ + "bitflags 2.4.1", + "libc", + "redox_syscall", +] + +[[package]] +name = "libsqlite3-sys" +version = "0.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29f835d03d717946d28b1d1ed632eb6f0e24a299388ee623d0c23118d3e8a7fa" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "libz-sys" +version = "1.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.12" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" + +[[package]] +name = "llvm-sys" +version = "150.1.2" +dependencies = [ + "cmake", + "convert_case", + "libc", +] + +[[package]] +name = "lock_api" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "lz4-sys" +version = "1.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "macro_rules_attribute" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf0c9b980bf4f3a37fd7b1c066941dd1b1d0152ce6ee6e8fe8c49b9f6810d862" +dependencies = [ + "macro_rules_attribute-proc_macro", + "paste", +] + +[[package]] +name = "macro_rules_attribute-proc_macro" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d" + +[[package]] +name = "memchr" +version = "2.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" + +[[package]] +name = "memmap2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" +dependencies = [ + "libc", +] + +[[package]] +name = "memoffset" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" +dependencies = [ + "autocfg", +] + +[[package]] +name = "miniz_oxide" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +dependencies = [ + "adler", +] + +[[package]] +name = "miopen-sys" +version = "0.0.0" + +[[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + +[[package]] +name = "nougat" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b57b9ced431322f054fc673f1d3c7fa52d80efd9df74ad2fc759f044742510" +dependencies = [ + "macro_rules_attribute", + "nougat-proc_macros", +] + +[[package]] +name = "nougat-proc_macros" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c84f77a45e99a2f9b492695d99e1c23844619caa5f3e57647cffacad773ca257" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_enum" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca565a7df06f3d4b485494f25ba05da1435950f4dc263440eda7a6fa9b8e36e4" +dependencies = [ + "derivative", + "num_enum_derive", +] + +[[package]] +name = "num_enum_derive" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffa5a33ddddfee04c0283a7653987d634e880347e96b5b2ed64de07efb59db9d" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + +[[package]] +name = "offline_compiler" +version = "0.0.0" +dependencies = [ + "argh", + "comgr", + "hip_common", + "hip_runtime-sys", + "hiprt-sys", + "libloading", + "ptx", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "optix_base" +version = "0.0.0" +dependencies = [ + "proc-macro2", + "quote", + "rustc-hash", + "syn 1.0.109", +] + +[[package]] +name = "optix_dump" +version = "0.0.0" +dependencies = [ + "cuda_types", + "generic-array", + "lazy_static", + "optix_base", + "paste", + "sha2", + "typenum", + "winapi", + "wmi", +] + +[[package]] +name = "optix_types" +version = "0.0.0" +dependencies = [ + "cuda_types", + "optix_base", +] + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.48.5", +] + +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "petgraph" 
+version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" +dependencies = [ + "fixedbitset", + "indexmap 2.1.0", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pico-args" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" + +[[package]] +name = "pkg-config" +version = "0.3.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" + +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + +[[package]] +name = "proc-macro-crate" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d6ea3c4595b96363c13943497db34af4460fb474a95c43f4446ad341b8c9785" +dependencies = [ + "toml", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + 
"version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro-hack" +version = "0.5.20+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" + +[[package]] +name = "proc-macro2" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "process_address_table" +version = "0.0.0" +dependencies = [ + "detours-sys", + "libloading", + "windows", +] + +[[package]] +name = "ptx" +version = "0.0.0" +dependencies = [ + "bit-vec", + "comgr", + "cpp_demangle", + "cuda_base", + "cuda_types", + "either", + "half", + "hip_common", + "hip_runtime-sys", + "hiprt-sys", + "lalrpop", + "lalrpop-util", + "lazy_static", + "libloading", + "num-traits", + "paste", + "regex", + "rustc-hash", + "tempfile", + "thiserror", + "zluda_llvm", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + 
"rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "redox_users" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4" +dependencies = [ + "getrandom", + "libredox", + "thiserror", +] + +[[package]] +name = "regex" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax 0.8.2", +] + +[[package]] +name = "regex-automata" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.8.2", +] + +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "rocblas-sys" +version = "0.0.0" + +[[package]] +name = "rocm_smi-sys" +version = "0.0.0" + +[[package]] +name = "rocsolver-sys" +version = "0.0.0" + +[[package]] +name = "rocsparse-sys" +version = "0.0.0" + +[[package]] +name = "rusqlite" 
+version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01e213bc3ecb39ac32e81e51ebe31fd888a940515173e3a18a35f8c6e896422a" +dependencies = [ + "bitflags 1.3.2", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "serde_json", + "smallvec", +] + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustix" +version = "0.38.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" +dependencies = [ + "bitflags 2.4.1", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustversion" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" + +[[package]] +name = "ryu" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "scroll" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" +dependencies = [ + "scroll_derive", +] + +[[package]] +name = "scroll_derive" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1db149f81d46d2deba7cd3c50772474707729550221e69588478ebf9ada425ae" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.41", +] + +[[package]] +name = "semver" +version = "1.0.20" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" +dependencies = [ + "serde", +] + +[[package]] +name = "serde" +version = "1.0.193" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.193" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.41", +] + +[[package]] +name = "serde_json" +version = "1.0.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_with" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07ff71d2c147a7b57362cead5e22f772cd52f6ab31cfcd9edcd7f6aeb2a0afbe" +dependencies = [ + "base64", + "chrono", + "hex", + "indexmap 1.9.3", + "serde", + "serde_json", + "serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "881b6f881b17d13214e5d494c939ebab463d01264ce1811e9d4ac3a882e7695f" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.41", +] + +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "string_cache" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared", + "precomputed-hash", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "subtle" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tar" +version = "0.4.40" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "tempfile" +version = "3.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" +dependencies = [ + "cfg-if", + "fastrand", + "redox_syscall", + "rustix", + "windows-sys 0.48.0", +] + +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + +[[package]] +name = "thiserror" +version = "1.0.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f11c217e1416d6f036b870f14e0413d480dbf28edbee1f877abaf0206af43bb7" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.41", +] + +[[package]] +name = "thread-id" +version = "4.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0ec81c46e9eb50deaa257be2f148adf052d1fb7701cfd55ccfab2525280b70b" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "time" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59e399c068f43a5d116fedaf73b203fa4f9c519f17e2b34f63221d3792f81446" +dependencies = [ + "itoa", + "libc", + "num_threads", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" + 
+[[package]] +name = "time-macros" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96ba15a897f3c86766b757e5ac7221554c6750054d74d5b28844fce5fb36a6c4" +dependencies = [ + "time-core", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "toml" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-bidi" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + 
+[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + +[[package]] +name = "url" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "vergen" +version = "7.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f21b881cd6636ece9735721cf03c1fe1e774fe258683d084bb2812ab67435749" +dependencies = [ + "anyhow", + "cfg-if", + "enum-iterator", + "getset", + "git2", + "rustversion", + "thiserror", + "time", +] + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.41", + 
"wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.41", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" + +[[package]] +name = "wchar" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c74d010bf16569f942b0b7d3c777dd674f8ee539b48d809dc548b3453039c2df" +dependencies = [ + "proc-macro-hack", + "wchar-impl", +] + +[[package]] +name = "wchar-impl" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f135922b9303f899bfa446fce1eb149f43462f1e9ac7f50e24ea6b913416dd84" +dependencies = [ + "proc-macro-hack", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "widestring" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17882f045410753661207383517a6f62ec3dbeb6a4ed2acce01f0728238d1983" + +[[package]] +name = "widestring" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "653f141f39ec16bba3c5abe400a0c60da7468261cc2cbf36805022876bc721a8" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + 
"winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-core" +version = "0.51.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" 
+version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + +[[package]] +name = "wmi" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "757a458f9bfab0542c11feed99bd492cbe23add50515bd8eecf8c6973673d32d" +dependencies = [ + "chrono", + "log", + "serde", + "thiserror", + "widestring 0.5.1", + "winapi", +] + +[[package]] +name = "xattr" +version = "1.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "914566e6413e7fa959cc394fb30e563ba80f3541fbd40816d4c05a0fc3f2a0f1" +dependencies = [ + "libc", + "linux-raw-sys", + "rustix", +] + +[[package]] +name = "xtask" +version = "0.0.0" +dependencies = [ + "argh", + "cargo-platform", + "cargo_metadata", + "flate2", + "serde", + "serde_json", + "tar", + "time", + "zip", +] + +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.41", +] + +[[package]] +name = "zip" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" +dependencies = [ + "byteorder", + "crc32fast", + "crossbeam-utils", + "flate2", + "time", +] + +[[package]] +name = "zluda" +version = "0.0.0" +dependencies = [ + "blake3", + "comgr", + "cuda_base", + "cuda_types", + "dirs", + "gag", + "goblin", + "half", + "hip_common", + "hip_runtime-sys", + "lazy_static", + "libc", + "lz4-sys", + "memchr", + "memoffset", + "num-traits", + "num_enum", + "paste", + "ptx", + "rand", + "rand_chacha", + "rusqlite", + "rustc-hash", + "static_assertions", + "tempfile", + "time", + "vergen", + "winapi", + "zluda_dark_api", +] + +[[package]] +name = "zluda_api" +version = "0.0.0" +dependencies = [ + "cuda_types", + "libloading", + "once_cell", + "windows", +] + +[[package]] +name = "zluda_blas" +version = "0.0.0" +dependencies = [ + "cuda_types", + "hip_common", + "rocblas-sys", + "rocsolver-sys", + "zluda_dark_api", +] + +[[package]] +name = "zluda_blaslt" +version = 
"0.0.0" +dependencies = [ + "cuda_types", + "hip_common", + "hipblaslt-sys", + "zluda_dark_api", +] + +[[package]] +name = "zluda_ccl" +version = "0.0.0" + +[[package]] +name = "zluda_dark_api" +version = "0.0.0" +dependencies = [ + "bit-vec", + "bitflags 2.4.1", + "cloudflare-zlib", + "cuda_types", + "either", + "goblin", + "hip_common", + "lz4-sys", + "paste", + "thread-id", +] + +[[package]] +name = "zluda_dnn" +version = "0.0.0" +dependencies = [ + "hip_runtime-sys", + "miopen-sys", +] + +[[package]] +name = "zluda_dump" +version = "0.0.0" +dependencies = [ + "crossbeam-channel", + "cuda_base", + "cuda_types", + "detours-sys", + "dynasm", + "dynasmrt", + "goblin", + "hip_common", + "lazy_static", + "libc", + "lz4-sys", + "paste", + "ptx", + "rand", + "rand_chacha", + "regex", + "rustc-hash", + "serde", + "serde_derive", + "serde_json", + "thread-id", + "wchar", + "winapi", + "zluda_dark_api", +] + +[[package]] +name = "zluda_fft" +version = "0.0.0" +dependencies = [ + "cuda_types", + "hip_common", + "hipfft-sys", + "lazy_static", + "slab", + "zluda_dark_api", +] + +[[package]] +name = "zluda_inject" +version = "0.0.0" +dependencies = [ + "argh", + "detours-sys", + "embed-manifest", + "tempfile", + "winapi", + "zluda_dump", + "zluda_ml", + "zluda_redirect", +] + +[[package]] +name = "zluda_lib" +version = "0.0.0" +dependencies = [ + "zluda", +] + +[[package]] +name = "zluda_llvm" +version = "0.0.0" +dependencies = [ + "bitflags 2.4.1", + "cc", + "llvm-sys", +] + +[[package]] +name = "zluda_ml" +version = "0.0.0" +dependencies = [ + "atiadlxx-sys", + "rocm_smi-sys", +] + +[[package]] +name = "zluda_redirect" +version = "0.0.0" +dependencies = [ + "detours-sys", + "memchr", + "wchar", + "winapi", +] + +[[package]] +name = "zluda_rt" +version = "0.0.0" +dependencies = [ + "comgr", + "data-encoding", + "dirs", + "float-cmp", + "generic-array", + "glam", + "hip_common", + "hip_runtime-sys", + "hiprt-sys", + "libloading", + "nougat", + "optix_base", + "optix_types", + 
"paste", + "ptx", + "rusqlite", + "rustc-hash", + "serde", + "serde_json", + "serde_with", + "sha2", + "static_assertions", + "time", + "typenum", + "vergen", + "winapi", +] + +[[package]] +name = "zluda_sparse" +version = "0.0.0" +dependencies = [ + "cuda_types", + "hip_common", + "hip_runtime-sys", + "rocsparse-sys", + "zluda_dark_api", +] diff --git a/Cargo.toml b/Cargo.toml index 2e9a6ed..c20fa2f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,4 +58,4 @@ opt-level = 3 opt-level = 3 [profile.dev.package.xtask] -opt-level = 3 +opt-level = 2 diff --git a/Makefile.toml b/Makefile.toml deleted file mode 100644 index adab2b9..0000000 --- a/Makefile.toml +++ /dev/null @@ -1,57 +0,0 @@ -[config] -default_to_workspace = false -skip_core_tasks = true - -[tasks.build] -run_task = [ - { name = "build-windows", condition = { platforms = ["windows"] } }, - { name = "build-linux", condition = { platforms = ["linux"] } }, -] - -[tasks.build-windows] -command = "cargo" -args = [ - "build", - "-p", "offline_compiler", - "-p", "zluda_dump", - "-p", "zluda_inject", - "-p", "zluda_lib", - "-p", "zluda_ml", - "-p", "zluda_redirect", -] - -[tasks.build-linux] -command = "cargo" -args = [ - "build", - "-p", "offline_compiler", - "-p", "zluda_blas", - "-p", "zluda_blaslt", - "-p", "zluda_ccl", - "-p", "zluda_dnn", - "-p", "zluda_dump", - "-p", "zluda_fft", - "-p", "zluda_lib", - "-p", "zluda_ml", - "-p", "zluda_sparse", -] - -[tasks.build-release] -command = "cargo" -args = [ - "build", - "--release", - "-p", "offline_compiler", - "-p", "zluda_blas", - "-p", "zluda_blaslt", - "-p", "zluda_ccl", - "-p", "zluda_dnn", - "-p", "zluda_dump", - "-p", "zluda_fft", - "-p", "zluda_lib", - "-p", "zluda_ml", - "-p", "zluda_sparse", -] - -[tasks.default] -alias = "build" diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md index 18ec079..3679f6c 100644 --- a/TROUBLESHOOTING.md +++ b/TROUBLESHOOTING.md @@ -92,13 +92,16 @@ If you are dumping original CUDA use: ### Linux -Known bug: when dumping from 
original CUDA you should remove (or rename) all the files in `/dump` except `libcuda.so` and `libcuda.so.1`. - -Use it like this: +If dumping from ZLUDA use it like this: ``` LD_LIBRARY_PATH="/dump:$LD_LIBRARY_PATH" ``` +If dumping from NVIDIA CUDA use it like this: +``` +LD_LIBRARY_PATH="/dump_nvidia:$LD_LIBRARY_PATH" +``` + ### Result If all went well you should see lines like this in the console output and in the log file specified by `ZLUDA_DUMP_DIR`: diff --git a/process_address_table/Cargo.toml b/process_address_table/Cargo.toml index 2de38f1..97d6083 100644 --- a/process_address_table/Cargo.toml +++ b/process_address_table/Cargo.toml @@ -18,3 +18,4 @@ features = [ [package.metadata.zluda] debug_only = true +skip_zip = true diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml index 2a214e4..edc0965 100644 --- a/xtask/Cargo.toml +++ b/xtask/Cargo.toml @@ -11,4 +11,12 @@ cargo_metadata = "=0.17.0" # cargo-platform is a cargo_metadata, version 0.1.6 requires rust 1.70 or higher cargo-platform = "=0.1.5" serde = "1.0.193" -serde_json = "1.0.108" \ No newline at end of file +serde_json = "1.0.108" +time = { version = "=0.3.23", features = ["local-offset"] } + +[target.'cfg(windows)'.dependencies] +zip = { version = "0.6.6", features = ["deflate", "time"], default-features = false } + +[target.'cfg(unix)'.dependencies] +flate2 = { version = "1.0.28", features = ["cloudflare_zlib"], default-features = false } +tar = "0.4" \ No newline at end of file diff --git a/xtask/src/main.rs b/xtask/src/main.rs index 317ec01..d5e0cf8 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -1,4 +1,5 @@ use argh::{EarlyExit, FromArgs, TopLevelCommand}; +use cargo_metadata::camino::Utf8PathBuf; use serde::Deserialize; use std::{ env, @@ -60,7 +61,7 @@ struct BuildCommand { } #[derive(FromArgs)] -/// Package build artifacts into an archive (.zip or .tar.gz) +/// Compile ZLUDA and package binaries into an archive (.zip or .tar.gz) #[argh(subcommand, name = "zip")] struct ZipCommand { 
/// use artifacts from release mode @@ -73,10 +74,15 @@ fn main() -> Result<(), DynError> { let args: Arguments = argh::from_env(); std::process::exit(match args.command { Subcommand::Build(BuildCommand { release }) => build(!release)?, - Subcommand::Zip(_) => panic!(), + Subcommand::Zip(ZipCommand { release }) => build_and_zip(!release), }) } +fn build_and_zip(is_debug: bool) -> i32 { + let workspace = build_impl(is_debug).unwrap(); + os::zip(workspace) +} + #[derive(Deserialize)] struct ZludaMetadata { zluda: Project, @@ -92,8 +98,6 @@ struct Project { #[serde(skip_deserializing)] kind: TargetKind, #[serde(default)] - top_level: bool, - #[serde(default)] windows_only: bool, #[serde(default)] linux_only: bool, @@ -104,9 +108,13 @@ struct Project { #[serde(default)] skip_dump_link: bool, #[serde(default)] + skip_zip: bool, + #[serde(default)] linux_names: Vec, #[serde(default)] dump_names: Vec, + #[serde(default)] + dump_nvidia_names: Vec, } #[derive(Clone, Copy, Default, PartialEq, Debug)] @@ -116,14 +124,56 @@ enum TargetKind { Cdylib, } +struct Workspace { + pub cargo: String, + pub project_root: PathBuf, + pub projects: Vec, + pub target_directory: Utf8PathBuf, +} + +impl Workspace { + fn open(is_debug: bool) -> Result { + let cargo = env::var("CARGO").unwrap_or_else(|_| "cargo".to_string()); + let project_root = Self::project_root()?; + let mut cmd = cargo_metadata::MetadataCommand::new(); + cmd.cargo_path(&cargo).current_dir(&project_root).no_deps(); + let cargo_metadata = cmd.exec()?; + let projects = cargo_metadata + .packages + .into_iter() + .filter_map(Project::new) + .filter(|p| !p.skip_build(is_debug)) + .collect::>(); + let mut target_directory = cargo_metadata.target_directory; + target_directory.push(if is_debug { "debug" } else { "release" }); + Ok(Workspace { + cargo, + project_root, + projects, + target_directory, + }) + } + + fn project_root() -> Result { + Ok(Path::new(&env!("CARGO_MANIFEST_DIR")) + .ancestors() + .nth(1) + 
.ok_or::("CARGO_MANIFEST_DIR".into())? + .to_path_buf()) + } + + fn cargo_command(&self) -> Command { + let mut command = Command::new(&self.cargo); + command.current_dir(&self.project_root); + command + } +} + impl Project { - fn new(json_pkg: cargo_metadata::Package) -> Self { - let mut project = serde_json::from_value::>(json_pkg.metadata) - .unwrap() - .map_or(Default::default(), |x| x.zluda); - if project != Default::default() { - project.top_level = true; - } + fn new(json_pkg: cargo_metadata::Package) -> Option { + let project_metadata = + serde_json::from_value::>(json_pkg.metadata).unwrap()?; + let mut project = project_metadata.zluda; project.name = json_pkg.name; if let Some((target_name, kind)) = json_pkg.targets.into_iter().find_map(|t| { match t.kind.first().map(std::ops::Deref::deref) { @@ -135,13 +185,10 @@ impl Project { project.target_name = target_name; project.kind = kind; } - project + Some(project) } fn skip_build(&self, is_debug: bool) -> bool { - if !self.top_level { - return true; - } if self.broken { return true; } @@ -159,67 +206,76 @@ impl Project { } fn build(is_debug: bool) -> Result { - let cargo = env::var("CARGO").unwrap_or_else(|_| "cargo".to_string()); - let project_root = project_root()?; - let mut cmd = cargo_metadata::MetadataCommand::new(); - cmd.cargo_path(&cargo).current_dir(&project_root).no_deps(); - let metadata = cmd.exec()?; - let projects = metadata - .packages - .into_iter() - .map(Project::new) - .filter(|p| !p.skip_build(is_debug)) - .collect::>(); - let mut command = Command::new(&cargo); - command.current_dir(&project_root).arg("build"); - projects.iter().fold(&mut command, |command, proj| { - command.args(["-p", &proj.name]) - }); + build_impl(is_debug)?; + Ok(0) +} + +fn build_impl(is_debug: bool) -> Result { + let workspace = Workspace::open(is_debug)?; + let mut command = workspace.cargo_command(); + command.arg("build"); + workspace + .projects + .iter() + .fold(&mut command, |command, proj| { + 
command.args(["-p", &proj.name]) + }); if !is_debug { command.arg("--release"); } let build_result = command.status()?.code().unwrap(); if build_result != 0 { - return Ok(build_result); + return Err(format!("{command:?} failed with exit code {build_result}").into()); } - os::create_dump_dir_and_symlinks(is_debug, metadata.target_directory, projects); - Ok(0) + os::create_dump_dir_and_symlinks(&workspace); + Ok(workspace) } -fn project_root() -> Result { - Ok(Path::new(&env!("CARGO_MANIFEST_DIR")) - .ancestors() - .nth(1) - .ok_or::("CARGO_MANIFEST_DIR".into())? - .to_path_buf()) -} +impl TargetKind { + #[cfg(unix)] + fn prefix(self) -> &'static str { + match self { + TargetKind::Binary => "", + TargetKind::Cdylib => "lib", + } + } -#[cfg(not(unix))] -mod os { - use super::Project; - use cargo_metadata::camino::Utf8PathBuf; + #[cfg(unix)] + fn suffix(self) -> &'static str { + match self { + TargetKind::Binary => "", + TargetKind::Cdylib => ".so", + } + } - // This is 100% intentional, we don't want symlinks on Windows since - // we use completely different scheme for injections here - pub(crate) fn create_dump_dir_and_symlinks(_: bool, _: Utf8PathBuf, _: Vec) {} + #[cfg(windows)] + fn suffix(self) -> &'static str { + match self { + TargetKind::Binary => ".exe", + TargetKind::Cdylib => ".dll", + } + } } #[cfg(unix)] mod os { - use super::{Project, TargetKind}; + use crate::Workspace; use cargo_metadata::camino::Utf8PathBuf; + use flate2::{write::GzEncoder, Compression}; + use std::{ + fs::File, + time::{Duration, SystemTime}, + }; - pub(crate) fn create_dump_dir_and_symlinks( - is_debug: bool, - mut target_directory: Utf8PathBuf, - projects: Vec, - ) { + pub(crate) fn create_dump_dir_and_symlinks(workspace: &Workspace) { use std::fs; - target_directory.push(if is_debug { "debug" } else { "release" }); - let mut dump_dir = target_directory.clone(); + let mut dump_dir = workspace.target_directory.clone(); dump_dir.push("dump"); fs::create_dir_all(&dump_dir).unwrap(); - 
for project in projects { + let mut dump_nvidia_dir = dump_dir.clone(); + dump_nvidia_dir.set_file_name("dump_nvidia"); + fs::create_dir_all(&dump_nvidia_dir).unwrap(); + for project in workspace.projects.iter() { let dst = format!( "{}{}{}", project.kind.prefix(), @@ -227,15 +283,18 @@ mod os { project.kind.suffix() ); let dump_dst = format!("../{}", dst); - for src_file in project.linux_names { - force_symlink(&dst, &target_directory, &src_file); + for src_file in project.linux_names.iter() { + force_symlink(&dst, &workspace.target_directory, src_file); if project.skip_dump_link { continue; } - force_symlink(&dump_dst, &dump_dir, &src_file); + force_symlink(&dump_dst, &dump_dir, src_file); } - for src_file in project.dump_names { - force_symlink(&dump_dst, &dump_dir, &src_file); + for src_file in project.dump_names.iter() { + force_symlink(&dump_dst, &dump_dir, src_file); + } + for src_file in project.dump_nvidia_names.iter() { + force_symlink(&dump_dst, &dump_nvidia_dir, src_file); } } } @@ -263,19 +322,128 @@ mod os { } } - impl TargetKind { - fn prefix(self) -> &'static str { - match self { - TargetKind::Binary => "", - TargetKind::Cdylib => "lib", + pub fn zip(workspace: Workspace) -> i32 { + let mut target_file = workspace.target_directory.clone(); + target_file.push("zluda.tar.gz"); + let gz_file = File::create(target_file).unwrap(); + let gz = GzEncoder::new(gz_file, Compression::default()); + let mut tar = tar::Builder::new(gz); + let time = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap_or(Duration::ZERO); + for project in workspace.projects { + if project.skip_zip { + continue; + } + let mut src_file = File::open(format!( + "{}/{}{}{}", + &workspace.target_directory, + project.kind.prefix(), + project.target_name, + project.kind.suffix() + )) + .unwrap(); + let file_name = format!( + "{}{}{}", + project.kind.prefix(), + project.target_name, + project.kind.suffix() + ); + tar.append_file(format!("zluda/{file_name}"), &mut src_file) 
+ .unwrap(); + for linux_name in project.linux_names.iter() { + let mut header = tar_header_symlink(time); + tar.append_link(&mut header, format!("zluda/{}", linux_name), &file_name) + .unwrap(); + if project.skip_dump_link { + continue; + } + let mut header = tar_header_symlink(time); + tar.append_link( + &mut header, + format!("zluda/dump/{}", linux_name), + format!("../{file_name}"), + ) + .unwrap(); + } + for dump_name in project.dump_names.iter() { + let mut header = tar_header_symlink(time); + tar.append_link( + &mut header, + format!("zluda/dump/{}", dump_name), + format!("../{file_name}"), + ) + .unwrap(); + } + for dump_name in project.dump_nvidia_names.iter() { + let mut header = tar_header_symlink(time); + tar.append_link( + &mut header, + format!("zluda/dump_nvidia/{}", dump_name), + format!("../{file_name}"), + ) + .unwrap(); } } + tar.finish().unwrap(); + 0 + } - fn suffix(self) -> &'static str { - match self { - TargetKind::Binary => "", - TargetKind::Cdylib => ".so", - } - } + fn tar_header_symlink(time: Duration) -> tar::Header { + let mut header = tar::Header::new_gnu(); + header.set_mtime(time.as_secs()); + header.set_entry_type(tar::EntryType::Symlink); + header + } +} + +#[cfg(windows)] +mod os { + use crate::Workspace; + use std::{convert::TryFrom, fs::File}; + + // This is 100% intentional, we don't want symlinks on Windows since + // we use a completely different scheme for injections there + pub(crate) fn create_dump_dir_and_symlinks(_: &Workspace) {} + + pub(crate) fn zip(workspace: Workspace) -> i32 { + fn get_zip_entry_options( + f: &File, + time_offset: time::UtcOffset, + ) -> Option { + let time = f.metadata().unwrap().modified().unwrap(); + let time = time::OffsetDateTime::from(time).to_offset(time_offset); + Some( + zip::write::FileOptions::default() + .last_modified_time(zip::DateTime::try_from(time).unwrap()), + ) + } + let mut target_file = workspace.target_directory.clone(); + target_file.push("zluda.zip"); + let zip_archive = 
File::create(target_file).unwrap(); + let mut zip_writer = zip::write::ZipWriter::new(zip_archive); + let time_offset = time::UtcOffset::current_local_offset().unwrap_or(time::UtcOffset::UTC); + for p in workspace.projects { + if p.skip_zip { + continue; + } + let mut src_file = File::open(format!( + "{}/{}{}", + &workspace.target_directory, + p.target_name, + p.kind.suffix() + )) + .unwrap(); + zip_writer + .start_file( + format!("zluda/{}{}", p.target_name, p.kind.suffix()), + get_zip_entry_options(&src_file, time_offset) + .unwrap_or(zip::write::FileOptions::default()), + ) + .unwrap(); + std::io::copy(&mut src_file, &mut zip_writer).unwrap(); + } + zip_writer.finish().unwrap(); + 0 } } diff --git a/zluda_api/Cargo.toml b/zluda_api/Cargo.toml index b708cbd..79231b0 100644 --- a/zluda_api/Cargo.toml +++ b/zluda_api/Cargo.toml @@ -26,3 +26,4 @@ features = [ [package.metadata.zluda] debug_only = true windows_only = true +skip_zip = true diff --git a/zluda_dump/Cargo.toml b/zluda_dump/Cargo.toml index 2ee1592..1499905 100644 --- a/zluda_dump/Cargo.toml +++ b/zluda_dump/Cargo.toml @@ -44,3 +44,4 @@ rand = "0.8.5" # Nominally debug_only, but useful for power users [package.metadata.zluda] dump_names = ["libcuda.so", "libcuda.so.1"] +dump_nvidia_names = ["libcuda.so", "libcuda.so.1"] diff --git a/zluda_ml/Cargo.toml b/zluda_ml/Cargo.toml index 25d88a9..452cc0e 100644 --- a/zluda_ml/Cargo.toml +++ b/zluda_ml/Cargo.toml @@ -15,5 +15,4 @@ atiadlxx-sys = { path = "../atiadlxx-sys" } rocm_smi-sys = { path = "../rocm_smi-sys" } [package.metadata.zluda] -top_level = true linux_names = ["libnvidia-ml.so", "libnvidia-ml.so.1"] From bdc652f9ebcac9a79849eeee84a391a4ac107913 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Mon, 29 Apr 2024 15:09:14 +0200 Subject: [PATCH 11/14] Correctly report emulated wave32 CUDA device (#216) --- xtask/src/main.rs | 2 +- zluda/src/impl/device.rs | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/xtask/src/main.rs 
b/xtask/src/main.rs index d5e0cf8..3f1f224 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -322,7 +322,7 @@ mod os { } } - pub fn zip(workspace: Workspace) -> i32 { + pub(crate) fn zip(workspace: Workspace) -> i32 { let mut target_file = workspace.target_directory.clone(); target_file.push("zluda.tar.gz"); let gz_file = File::create(target_file).unwrap(); diff --git a/zluda/src/impl/device.rs b/zluda/src/impl/device.rs index c7e8190..b7dd0f5 100644 --- a/zluda/src/impl/device.rs +++ b/zluda/src/impl/device.rs @@ -109,6 +109,10 @@ pub(crate) unsafe fn get_attribute( return Err(CUresult::CUDA_ERROR_INVALID_VALUE); } let hip_attrib = match attrib { + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_WARP_SIZE => { + *pi = 32; + return Ok(()); + } CUdevice_attribute::CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT => { *pi = 1; return Ok(()); From 27c0e136777a2db49dbb0caa888d561819230493 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Mon, 6 May 2024 00:28:49 +0200 Subject: [PATCH 12/14] Minor codegen improvements (#225) --- ptx/src/emit.rs | 88 +++++--- ptx/src/test/spirv_run/abs.ll | 14 +- ptx/src/test/spirv_run/activemask.ll | 6 +- ptx/src/test/spirv_run/add.ll | 6 +- ptx/src/test/spirv_run/add_global.ll | 6 +- ptx/src/test/spirv_run/add_non_coherent.ll | 6 +- ptx/src/test/spirv_run/add_param_ptr.ll | 26 ++- ptx/src/test/spirv_run/add_tuning.ll | 6 +- ptx/src/test/spirv_run/addc_cc.ll | 54 ++--- ptx/src/test/spirv_run/addc_cc2.ll | 56 ++--- ptx/src/test/spirv_run/alloca_call.ll | 26 ++- ptx/src/test/spirv_run/amdgpu_unnamed.ll | 32 +-- ptx/src/test/spirv_run/and.ll | 10 +- ptx/src/test/spirv_run/assertfail.ll | 32 +-- ptx/src/test/spirv_run/atom_add.ll | 14 +- ptx/src/test/spirv_run/atom_add_f16.ll | 14 +- ptx/src/test/spirv_run/atom_add_float.ll | 14 +- ptx/src/test/spirv_run/atom_cas.ll | 20 +- ptx/src/test/spirv_run/atom_inc.ll | 14 +- ptx/src/test/spirv_run/atom_ld_st.ll | 6 +- ptx/src/test/spirv_run/atom_ld_st_vec.ll | 18 +- ptx/src/test/spirv_run/atom_max_u32.ll | 
10 +- ptx/src/test/spirv_run/b64tof64.ll | 12 +- ptx/src/test/spirv_run/barrier.ll | 4 +- ptx/src/test/spirv_run/bfe.ll | 14 +- ptx/src/test/spirv_run/bfi.ll | 18 +- ptx/src/test/spirv_run/bfind.ll | 46 ++-- ptx/src/test/spirv_run/bfind_shiftamt.ll | 40 ++-- ptx/src/test/spirv_run/block.ll | 6 +- ptx/src/test/spirv_run/bra.ll | 12 +- ptx/src/test/spirv_run/brev.ll | 6 +- ptx/src/test/spirv_run/call.ll | 12 +- ptx/src/test/spirv_run/call_bug.ll | 20 +- ptx/src/test/spirv_run/call_multi_return.ll | 32 +-- ptx/src/test/spirv_run/callprototype.ll | 16 +- ptx/src/test/spirv_run/carry_set_all.ll | 210 +++++++++--------- ptx/src/test/spirv_run/clz.ll | 10 +- ptx/src/test/spirv_run/const.ll | 18 +- ptx/src/test/spirv_run/constant_f32.ll | 6 +- ptx/src/test/spirv_run/constant_negative.ll | 6 +- ptx/src/test/spirv_run/cos.ll | 6 +- ptx/src/test/spirv_run/cvt_clamp.ll | 30 +-- ptx/src/test/spirv_run/cvt_f32_f16.ll | 6 +- ptx/src/test/spirv_run/cvt_f32_s32.ll | 30 +-- ptx/src/test/spirv_run/cvt_f64_f32.ll | 6 +- ptx/src/test/spirv_run/cvt_rni.ll | 14 +- ptx/src/test/spirv_run/cvt_rzi.ll | 14 +- ptx/src/test/spirv_run/cvt_s16_s8.ll | 10 +- ptx/src/test/spirv_run/cvt_s32_f32.ll | 14 +- ptx/src/test/spirv_run/cvt_s64_s32.ll | 6 +- ptx/src/test/spirv_run/cvt_sat_s_u.ll | 20 +- ptx/src/test/spirv_run/cvt_u32_s16.ll | 6 +- ptx/src/test/spirv_run/cvta.ll | 18 +- ptx/src/test/spirv_run/div_approx.ll | 10 +- ptx/src/test/spirv_run/dp4a.ll | 14 +- ptx/src/test/spirv_run/ex2.ll | 30 +-- ptx/src/test/spirv_run/extern_shared.ll | 6 +- ptx/src/test/spirv_run/extern_shared_call.ll | 18 +- ptx/src/test/spirv_run/fma.ll | 14 +- ptx/src/test/spirv_run/func_ptr.ll | 12 +- ptx/src/test/spirv_run/generic.ll | 12 +- ptx/src/test/spirv_run/global_array.ll | 12 +- ptx/src/test/spirv_run/lanemask_lt.ll | 12 +- ptx/src/test/spirv_run/ld_st.ll | 6 +- ptx/src/test/spirv_run/ld_st_implicit.ll | 20 +- ptx/src/test/spirv_run/ld_st_offset.ll | 14 +- ptx/src/test/spirv_run/lg2.ll | 6 +- 
ptx/src/test/spirv_run/local_align.ll | 6 +- ptx/src/test/spirv_run/mad_hi_cc.ll | 58 ++--- ptx/src/test/spirv_run/mad_s32.ll | 50 +++-- ptx/src/test/spirv_run/madc_cc.ll | 42 ++-- ptx/src/test/spirv_run/max.ll | 10 +- ptx/src/test/spirv_run/membar.ll | 6 +- ptx/src/test/spirv_run/min.ll | 10 +- ptx/src/test/spirv_run/mov.ll | 12 +- ptx/src/test/spirv_run/mov_address.ll | 12 +- ptx/src/test/spirv_run/mov_vector_cast.ll | 22 +- ptx/src/test/spirv_run/mul_ftz.ll | 10 +- ptx/src/test/spirv_run/mul_hi.ll | 6 +- ptx/src/test/spirv_run/mul_lo.ll | 6 +- ptx/src/test/spirv_run/mul_non_ftz.ll | 10 +- ptx/src/test/spirv_run/mul_wide.ll | 16 +- ptx/src/test/spirv_run/multireg.ll | 6 +- ptx/src/test/spirv_run/neg.ll | 6 +- .../test/spirv_run/non_scalar_ptr_offset.ll | 10 +- ptx/src/test/spirv_run/not.ll | 6 +- ptx/src/test/spirv_run/ntid.ll | 12 +- ptx/src/test/spirv_run/or.ll | 10 +- ptx/src/test/spirv_run/param_ptr.ll | 12 +- ptx/src/test/spirv_run/popc.ll | 6 +- ptx/src/test/spirv_run/pred_not.ll | 26 ++- ptx/src/test/spirv_run/prmt.ll | 42 ++-- ptx/src/test/spirv_run/prmt_non_immediate.ll | 24 +- ptx/src/test/spirv_run/rcp.ll | 6 +- ptx/src/test/spirv_run/reg_local.ll | 14 +- ptx/src/test/spirv_run/rem.ll | 10 +- ptx/src/test/spirv_run/rsqrt.ll | 10 +- ptx/src/test/spirv_run/s64_min.ll | 12 +- ptx/src/test/spirv_run/sad.ll | 38 ++-- ptx/src/test/spirv_run/selp.ll | 10 +- ptx/src/test/spirv_run/selp_true.ll | 10 +- ptx/src/test/spirv_run/set_f16x2.ll | 32 +-- ptx/src/test/spirv_run/setp.ll | 26 ++- ptx/src/test/spirv_run/setp_bool.ll | 44 ++-- ptx/src/test/spirv_run/setp_gt.ll | 26 ++- ptx/src/test/spirv_run/setp_leu.ll | 26 ++- ptx/src/test/spirv_run/setp_nan.ll | 98 ++++---- ptx/src/test/spirv_run/setp_num.ll | 98 ++++---- ptx/src/test/spirv_run/setp_pred2.ll | 26 ++- ptx/src/test/spirv_run/shared_ptr_32.ll | 16 +- .../test/spirv_run/shared_ptr_take_address.ll | 12 +- ptx/src/test/spirv_run/shared_unify_decl.ll | 34 +-- ptx/src/test/spirv_run/shared_unify_extern.ll | 34 
+-- ptx/src/test/spirv_run/shared_unify_local.ll | 36 +-- ptx/src/test/spirv_run/shared_variable.ll | 6 +- ptx/src/test/spirv_run/shf.ll | 10 +- ptx/src/test/spirv_run/shl.ll | 10 +- ptx/src/test/spirv_run/shl_link_hack.ll | 10 +- ptx/src/test/spirv_run/shl_overflow.ll | 44 ++-- ptx/src/test/spirv_run/shr_s32.ll | 16 +- ptx/src/test/spirv_run/shr_u32.ll | 30 +-- ptx/src/test/spirv_run/sign_extend.ll | 6 +- ptx/src/test/spirv_run/sin.ll | 6 +- ptx/src/test/spirv_run/sqrt.ll | 6 +- ptx/src/test/spirv_run/sub.ll | 6 +- ptx/src/test/spirv_run/subc_cc.ll | 54 ++--- ptx/src/test/spirv_run/vector.ll | 74 +++--- ptx/src/test/spirv_run/vector4.ll | 12 +- ptx/src/test/spirv_run/vector_extract.ll | 48 ++-- ptx/src/test/spirv_run/vote_ballot.ll | 22 +- ptx/src/test/spirv_run/vshr.ll | 22 +- ptx/src/test/spirv_run/xor.ll | 10 +- ptx/src/translate.rs | 106 --------- 133 files changed, 1543 insertions(+), 1341 deletions(-) diff --git a/ptx/src/emit.rs b/ptx/src/emit.rs index 9e62d5b..7388203 100644 --- a/ptx/src/emit.rs +++ b/ptx/src/emit.rs @@ -7,7 +7,7 @@ use std::ffi::CStr; use std::fmt::Display; use std::io::Write; use std::ptr::null_mut; -use std::{convert, iter, mem, ptr}; +use std::{iter, mem, ptr}; use zluda_llvm::core::*; use zluda_llvm::prelude::*; use zluda_llvm::zluda::*; @@ -157,7 +157,7 @@ impl NamedIdGenerator { if let Some(id) = id { self.register_result(id, func) } else { - func(b"\0".as_ptr() as _) + func(LLVM_UNNAMED) } } @@ -505,10 +505,12 @@ fn emit_function_variable( ) -> Result<(), TranslateError> { let builder = ctx.builder.get(); let llvm_type = get_llvm_type(ctx, &variable.type_)?; - let addr_space = get_llvm_address_space(&ctx.constants, variable.state_space)?; - let value = ctx.names.register_result(variable.name, |name| unsafe { - LLVMZludaBuildAlloca(builder, llvm_type, addr_space, name) - }); + let value = emit_alloca( + ctx, + llvm_type, + get_llvm_address_space(&ctx.constants, variable.state_space)?, + Some(variable.name), + ); match 
variable.initializer { None => {} Some(init) => { @@ -531,12 +533,27 @@ fn emit_method<'a, 'input>( let llvm_method = emit_method_declaration(ctx, &method)?; emit_linkage_for_method(&method, is_kernel, llvm_method); emit_tuning(ctx, llvm_method, &method.tuning); - for statement in method.body.iter().flat_map(convert::identity) { + let statements = match method.body { + Some(statements) => statements, + None => return Ok(()), + }; + // Initial BB that holds all the variable declarations + let bb_with_variables = + unsafe { LLVMAppendBasicBlockInContext(ctx.context.get(), llvm_method, LLVM_UNNAMED) }; + // Rest of the code + let starting_bb = + unsafe { LLVMAppendBasicBlockInContext(ctx.context.get(), llvm_method, LLVM_UNNAMED) }; + unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), starting_bb) }; + for statement in statements.iter() { register_basic_blocks(ctx, llvm_method, statement); } - for statement in method.body.into_iter().flatten() { + for statement in statements.into_iter() { emit_statement(ctx, is_kernel, statement)?; } + // happens if there is a post-ret trailing label + terminate_current_block_if_needed(ctx, None); + unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), bb_with_variables) }; + unsafe { LLVMBuildBr(ctx.builder.get(), starting_bb) }; Ok(()) } @@ -604,7 +621,6 @@ fn emit_statement( is_kernel: bool, statement: crate::translate::ExpandedStatement, ) -> Result<(), TranslateError> { - start_synthetic_basic_block_if_needed(ctx, &statement); Ok(match statement { crate::translate::Statement::Label(label) => emit_label(ctx, label)?, crate::translate::Statement::Variable(var) => emit_function_variable(ctx, var)?, @@ -749,27 +765,6 @@ fn emit_ret_value( Ok(()) } -fn start_synthetic_basic_block_if_needed( - ctx: &mut EmitContext, - statement: &crate::translate::ExpandedStatement, -) { - let current_block = unsafe { LLVMGetInsertBlock(ctx.builder.get()) }; - if current_block == ptr::null_mut() { - return; - } - let terminator = unsafe { 
LLVMGetBasicBlockTerminator(current_block) }; - if terminator == ptr::null_mut() { - return; - } - if let crate::translate::Statement::Label(..) = statement { - return; - } - let new_block = - unsafe { LLVMCreateBasicBlockInContext(ctx.context.get(), b"\0".as_ptr() as _) }; - unsafe { LLVMInsertExistingBasicBlockAfterInsertBlock(ctx.builder.get(), new_block) }; - unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), new_block) }; -} - fn emit_ptr_access( ctx: &mut EmitContext, ptr_access: &crate::translate::PtrAccess, @@ -1073,7 +1068,7 @@ fn emit_value_copy( ) -> Result<(), TranslateError> { let builder = ctx.builder.get(); let type_ = get_llvm_type(ctx, type_)?; - let temp_value = unsafe { LLVMBuildAlloca(builder, type_, LLVM_UNNAMED) }; + let temp_value = emit_alloca(ctx, type_, ctx.constants.private_space, None); unsafe { LLVMBuildStore(builder, src, temp_value) }; ctx.names.register_result(dst, |dst| unsafe { LLVMBuildLoad2(builder, type_, temp_value, dst) @@ -1081,6 +1076,28 @@ fn emit_value_copy( Ok(()) } +// From "Performance Tips for Frontend Authors" (https://llvm.org/docs/Frontend/PerformanceTips.html): +// "The SROA (Scalar Replacement Of Aggregates) and Mem2Reg passes only attempt to eliminate alloca +// instructions that are in the entry basic block. Given SSA is the canonical form expected by much +// of the optimizer; if allocas can not be eliminated by Mem2Reg or SROA, the optimizer is likely to +// be less effective than it could be." 
+fn emit_alloca( + ctx: &mut EmitContext, + type_: LLVMTypeRef, + addr_space: u32, + name: Option, +) -> LLVMValueRef { + let builder = ctx.builder.get(); + let current_bb = unsafe { LLVMGetInsertBlock(builder) }; + let variables_bb = unsafe { LLVMGetFirstBasicBlock(LLVMGetBasicBlockParent(current_bb)) }; + unsafe { LLVMPositionBuilderAtEnd(builder, variables_bb) }; + let result = ctx.names.register_result_option(name, |name| unsafe { + LLVMZludaBuildAlloca(builder, type_, addr_space, name) + }); + unsafe { LLVMPositionBuilderAtEnd(builder, current_bb) }; + result +} + fn emit_instruction( ctx: &mut EmitContext, is_kernel: bool, @@ -3494,12 +3511,12 @@ fn emit_store_var( fn emit_label(ctx: &mut EmitContext, label: Id) -> Result<(), TranslateError> { let new_block = unsafe { LLVMValueAsBasicBlock(ctx.names.value(label)?) }; - terminate_current_block_if_needed(ctx, new_block); + terminate_current_block_if_needed(ctx, Some(new_block)); unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), new_block) }; Ok(()) } -fn terminate_current_block_if_needed(ctx: &mut EmitContext, new_block: LLVMBasicBlockRef) { +fn terminate_current_block_if_needed(ctx: &mut EmitContext, new_block: Option) { let current_block = unsafe { LLVMGetInsertBlock(ctx.builder.get()) }; if current_block == ptr::null_mut() { return; @@ -3508,7 +3525,10 @@ fn terminate_current_block_if_needed(ctx: &mut EmitContext, new_block: LLVMBasic if terminator != ptr::null_mut() { return; } - unsafe { LLVMBuildBr(ctx.builder.get(), new_block) }; + match new_block { + Some(new_block) => unsafe { LLVMBuildBr(ctx.builder.get(), new_block) }, + None => unsafe { LLVMBuildUnreachable(ctx.builder.get()) }, + }; } fn emit_method_declaration<'input>( diff --git a/ptx/src/test/spirv_run/abs.ll b/ptx/src/test/spirv_run/abs.ll index e086eda..4300790 100644 --- a/ptx/src/test/spirv_run/abs.ll +++ b/ptx/src/test/spirv_run/abs.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 
target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { -"37": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"27", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"27", ptr store i32 %"29", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"31" = inttoptr i64 %"14" to ptr - %"39" = getelementptr inbounds i8, ptr %"31", i64 4 - %"32" = load i32, ptr %"39", align 4 + %"38" = getelementptr inbounds i8, ptr %"31", i64 4 + %"32" = load i32, ptr %"38", align 4 store i32 %"32", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 %"15" = call i32 @llvm.abs.i32(i32 %"16", i1 false) @@ -35,8 +37,8 @@ define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"27", ptr %"21" = load i64, ptr addrspace(5) %"5", align 8 %"22" = load i32, ptr addrspace(5) %"7", align 4 %"35" = inttoptr i64 %"21" to ptr - %"41" = getelementptr inbounds i8, ptr %"35", i64 4 - store i32 %"22", ptr %"41", align 4 + %"40" = getelementptr inbounds i8, ptr %"35", i64 4 + store i32 %"22", ptr %"40", align 4 ret void } diff --git a/ptx/src/test/spirv_run/activemask.ll b/ptx/src/test/spirv_run/activemask.ll index 5ca886c..684f89a 100644 --- a/ptx/src/test/spirv_run/activemask.ll +++ b/ptx/src/test/spirv_run/activemask.ll @@ -4,11 +4,13 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__activemask() #0 define protected amdgpu_kernel void @activemask(ptr addrspace(4) 
byref(i64) %"11", ptr addrspace(4) byref(i64) %"12") #1 { -"15": %"6" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"6", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"6", align 1 %"7" = load i64, ptr addrspace(4) %"12", align 8 store i64 %"7", ptr addrspace(5) %"4", align 8 %"8" = call i32 @__zluda_ptx_impl__activemask() diff --git a/ptx/src/test/spirv_run/add.ll b/ptx/src/test/spirv_run/add.ll index 6a8ed12..babe5bb 100644 --- a/ptx/src/test/spirv_run/add.ll +++ b/ptx/src/test/spirv_run/add.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/add_global.ll b/ptx/src/test/spirv_run/add_global.ll index 754623c..7034857 100644 --- a/ptx/src/test/spirv_run/add_global.ll +++ b/ptx/src/test/spirv_run/add_global.ll @@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa" @PI = protected addrspace(1) externally_initialized global float 0x400921FB60000000, align 4 define protected amdgpu_kernel void @add_global(ptr addrspace(4) byref(i64) %"20", ptr addrspace(4) byref(i64) %"21") #0 { -"24": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = 
alloca i64, align 8, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"20", align 8 store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr addrspace(4) %"21", align 8 diff --git a/ptx/src/test/spirv_run/add_non_coherent.ll b/ptx/src/test/spirv_run/add_non_coherent.ll index ab8d0bc..4d97dad 100644 --- a/ptx/src/test/spirv_run/add_non_coherent.ll +++ b/ptx/src/test/spirv_run/add_non_coherent.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/add_param_ptr.ll b/ptx/src/test/spirv_run/add_param_ptr.ll index 810e9c8..9553fa5 100644 --- a/ptx/src/test/spirv_run/add_param_ptr.ll +++ b/ptx/src/test/spirv_run/add_param_ptr.ll @@ -2,32 +2,34 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @add_param_ptr(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 { -"38": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = 
alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + %2 = alloca i64, align 8, addrspace(5) + br label %3 + +3: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"31" = ptrtoint ptr addrspace(4) %"26" to i64 - %0 = alloca i64, align 8, addrspace(5) - store i64 %"31", ptr addrspace(5) %0, align 8 - %"30" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"31", ptr addrspace(5) %1, align 8 + %"30" = load i64, ptr addrspace(5) %1, align 8 store i64 %"30", ptr addrspace(5) %"4", align 8 %"33" = ptrtoint ptr addrspace(4) %"27" to i64 - %1 = alloca i64, align 8, addrspace(5) - store i64 %"33", ptr addrspace(5) %1, align 8 - %"32" = load i64, ptr addrspace(5) %1, align 8 + store i64 %"33", ptr addrspace(5) %2, align 8 + %"32" = load i64, ptr addrspace(5) %2, align 8 store i64 %"32", ptr addrspace(5) %"5", align 8 %"12" = load i64, ptr addrspace(5) %"4", align 8 %"34" = inttoptr i64 %"12" to ptr addrspace(4) - %"40" = getelementptr inbounds i8, ptr addrspace(4) %"34", i64 0 - %"11" = load i64, ptr addrspace(4) %"40", align 8 + %"39" = getelementptr inbounds i8, ptr addrspace(4) %"34", i64 0 + %"11" = load i64, ptr addrspace(4) %"39", align 8 store i64 %"11", ptr addrspace(5) %"4", align 8 %"14" = load i64, ptr addrspace(5) %"5", align 8 %"35" = inttoptr i64 %"14" to ptr addrspace(4) - %"42" = getelementptr inbounds i8, ptr addrspace(4) %"35", i64 0 - %"13" = load i64, ptr addrspace(4) %"42", align 8 + %"41" = getelementptr inbounds i8, ptr addrspace(4) %"35", i64 0 + %"13" = load i64, ptr addrspace(4) %"41", align 8 store i64 %"13", ptr addrspace(5) %"5", align 8 %"16" = load i64, ptr addrspace(5) %"4", align 8 %"36" = inttoptr i64 %"16" to ptr diff --git a/ptx/src/test/spirv_run/add_tuning.ll b/ptx/src/test/spirv_run/add_tuning.ll index 9ec6795..ac2972c 100644 --- a/ptx/src/test/spirv_run/add_tuning.ll +++ b/ptx/src/test/spirv_run/add_tuning.ll @@ -2,13 +2,15 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/addc_cc.ll b/ptx/src/test/spirv_run/addc_cc.ll index 3299982..d781744 100644 --- a/ptx/src/test/spirv_run/addc_cc.ll +++ b/ptx/src/test/spirv_run/addc_cc.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53", ptr addrspace(4) byref(i64) %"54") #0 { -"68": %"13" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"13", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -14,6 +12,10 @@ define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53", %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"13", align 1 %"14" = load i64, ptr addrspace(4) %"53", align 8 store i64 %"14", ptr addrspace(5) %"4", align 8 %"15" = load i64, ptr addrspace(4) %"54", align 8 @@ -24,45 +26,45 @@ define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53", store i32 %"55", ptr addrspace(5) %"9", align 4 %"19" = load i64, 
ptr addrspace(5) %"4", align 8 %"57" = inttoptr i64 %"19" to ptr - %"70" = getelementptr inbounds i8, ptr %"57", i64 4 - %"58" = load i32, ptr %"70", align 4 + %"69" = getelementptr inbounds i8, ptr %"57", i64 4 + %"58" = load i32, ptr %"69", align 4 store i32 %"58", ptr addrspace(5) %"10", align 4 %"21" = load i64, ptr addrspace(5) %"4", align 8 %"59" = inttoptr i64 %"21" to ptr - %"72" = getelementptr inbounds i8, ptr %"59", i64 8 - %"20" = load i32, ptr %"72", align 4 + %"71" = getelementptr inbounds i8, ptr %"59", i64 8 + %"20" = load i32, ptr %"71", align 4 store i32 %"20", ptr addrspace(5) %"11", align 4 %"23" = load i64, ptr addrspace(5) %"4", align 8 %"60" = inttoptr i64 %"23" to ptr - %"74" = getelementptr inbounds i8, ptr %"60", i64 12 - %"22" = load i32, ptr %"74", align 4 + %"73" = getelementptr inbounds i8, ptr %"60", i64 12 + %"22" = load i32, ptr %"73", align 4 store i32 %"22", ptr addrspace(5) %"12", align 4 %"26" = load i32, ptr addrspace(5) %"9", align 4 %"27" = load i32, ptr addrspace(5) %"10", align 4 - %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"26", i32 %"27") - %"24" = extractvalue { i32, i1 } %0, 0 - %"25" = extractvalue { i32, i1 } %0, 1 + %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"26", i32 %"27") + %"24" = extractvalue { i32, i1 } %2, 0 + %"25" = extractvalue { i32, i1 } %2, 1 store i32 %"24", ptr addrspace(5) %"6", align 4 store i1 %"25", ptr addrspace(5) %"13", align 1 %"30" = load i1, ptr addrspace(5) %"13", align 1 %"31" = load i32, ptr addrspace(5) %"6", align 4 %"32" = load i32, ptr addrspace(5) %"11", align 4 - %1 = zext i1 %"30" to i32 - %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"31", i32 %"32") - %3 = extractvalue { i32, i1 } %2, 0 - %4 = extractvalue { i32, i1 } %2, 1 - %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1) - %"28" = extractvalue { i32, i1 } %5, 0 - %6 = extractvalue { i32, i1 } %5, 1 - %"29" = xor i1 %4, %6 + %3 = zext i1 %"30" to i32 + %4 = call { i32, i1 } 
@llvm.uadd.with.overflow.i32(i32 %"31", i32 %"32") + %5 = extractvalue { i32, i1 } %4, 0 + %6 = extractvalue { i32, i1 } %4, 1 + %7 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %5, i32 %3) + %"28" = extractvalue { i32, i1 } %7, 0 + %8 = extractvalue { i32, i1 } %7, 1 + %"29" = xor i1 %6, %8 store i32 %"28", ptr addrspace(5) %"7", align 4 store i1 %"29", ptr addrspace(5) %"13", align 1 %"34" = load i1, ptr addrspace(5) %"13", align 1 %"35" = load i32, ptr addrspace(5) %"7", align 4 %"36" = load i32, ptr addrspace(5) %"12", align 4 - %7 = zext i1 %"34" to i32 - %8 = add i32 %"35", %"36" - %"33" = add i32 %8, %7 + %9 = zext i1 %"34" to i32 + %10 = add i32 %"35", %"36" + %"33" = add i32 %10, %9 store i32 %"33", ptr addrspace(5) %"8", align 4 %"37" = load i64, ptr addrspace(5) %"5", align 8 %"38" = load i32, ptr addrspace(5) %"6", align 4 @@ -71,13 +73,13 @@ define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53", %"39" = load i64, ptr addrspace(5) %"5", align 8 %"40" = load i32, ptr addrspace(5) %"7", align 4 %"66" = inttoptr i64 %"39" to ptr - %"76" = getelementptr inbounds i8, ptr %"66", i64 4 - store i32 %"40", ptr %"76", align 4 + %"75" = getelementptr inbounds i8, ptr %"66", i64 4 + store i32 %"40", ptr %"75", align 4 %"41" = load i64, ptr addrspace(5) %"5", align 8 %"42" = load i32, ptr addrspace(5) %"8", align 4 %"67" = inttoptr i64 %"41" to ptr - %"78" = getelementptr inbounds i8, ptr %"67", i64 8 - store i32 %"42", ptr %"78", align 4 + %"77" = getelementptr inbounds i8, ptr %"67", i64 8 + store i32 %"42", ptr %"77", align 4 ret void } diff --git a/ptx/src/test/spirv_run/addc_cc2.ll b/ptx/src/test/spirv_run/addc_cc2.ll index 836d8d5..cd06ea2 100644 --- a/ptx/src/test/spirv_run/addc_cc2.ll +++ b/ptx/src/test/spirv_run/addc_cc2.ll @@ -2,50 +2,52 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @addc_cc2(ptr addrspace(4) 
byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 { -"50": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"40", align 8 store i64 %"10", ptr addrspace(5) %"5", align 8 - %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) - %"41" = extractvalue { i32, i1 } %0, 0 - %"12" = extractvalue { i32, i1 } %0, 1 + %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) + %"41" = extractvalue { i32, i1 } %2, 0 + %"12" = extractvalue { i32, i1 } %2, 1 store i32 %"41", ptr addrspace(5) %"6", align 4 store i1 %"12", ptr addrspace(5) %"9", align 1 %"15" = load i1, ptr addrspace(5) %"9", align 1 - %1 = zext i1 %"15" to i32 - %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -4, i32 -4) - %3 = extractvalue { i32, i1 } %2, 0 - %4 = extractvalue { i32, i1 } %2, 1 - %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1) - %"42" = extractvalue { i32, i1 } %5, 0 - %6 = extractvalue { i32, i1 } %5, 1 - %"14" = xor i1 %4, %6 + %3 = zext i1 %"15" to i32 + %4 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -4, i32 -4) + %5 = extractvalue { i32, i1 } %4, 0 + %6 = extractvalue { i32, i1 } %4, 1 + %7 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %5, i32 %3) + %"42" = extractvalue { i32, i1 } %7, 0 + %8 = extractvalue { i32, i1 } %7, 1 + %"14" = xor i1 %6, %8 store i32 %"42", ptr addrspace(5) %"6", align 4 store i1 %"14", ptr addrspace(5) %"9", align 1 %"17" = load i1, ptr addrspace(5) %"9", align 1 - %7 = zext i1 %"17" to i32 - %"43" = add i32 0, %7 + %9 = zext i1 %"17" to i32 + %"43" = add i32 0, %9 store i32 %"43", ptr addrspace(5) %"7", align 4 %"20" = load i1, ptr 
addrspace(5) %"9", align 1 - %8 = zext i1 %"20" to i32 - %9 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 -1) - %10 = extractvalue { i32, i1 } %9, 0 - %11 = extractvalue { i32, i1 } %9, 1 - %12 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %10, i32 %8) - %"44" = extractvalue { i32, i1 } %12, 0 - %13 = extractvalue { i32, i1 } %12, 1 - %"19" = xor i1 %11, %13 + %10 = zext i1 %"20" to i32 + %11 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 -1) + %12 = extractvalue { i32, i1 } %11, 0 + %13 = extractvalue { i32, i1 } %11, 1 + %14 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %12, i32 %10) + %"44" = extractvalue { i32, i1 } %14, 0 + %15 = extractvalue { i32, i1 } %14, 1 + %"19" = xor i1 %13, %15 store i32 %"44", ptr addrspace(5) %"6", align 4 store i1 %"19", ptr addrspace(5) %"9", align 1 %"22" = load i1, ptr addrspace(5) %"9", align 1 - %14 = zext i1 %"22" to i32 - %"45" = add i32 0, %14 + %16 = zext i1 %"22" to i32 + %"45" = add i32 0, %16 store i32 %"45", ptr addrspace(5) %"8", align 4 %"23" = load i64, ptr addrspace(5) %"5", align 8 %"24" = load i32, ptr addrspace(5) %"7", align 4 @@ -54,8 +56,8 @@ define protected amdgpu_kernel void @addc_cc2(ptr addrspace(4) byref(i64) %"39", %"25" = load i64, ptr addrspace(5) %"5", align 8 %"26" = load i32, ptr addrspace(5) %"8", align 4 %"48" = inttoptr i64 %"25" to ptr - %"52" = getelementptr inbounds i8, ptr %"48", i64 4 - store i32 %"26", ptr %"52", align 4 + %"51" = getelementptr inbounds i8, ptr %"48", i64 4 + store i32 %"26", ptr %"51", align 4 ret void } diff --git a/ptx/src/test/spirv_run/alloca_call.ll b/ptx/src/test/spirv_run/alloca_call.ll index e6a9d6f..aae7a91 100644 --- a/ptx/src/test/spirv_run/alloca_call.ll +++ b/ptx/src/test/spirv_run/alloca_call.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr addrspace(4) 
byref(i64) %"42", ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 { -"58": %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 %"7" = alloca i1, align 1, addrspace(5) %"8" = alloca double, align 8, addrspace(5) %"9" = alloca double, align 8, addrspace(5) @@ -14,6 +12,10 @@ define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr %"13" = alloca i64, align 8, addrspace(5) %"46" = alloca i64, align 8, addrspace(5) %"48" = alloca [4 x i32], align 16, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"22", align 1 %"50" = load i64, ptr addrspace(4) %"42", align 8 store i64 %"50", ptr addrspace(5) %"10", align 8 %"51" = load i64, ptr addrspace(4) %"43", align 8 @@ -29,30 +31,30 @@ define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr %"30" = load i1, ptr addrspace(5) %"7", align 1 br i1 %"30", label %"6", label %"18" -"18": ; preds = %"58" +"18": ; preds = %1 %"31" = load i64, ptr addrspace(5) %"11", align 8 - %"60" = getelementptr inbounds i8, ptr addrspace(5) %"46", i64 0 - store i64 %"31", ptr addrspace(5) %"60", align 8 + %"59" = getelementptr inbounds i8, ptr addrspace(5) %"46", i64 0 + store i64 %"31", ptr addrspace(5) %"59", align 8 %"32" = load i64, ptr addrspace(5) %"11", align 8 - %0 = inttoptr i64 %"32" to ptr - %"21" = call [4 x i32] %0() + %2 = inttoptr i64 %"32" to ptr + %"21" = call [4 x i32] %2() store [4 x i32] %"21", ptr addrspace(5) %"48", align 4 - %"62" = getelementptr inbounds i8, ptr addrspace(5) %"48", i64 0 - %"19" = load <2 x double>, ptr addrspace(5) %"62", align 16 + %"61" = getelementptr inbounds i8, ptr addrspace(5) %"48", i64 0 + %"19" = load <2 x double>, ptr addrspace(5) %"61", align 16 %"33" = extractelement <2 x double> %"19", i32 0 %"34" = extractelement <2 x double> %"19", i32 1 store double %"33", ptr addrspace(5) %"8", align 8 store double %"34", ptr 
addrspace(5) %"9", align 8 %"35" = load double, ptr addrspace(5) %"8", align 8 %"36" = load double, ptr addrspace(5) %"9", align 8 - %1 = insertelement <2 x double> undef, double %"35", i32 0 - %"20" = insertelement <2 x double> %1, double %"36", i32 1 + %3 = insertelement <2 x double> undef, double %"35", i32 0 + %"20" = insertelement <2 x double> %3, double %"36", i32 1 %"37" = load i64, ptr addrspace(5) %"10", align 8 %"57" = inttoptr i64 %"37" to ptr addrspace(1) store <2 x double> %"20", ptr addrspace(1) %"57", align 16 br label %"6" -"6": ; preds = %"18", %"58" +"6": ; preds = %"18", %1 ret void } diff --git a/ptx/src/test/spirv_run/amdgpu_unnamed.ll b/ptx/src/test/spirv_run/amdgpu_unnamed.ll index 61e3de4..1a1ce58 100644 --- a/ptx/src/test/spirv_run/amdgpu_unnamed.ll +++ b/ptx/src/test/spirv_run/amdgpu_unnamed.ll @@ -8,9 +8,7 @@ target triple = "amdgcn-amd-amdhsa" declare void @__zluda_ptx_impl____assertfail(i64, i64, i32, i64, i64) #0 define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #1 { -"73": %"33" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"33", align 1 %"14" = alloca i64, align 8, addrspace(5) %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) @@ -19,10 +17,17 @@ define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) %"19" = alloca i64, align 8, addrspace(5) %"20" = alloca i32, align 4, addrspace(5) %"59" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) %"60" = alloca i64, align 8, addrspace(5) + %2 = alloca i64, align 8, addrspace(5) %"61" = alloca i32, align 4, addrspace(5) %"62" = alloca i64, align 8, addrspace(5) + %3 = alloca i64, align 8, addrspace(5) %"63" = alloca i64, align 8, addrspace(5) + br label %4 + +4: ; preds = %0 + store i1 false, ptr addrspace(5) %"33", align 1 %"34" = load i64, ptr addrspace(4) %"57", align 8 store i64 %"34", ptr addrspace(5) %"14", 
align 8 %"35" = load i64, ptr addrspace(4) %"58", align 8 @@ -37,28 +42,25 @@ define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) %"40" = load i1, ptr addrspace(5) %"18", align 1 br i1 %"40", label %"13", label %"27" -"27": ; preds = %"73" - %0 = alloca i64, align 8, addrspace(5) - store i64 ptrtoint (ptr addrspace(1) @0 to i64), ptr addrspace(5) %0, align 8 - %"66" = load i64, ptr addrspace(5) %0, align 8 +"27": ; preds = %4 + store i64 ptrtoint (ptr addrspace(1) @0 to i64), ptr addrspace(5) %1, align 8 + %"66" = load i64, ptr addrspace(5) %1, align 8 store i64 %"66", ptr addrspace(5) %"19", align 8 %"42" = load i64, ptr addrspace(5) %"19", align 8 store i64 %"42", ptr addrspace(5) %"59", align 8 - %1 = alloca i64, align 8, addrspace(5) - store i64 ptrtoint (ptr addrspace(1) @1 to i64), ptr addrspace(5) %1, align 8 - %"68" = load i64, ptr addrspace(5) %1, align 8 + store i64 ptrtoint (ptr addrspace(1) @1 to i64), ptr addrspace(5) %2, align 8 + %"68" = load i64, ptr addrspace(5) %2, align 8 store i64 %"68", ptr addrspace(5) %"19", align 8 %"44" = load i64, ptr addrspace(5) %"19", align 8 store i64 %"44", ptr addrspace(5) %"60", align 8 store i32 1, ptr addrspace(5) %"61", align 4 - %2 = alloca i64, align 8, addrspace(5) - store i64 ptrtoint (ptr addrspace(1) @2 to i64), ptr addrspace(5) %2, align 8 - %"70" = load i64, ptr addrspace(5) %2, align 8 + store i64 ptrtoint (ptr addrspace(1) @2 to i64), ptr addrspace(5) %3, align 8 + %"70" = load i64, ptr addrspace(5) %3, align 8 store i64 %"70", ptr addrspace(5) %"19", align 8 %"46" = load i64, ptr addrspace(5) %"19", align 8 store i64 %"46", ptr addrspace(5) %"62", align 8 - %"75" = getelementptr inbounds i8, ptr addrspace(5) %"63", i64 0 - store i64 1, ptr addrspace(5) %"75", align 8 + %"74" = getelementptr inbounds i8, ptr addrspace(5) %"63", i64 0 + store i64 1, ptr addrspace(5) %"74", align 8 %"28" = load i64, ptr addrspace(5) %"59", align 8 %"29" = load i64, ptr addrspace(5) %"60", align 
8 %"30" = load i32, ptr addrspace(5) %"61", align 4 @@ -67,7 +69,7 @@ define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) call void @__zluda_ptx_impl____assertfail(i64 %"28", i64 %"29", i32 %"30", i64 %"31", i64 %"32") br label %"13" -"13": ; preds = %"27", %"73" +"13": ; preds = %"27", %4 %"48" = load i64, ptr addrspace(5) %"16", align 8 %"47" = add i64 %"48", 1 store i64 %"47", ptr addrspace(5) %"17", align 8 diff --git a/ptx/src/test/spirv_run/and.ll b/ptx/src/test/spirv_run/and.ll index c90f390..7bb262d 100644 --- a/ptx/src/test/spirv_run/and.ll +++ b/ptx/src/test/spirv_run/and.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"30": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"22", ptr store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"32" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load i32, ptr %"32", align 4 + %"31" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"31", align 4 store i32 %"13", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 %"17" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/assertfail.ll 
b/ptx/src/test/spirv_run/assertfail.ll index 001dbfe..9334859 100644 --- a/ptx/src/test/spirv_run/assertfail.ll +++ b/ptx/src/test/spirv_run/assertfail.ll @@ -4,42 +4,44 @@ target triple = "amdgcn-amd-amdhsa" declare void @__zluda_ptx_impl____assertfail(i64, i64, i32, i64, i64) #0 define protected amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"62", ptr addrspace(4) byref(i64) %"63") #1 { -"81": %"35" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"35", align 1 %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) %"18" = alloca i64, align 8, addrspace(5) %"19" = alloca i32, align 4, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) %"64" = alloca i64, align 8, addrspace(5) %"66" = alloca i64, align 8, addrspace(5) %"68" = alloca i32, align 4, addrspace(5) %"70" = alloca i64, align 8, addrspace(5) %"72" = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"35", align 1 %"36" = load i64, ptr addrspace(4) %"62", align 8 store i64 %"36", ptr addrspace(5) %"15", align 8 %"37" = load i64, ptr addrspace(4) %"63", align 8 store i64 %"37", ptr addrspace(5) %"16", align 8 - %0 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %0, align 4 - %"74" = load i32, ptr addrspace(5) %0, align 4 + store i32 0, ptr addrspace(5) %1, align 4 + %"74" = load i32, ptr addrspace(5) %1, align 4 store i32 %"74", ptr addrspace(5) %"19", align 4 %"39" = load i64, ptr addrspace(5) %"15", align 8 - %"83" = getelementptr inbounds i8, ptr addrspace(5) %"64", i64 0 - store i64 %"39", ptr addrspace(5) %"83", align 8 + %"82" = getelementptr inbounds i8, ptr addrspace(5) %"64", i64 0 + store i64 %"39", ptr addrspace(5) %"82", align 8 %"40" = load i64, ptr addrspace(5) %"15", align 8 - %"85" = getelementptr inbounds i8, ptr addrspace(5) %"66", i64 0 - store i64 %"40", ptr addrspace(5) %"85", align 8 + %"84" = getelementptr 
inbounds i8, ptr addrspace(5) %"66", i64 0 + store i64 %"40", ptr addrspace(5) %"84", align 8 %"41" = load i32, ptr addrspace(5) %"19", align 4 - %"87" = getelementptr inbounds i8, ptr addrspace(5) %"68", i64 0 - store i32 %"41", ptr addrspace(5) %"87", align 4 + %"86" = getelementptr inbounds i8, ptr addrspace(5) %"68", i64 0 + store i32 %"41", ptr addrspace(5) %"86", align 4 %"42" = load i64, ptr addrspace(5) %"15", align 8 - %"89" = getelementptr inbounds i8, ptr addrspace(5) %"70", i64 0 - store i64 %"42", ptr addrspace(5) %"89", align 8 + %"88" = getelementptr inbounds i8, ptr addrspace(5) %"70", i64 0 + store i64 %"42", ptr addrspace(5) %"88", align 8 %"43" = load i64, ptr addrspace(5) %"15", align 8 - %"91" = getelementptr inbounds i8, ptr addrspace(5) %"72", i64 0 - store i64 %"43", ptr addrspace(5) %"91", align 8 + %"90" = getelementptr inbounds i8, ptr addrspace(5) %"72", i64 0 + store i64 %"43", ptr addrspace(5) %"90", align 8 %"30" = load i64, ptr addrspace(5) %"64", align 8 %"31" = load i64, ptr addrspace(5) %"66", align 8 %"32" = load i32, ptr addrspace(5) %"68", align 4 diff --git a/ptx/src/test/spirv_run/atom_add.ll b/ptx/src/test/spirv_run/atom_add.ll index dff9e0e..6dd159f 100644 --- a/ptx/src/test/spirv_run/atom_add.ll +++ b/ptx/src/test/spirv_run/atom_add.ll @@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [1024 x i8] undef, align 4 define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { -"37": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"28", align 8 store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr 
addrspace(4) %"29", align 8 @@ -21,8 +23,8 @@ define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"28", store i32 %"12", ptr addrspace(5) %"7", align 4 %"15" = load i64, ptr addrspace(5) %"5", align 8 %"31" = inttoptr i64 %"15" to ptr - %"39" = getelementptr inbounds i8, ptr %"31", i64 4 - %"14" = load i32, ptr %"39", align 4 + %"38" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load i32, ptr %"38", align 4 store i32 %"14", ptr addrspace(5) %"8", align 4 %"16" = load i32, ptr addrspace(5) %"7", align 4 store i32 %"16", ptr addrspace(3) @"4", align 4 @@ -38,8 +40,8 @@ define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"28", %"22" = load i64, ptr addrspace(5) %"6", align 8 %"23" = load i32, ptr addrspace(5) %"8", align 4 %"36" = inttoptr i64 %"22" to ptr - %"41" = getelementptr inbounds i8, ptr %"36", i64 4 - store i32 %"23", ptr %"41", align 4 + %"40" = getelementptr inbounds i8, ptr %"36", i64 4 + store i32 %"23", ptr %"40", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_add_f16.ll b/ptx/src/test/spirv_run/atom_add_f16.ll index e63de90..a8fa430 100644 --- a/ptx/src/test/spirv_run/atom_add_f16.ll +++ b/ptx/src/test/spirv_run/atom_add_f16.ll @@ -4,20 +4,22 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [1024 x i8] undef, align 4 define protected amdgpu_kernel void @atom_add_f16(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 { -"37": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca half, align 2, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"26", align 8 store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"27", align 8 store i64 %"10", ptr addrspace(5) %"6", align 8 %"12" = load i64, ptr addrspace(5) 
%"5", align 8 %"28" = inttoptr i64 %"12" to ptr - %"39" = getelementptr inbounds i8, ptr %"28", i64 2 - %"29" = load i16, ptr %"39", align 2 + %"38" = getelementptr inbounds i8, ptr %"28", i64 2 + %"29" = load i16, ptr %"38", align 2 %"11" = bitcast i16 %"29" to half store half %"11", ptr addrspace(5) %"7", align 2 %"14" = load i64, ptr addrspace(5) %"5", align 8 @@ -38,9 +40,9 @@ define protected amdgpu_kernel void @atom_add_f16(ptr addrspace(4) byref(i64) %" %"20" = load i64, ptr addrspace(5) %"6", align 8 %"21" = load half, ptr addrspace(5) %"7", align 2 %"35" = inttoptr i64 %"20" to ptr - %"41" = getelementptr inbounds i8, ptr %"35", i64 2 + %"40" = getelementptr inbounds i8, ptr %"35", i64 2 %"36" = bitcast half %"21" to i16 - store i16 %"36", ptr %"41", align 2 + store i16 %"36", ptr %"40", align 2 ret void } diff --git a/ptx/src/test/spirv_run/atom_add_float.ll b/ptx/src/test/spirv_run/atom_add_float.ll index 329d198..d0e3c14 100644 --- a/ptx/src/test/spirv_run/atom_add_float.ll +++ b/ptx/src/test/spirv_run/atom_add_float.ll @@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [1024 x i8] undef, align 4 define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { -"37": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"28", align 8 store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 @@ -21,8 +23,8 @@ define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) store float %"12", ptr addrspace(5) %"7", align 4 %"15" = load i64, ptr addrspace(5) %"5", align 8 %"31" = inttoptr 
i64 %"15" to ptr - %"39" = getelementptr inbounds i8, ptr %"31", i64 4 - %"14" = load float, ptr %"39", align 4 + %"38" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load float, ptr %"38", align 4 store float %"14", ptr addrspace(5) %"8", align 4 %"16" = load float, ptr addrspace(5) %"7", align 4 store float %"16", ptr addrspace(3) @"4", align 4 @@ -38,8 +40,8 @@ define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"22" = load i64, ptr addrspace(5) %"6", align 8 %"23" = load float, ptr addrspace(5) %"8", align 4 %"36" = inttoptr i64 %"22" to ptr - %"41" = getelementptr inbounds i8, ptr %"36", i64 4 - store float %"23", ptr %"41", align 4 + %"40" = getelementptr inbounds i8, ptr %"36", i64 4 + store float %"23", ptr %"40", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_cas.ll b/ptx/src/test/spirv_run/atom_cas.ll index 2e0475a..a9af2c4 100644 --- a/ptx/src/test/spirv_run/atom_cas.ll +++ b/ptx/src/test/spirv_run/atom_cas.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 { -"38": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"29", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"30", align 8 @@ -20,14 +22,14 @@ define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"29", %"14" = load i64, ptr addrspace(5) %"4", align 8 %"15" = load i32, ptr addrspace(5) %"6", align 4 %"32" = inttoptr i64 %"14" to ptr - %"40" = getelementptr inbounds i8, 
ptr %"32", i64 4 - %0 = cmpxchg ptr %"40", i32 %"15", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4 - %"33" = extractvalue { i32, i1 } %0, 0 + %"39" = getelementptr inbounds i8, ptr %"32", i64 4 + %2 = cmpxchg ptr %"39", i32 %"15", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4 + %"33" = extractvalue { i32, i1 } %2, 0 store i32 %"33", ptr addrspace(5) %"6", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"35" = inttoptr i64 %"17" to ptr - %"42" = getelementptr inbounds i8, ptr %"35", i64 4 - %"16" = load i32, ptr %"42", align 4 + %"41" = getelementptr inbounds i8, ptr %"35", i64 4 + %"16" = load i32, ptr %"41", align 4 store i32 %"16", ptr addrspace(5) %"7", align 4 %"18" = load i64, ptr addrspace(5) %"5", align 8 %"19" = load i32, ptr addrspace(5) %"6", align 4 @@ -36,8 +38,8 @@ define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"29", %"20" = load i64, ptr addrspace(5) %"5", align 8 %"21" = load i32, ptr addrspace(5) %"7", align 4 %"37" = inttoptr i64 %"20" to ptr - %"44" = getelementptr inbounds i8, ptr %"37", i64 4 - store i32 %"21", ptr %"44", align 4 + %"43" = getelementptr inbounds i8, ptr %"37", i64 4 + store i32 %"21", ptr %"43", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_inc.ll b/ptx/src/test/spirv_run/atom_inc.ll index 6fdc3c7..212c592 100644 --- a/ptx/src/test/spirv_run/atom_inc.ll +++ b/ptx/src/test/spirv_run/atom_inc.ll @@ -6,14 +6,16 @@ declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr, i32) #0 declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_global_inc(ptr addrspace(1), i32) #0 define protected amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #1 { -"38": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) 
%"8" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"30", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"31", align 8 @@ -37,13 +39,13 @@ define protected amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"30", %"20" = load i64, ptr addrspace(5) %"5", align 8 %"21" = load i32, ptr addrspace(5) %"7", align 4 %"36" = inttoptr i64 %"20" to ptr - %"48" = getelementptr inbounds i8, ptr %"36", i64 4 - store i32 %"21", ptr %"48", align 4 + %"47" = getelementptr inbounds i8, ptr %"36", i64 4 + store i32 %"21", ptr %"47", align 4 %"22" = load i64, ptr addrspace(5) %"5", align 8 %"23" = load i32, ptr addrspace(5) %"8", align 4 %"37" = inttoptr i64 %"22" to ptr - %"50" = getelementptr inbounds i8, ptr %"37", i64 8 - store i32 %"23", ptr %"50", align 4 + %"49" = getelementptr inbounds i8, ptr %"37", i64 8 + store i32 %"23", ptr %"49", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_ld_st.ll b/ptx/src/test/spirv_run/atom_ld_st.ll index 3b6488c..eb59d31 100644 --- a/ptx/src/test/spirv_run/atom_ld_st.ll +++ b/ptx/src/test/spirv_run/atom_ld_st.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @atom_ld_st(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { -"18": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"14", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 diff --git a/ptx/src/test/spirv_run/atom_ld_st_vec.ll 
b/ptx/src/test/spirv_run/atom_ld_st_vec.ll index 7ea0fc5..5fa2409 100644 --- a/ptx/src/test/spirv_run/atom_ld_st_vec.ll +++ b/ptx/src/test/spirv_run/atom_ld_st_vec.ll @@ -2,33 +2,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @atom_ld_st_vec(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"23": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"19", align 8 store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"20", align 8 store i64 %"12", ptr addrspace(5) %"5", align 8 %"13" = load i64, ptr addrspace(5) %"4", align 8 %"21" = inttoptr i64 %"13" to ptr - %0 = load atomic i128, ptr %"21" syncscope("agent-one-as") acquire, align 16 - %"8" = bitcast i128 %0 to <2 x i64> + %2 = load atomic i128, ptr %"21" syncscope("agent-one-as") acquire, align 16 + %"8" = bitcast i128 %2 to <2 x i64> %"14" = extractelement <2 x i64> %"8", i32 0 %"15" = extractelement <2 x i64> %"8", i32 1 store i64 %"14", ptr addrspace(5) %"6", align 8 store i64 %"15", ptr addrspace(5) %"7", align 8 %"16" = load i64, ptr addrspace(5) %"6", align 8 %"17" = load i64, ptr addrspace(5) %"7", align 8 - %1 = insertelement <2 x i64> undef, i64 %"16", i32 0 - %"9" = insertelement <2 x i64> %1, i64 %"17", i32 1 + %3 = insertelement <2 x i64> undef, i64 %"16", i32 0 + %"9" = insertelement <2 x i64> %3, i64 %"17", i32 1 %"18" = load i64, ptr addrspace(5) %"5", align 8 %"22" = inttoptr i64 %"18" to ptr - %2 = bitcast <2 x i64> %"9" to i128 - store atomic i128 %2, ptr %"22" syncscope("agent-one-as") release, align 16 + %4 = 
bitcast <2 x i64> %"9" to i128 + store atomic i128 %4, ptr %"22" syncscope("agent-one-as") release, align 16 ret void } diff --git a/ptx/src/test/spirv_run/atom_max_u32.ll b/ptx/src/test/spirv_run/atom_max_u32.ll index 64cb430..8135e3d 100644 --- a/ptx/src/test/spirv_run/atom_max_u32.ll +++ b/ptx/src/test/spirv_run/atom_max_u32.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @atom_max_u32(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"30": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -23,8 +25,8 @@ define protected amdgpu_kernel void @atom_max_u32(ptr addrspace(4) byref(i64) %" store i32 %"14", ptr %"25", align 4 %"16" = load i64, ptr addrspace(5) %"4", align 8 %"26" = inttoptr i64 %"16" to ptr - %"32" = getelementptr inbounds i8, ptr %"26", i64 4 - %"15" = load i32, ptr %"32", align 4 + %"31" = getelementptr inbounds i8, ptr %"26", i64 4 + %"15" = load i32, ptr %"31", align 4 store i32 %"15", ptr addrspace(5) %"7", align 4 %"18" = load i64, ptr addrspace(5) %"5", align 8 %"19" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/b64tof64.ll b/ptx/src/test/spirv_run/b64tof64.ll index 5cd7a2c..4a8d9b3 100644 --- a/ptx/src/test/spirv_run/b64tof64.ll +++ b/ptx/src/test/spirv_run/b64tof64.ll @@ -2,22 +2,24 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void 
@b64tof64(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca double, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load double, ptr addrspace(4) %"17", align 8 store double %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"10", ptr addrspace(5) %"6", align 8 %"12" = load double, ptr addrspace(5) %"4", align 8 %"20" = bitcast double %"12" to i64 - %0 = alloca i64, align 8, addrspace(5) - store i64 %"20", ptr addrspace(5) %0, align 8 - %"11" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"20", ptr addrspace(5) %1, align 8 + %"11" = load i64, ptr addrspace(5) %1, align 8 store i64 %"11", ptr addrspace(5) %"5", align 8 %"14" = load i64, ptr addrspace(5) %"5", align 8 %"21" = inttoptr i64 %"14" to ptr diff --git a/ptx/src/test/spirv_run/barrier.ll b/ptx/src/test/spirv_run/barrier.ll index e2e65f2..55d0c93 100644 --- a/ptx/src/test/spirv_run/barrier.ll +++ b/ptx/src/test/spirv_run/barrier.ll @@ -4,8 +4,10 @@ target triple = "amdgcn-amd-amdhsa" declare void @__zluda_ptx_impl__barrier_sync(i32) #0 define protected amdgpu_kernel void @barrier() #1 { -"4": %"2" = alloca i1, align 1, addrspace(5) + br label %1 + +1: ; preds = %0 store i1 false, ptr addrspace(5) %"2", align 1 call void @__zluda_ptx_impl__barrier_sync(i32 0) ret void diff --git a/ptx/src/test/spirv_run/bfe.ll b/ptx/src/test/spirv_run/bfe.ll index 99fd766..6644c20 100644 --- a/ptx/src/test/spirv_run/bfe.ll +++ b/ptx/src/test/spirv_run/bfe.ll @@ -4,14 +4,16 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__bfe_u32(i32, i32, i32) #0 define protected amdgpu_kernel void @bfe(ptr 
addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 { -"34": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"28", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 @@ -22,13 +24,13 @@ define protected amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"28", ptr store i32 %"12", ptr addrspace(5) %"6", align 4 %"15" = load i64, ptr addrspace(5) %"4", align 8 %"31" = inttoptr i64 %"15" to ptr - %"41" = getelementptr inbounds i8, ptr %"31", i64 4 - %"14" = load i32, ptr %"41", align 4 + %"40" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load i32, ptr %"40", align 4 store i32 %"14", ptr addrspace(5) %"7", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"32" = inttoptr i64 %"17" to ptr - %"43" = getelementptr inbounds i8, ptr %"32", i64 8 - %"16" = load i32, ptr %"43", align 4 + %"42" = getelementptr inbounds i8, ptr %"32", i64 8 + %"16" = load i32, ptr %"42", align 4 store i32 %"16", ptr addrspace(5) %"8", align 4 %"19" = load i32, ptr addrspace(5) %"6", align 4 %"20" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/bfi.ll b/ptx/src/test/spirv_run/bfi.ll index bea4ac5..3c6a377 100644 --- a/ptx/src/test/spirv_run/bfi.ll +++ b/ptx/src/test/spirv_run/bfi.ll @@ -4,15 +4,17 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__bfi_b32(i32, i32, i32, i32) #0 define protected amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #1 { -"44": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = 
alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"34", align 8 store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"35", align 8 @@ -23,18 +25,18 @@ define protected amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"34", ptr store i32 %"13", ptr addrspace(5) %"6", align 4 %"16" = load i64, ptr addrspace(5) %"4", align 8 %"37" = inttoptr i64 %"16" to ptr - %"52" = getelementptr inbounds i8, ptr %"37", i64 4 - %"15" = load i32, ptr %"52", align 4 + %"51" = getelementptr inbounds i8, ptr %"37", i64 4 + %"15" = load i32, ptr %"51", align 4 store i32 %"15", ptr addrspace(5) %"7", align 4 %"18" = load i64, ptr addrspace(5) %"4", align 8 %"38" = inttoptr i64 %"18" to ptr - %"54" = getelementptr inbounds i8, ptr %"38", i64 8 - %"17" = load i32, ptr %"54", align 4 + %"53" = getelementptr inbounds i8, ptr %"38", i64 8 + %"17" = load i32, ptr %"53", align 4 store i32 %"17", ptr addrspace(5) %"8", align 4 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"39" = inttoptr i64 %"20" to ptr - %"56" = getelementptr inbounds i8, ptr %"39", i64 12 - %"19" = load i32, ptr %"56", align 4 + %"55" = getelementptr inbounds i8, ptr %"39", i64 12 + %"19" = load i32, ptr %"55", align 4 store i32 %"19", ptr addrspace(5) %"9", align 4 %"22" = load i32, ptr addrspace(5) %"6", align 4 %"23" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/bfind.ll b/ptx/src/test/spirv_run/bfind.ll index ebd9fea..a427332 100644 --- a/ptx/src/test/spirv_run/bfind.ll +++ b/ptx/src/test/spirv_run/bfind.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define 
protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 { -"52": %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -13,6 +11,10 @@ define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", pt %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"12", align 1 %"13" = load i64, ptr addrspace(4) %"41", align 8 store i64 %"13", ptr addrspace(5) %"4", align 8 %"14" = load i64, ptr addrspace(4) %"42", align 8 @@ -23,31 +25,31 @@ define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", pt store i32 %"15", ptr addrspace(5) %"6", align 4 %"18" = load i64, ptr addrspace(5) %"4", align 8 %"44" = inttoptr i64 %"18" to ptr - %"54" = getelementptr inbounds i8, ptr %"44", i64 4 - %"17" = load i32, ptr %"54", align 4 + %"53" = getelementptr inbounds i8, ptr %"44", i64 4 + %"17" = load i32, ptr %"53", align 4 store i32 %"17", ptr addrspace(5) %"7", align 4 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"45" = inttoptr i64 %"20" to ptr - %"56" = getelementptr inbounds i8, ptr %"45", i64 8 - %"19" = load i32, ptr %"56", align 4 + %"55" = getelementptr inbounds i8, ptr %"45", i64 8 + %"19" = load i32, ptr %"55", align 4 store i32 %"19", ptr addrspace(5) %"8", align 4 %"22" = load i32, ptr addrspace(5) %"6", align 4 - %0 = icmp eq i32 %"22", 0 - %1 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true) - %2 = sub i32 31, %1 - %"46" = select i1 %0, i32 -1, i32 %2 + %2 = icmp eq i32 %"22", 0 + %3 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true) + %4 = sub i32 31, %3 + %"46" = select i1 %2, i32 -1, i32 %4 store i32 %"46", ptr addrspace(5) %"9", align 4 %"24" = load i32, ptr addrspace(5) %"7", align 4 - 
%3 = icmp eq i32 %"24", 0 - %4 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true) - %5 = sub i32 31, %4 - %"47" = select i1 %3, i32 -1, i32 %5 + %5 = icmp eq i32 %"24", 0 + %6 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true) + %7 = sub i32 31, %6 + %"47" = select i1 %5, i32 -1, i32 %7 store i32 %"47", ptr addrspace(5) %"10", align 4 %"26" = load i32, ptr addrspace(5) %"8", align 4 - %6 = icmp eq i32 %"26", 0 - %7 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true) - %8 = sub i32 31, %7 - %"48" = select i1 %6, i32 -1, i32 %8 + %8 = icmp eq i32 %"26", 0 + %9 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true) + %10 = sub i32 31, %9 + %"48" = select i1 %8, i32 -1, i32 %10 store i32 %"48", ptr addrspace(5) %"11", align 4 %"27" = load i64, ptr addrspace(5) %"5", align 8 %"28" = load i32, ptr addrspace(5) %"9", align 4 @@ -56,13 +58,13 @@ define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", pt %"29" = load i64, ptr addrspace(5) %"5", align 8 %"30" = load i32, ptr addrspace(5) %"10", align 4 %"50" = inttoptr i64 %"29" to ptr - %"58" = getelementptr inbounds i8, ptr %"50", i64 4 - store i32 %"30", ptr %"58", align 4 + %"57" = getelementptr inbounds i8, ptr %"50", i64 4 + store i32 %"30", ptr %"57", align 4 %"31" = load i64, ptr addrspace(5) %"5", align 8 %"32" = load i32, ptr addrspace(5) %"11", align 4 %"51" = inttoptr i64 %"31" to ptr - %"60" = getelementptr inbounds i8, ptr %"51", i64 8 - store i32 %"32", ptr %"60", align 4 + %"59" = getelementptr inbounds i8, ptr %"51", i64 8 + store i32 %"32", ptr %"59", align 4 ret void } diff --git a/ptx/src/test/spirv_run/bfind_shiftamt.ll b/ptx/src/test/spirv_run/bfind_shiftamt.ll index fd21514..9968d85 100644 --- a/ptx/src/test/spirv_run/bfind_shiftamt.ll +++ b/ptx/src/test/spirv_run/bfind_shiftamt.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"41", ptr 
addrspace(4) byref(i64) %"42") #0 { -"52": %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -13,6 +11,10 @@ define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"12", align 1 %"13" = load i64, ptr addrspace(4) %"41", align 8 store i64 %"13", ptr addrspace(5) %"4", align 8 %"14" = load i64, ptr addrspace(4) %"42", align 8 @@ -23,28 +25,28 @@ define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) store i32 %"15", ptr addrspace(5) %"6", align 4 %"18" = load i64, ptr addrspace(5) %"4", align 8 %"44" = inttoptr i64 %"18" to ptr - %"54" = getelementptr inbounds i8, ptr %"44", i64 4 - %"17" = load i32, ptr %"54", align 4 + %"53" = getelementptr inbounds i8, ptr %"44", i64 4 + %"17" = load i32, ptr %"53", align 4 store i32 %"17", ptr addrspace(5) %"7", align 4 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"45" = inttoptr i64 %"20" to ptr - %"56" = getelementptr inbounds i8, ptr %"45", i64 8 - %"19" = load i32, ptr %"56", align 4 + %"55" = getelementptr inbounds i8, ptr %"45", i64 8 + %"19" = load i32, ptr %"55", align 4 store i32 %"19", ptr addrspace(5) %"8", align 4 %"22" = load i32, ptr addrspace(5) %"6", align 4 - %0 = icmp eq i32 %"22", 0 - %1 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true) - %"46" = select i1 %0, i32 -1, i32 %1 + %2 = icmp eq i32 %"22", 0 + %3 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true) + %"46" = select i1 %2, i32 -1, i32 %3 store i32 %"46", ptr addrspace(5) %"9", align 4 %"24" = load i32, ptr addrspace(5) %"7", align 4 - %2 = icmp eq i32 %"24", 0 - %3 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true) - %"47" = select i1 %2, i32 -1, i32 %3 + %4 
= icmp eq i32 %"24", 0 + %5 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true) + %"47" = select i1 %4, i32 -1, i32 %5 store i32 %"47", ptr addrspace(5) %"10", align 4 %"26" = load i32, ptr addrspace(5) %"8", align 4 - %4 = icmp eq i32 %"26", 0 - %5 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true) - %"48" = select i1 %4, i32 -1, i32 %5 + %6 = icmp eq i32 %"26", 0 + %7 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true) + %"48" = select i1 %6, i32 -1, i32 %7 store i32 %"48", ptr addrspace(5) %"11", align 4 %"27" = load i64, ptr addrspace(5) %"5", align 8 %"28" = load i32, ptr addrspace(5) %"9", align 4 @@ -53,13 +55,13 @@ define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"29" = load i64, ptr addrspace(5) %"5", align 8 %"30" = load i32, ptr addrspace(5) %"10", align 4 %"50" = inttoptr i64 %"29" to ptr - %"58" = getelementptr inbounds i8, ptr %"50", i64 4 - store i32 %"30", ptr %"58", align 4 + %"57" = getelementptr inbounds i8, ptr %"50", i64 4 + store i32 %"30", ptr %"57", align 4 %"31" = load i64, ptr addrspace(5) %"5", align 8 %"32" = load i32, ptr addrspace(5) %"11", align 4 %"51" = inttoptr i64 %"31" to ptr - %"60" = getelementptr inbounds i8, ptr %"51", i64 8 - store i32 %"32", ptr %"60", align 4 + %"59" = getelementptr inbounds i8, ptr %"51", i64 8 + store i32 %"32", ptr %"59", align 4 ret void } diff --git a/ptx/src/test/spirv_run/block.ll b/ptx/src/test/spirv_run/block.ll index 87dd227..b482fe2 100644 --- a/ptx/src/test/spirv_run/block.ll +++ b/ptx/src/test/spirv_run/block.ll @@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"26": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) 
%"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"23", align 8 diff --git a/ptx/src/test/spirv_run/bra.ll b/ptx/src/test/spirv_run/bra.ll index 6d62cca..4173392 100644 --- a/ptx/src/test/spirv_run/bra.ll +++ b/ptx/src/test/spirv_run/bra.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"28": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"12" = load i64, ptr addrspace(4) %"24", align 8 store i64 %"12", ptr addrspace(5) %"7", align 8 %"13" = load i64, ptr addrspace(4) %"25", align 8 @@ -19,19 +21,19 @@ define protected amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"24", ptr store i64 %"14", ptr addrspace(5) %"9", align 8 br label %"4" -"4": ; preds = %"28" +"4": ; preds = %1 %"17" = load i64, ptr addrspace(5) %"9", align 8 %"16" = add i64 %"17", 1 store i64 %"16", ptr addrspace(5) %"10", align 8 br label %"6" -0: ; No predecessors! +"5": ; No predecessors! 
%"19" = load i64, ptr addrspace(5) %"9", align 8 %"18" = add i64 %"19", 2 store i64 %"18", ptr addrspace(5) %"10", align 8 br label %"6" -"6": ; preds = %0, %"4" +"6": ; preds = %"5", %"4" %"20" = load i64, ptr addrspace(5) %"8", align 8 %"21" = load i64, ptr addrspace(5) %"10", align 8 %"27" = inttoptr i64 %"20" to ptr diff --git a/ptx/src/test/spirv_run/brev.ll b/ptx/src/test/spirv_run/brev.ll index a519c2b..d838750 100644 --- a/ptx/src/test/spirv_run/brev.ll +++ b/ptx/src/test/spirv_run/brev.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/call.ll b/ptx/src/test/spirv_run/call.ll index d89322e..684bb0c 100644 --- a/ptx/src/test/spirv_run/call.ll +++ b/ptx/src/test/spirv_run/call.ll @@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define private i64 @incr(i64 %"29") #0 { -"49": %"18" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) %"20" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"20", align 1 %"42" = alloca i64, align 8, addrspace(5) %"43" = alloca i64, align 8, addrspace(5) %"14" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 store i64 %"29", ptr addrspace(5) %"18", align 8 + store i1 false, ptr addrspace(5) %"20", align 1 
%"30" = load i64, ptr addrspace(5) %"18", align 8 store i64 %"30", ptr addrspace(5) %"43", align 8 %"31" = load i64, ptr addrspace(5) %"43", align 8 @@ -27,14 +29,16 @@ define private i64 @incr(i64 %"29") #0 { } define protected amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { -"48": %"19" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"19", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"19", align 1 %"21" = load i64, ptr addrspace(4) %"38", align 8 store i64 %"21", ptr addrspace(5) %"7", align 8 %"22" = load i64, ptr addrspace(4) %"39", align 8 diff --git a/ptx/src/test/spirv_run/call_bug.ll b/ptx/src/test/spirv_run/call_bug.ll index 3ad9146..12c8e2c 100644 --- a/ptx/src/test/spirv_run/call_bug.ll +++ b/ptx/src/test/spirv_run/call_bug.ll @@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define private [2 x i32] @incr(i64 %"21") #0 { -"56": %"16" = alloca i64, align 8, addrspace(5) %"15" = alloca [2 x i32], align 4, addrspace(5) %"19" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"19", align 1 %"42" = alloca [2 x i32], align 4, addrspace(5) %"43" = alloca i64, align 8, addrspace(5) %"4" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 store i64 %"21", ptr addrspace(5) %"16", align 8 + store i1 false, ptr addrspace(5) %"19", align 1 %"22" = load i64, ptr addrspace(5) %"16", align 8 store i64 %"22", ptr addrspace(5) %"43", align 8 %"23" = load i64, ptr addrspace(5) %"43", align 8 @@ -27,15 +29,17 @@ define private [2 x i32] @incr(i64 %"21") #0 { } define protected amdgpu_kernel void @call_bug(ptr addrspace(4) byref(i64) %"44", ptr 
addrspace(4) byref(i64) %"45") #0 { -"57": %"20" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"20", align 1 %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) %"46" = alloca i64, align 8, addrspace(5) %"47" = alloca [2 x i32], align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"20", align 1 %"29" = load i64, ptr addrspace(4) %"44", align 8 store i64 %"29", ptr addrspace(5) %"8", align 8 %"30" = load i64, ptr addrspace(4) %"45", align 8 @@ -49,11 +53,11 @@ define protected amdgpu_kernel void @call_bug(ptr addrspace(4) byref(i64) %"44", store i64 ptrtoint (ptr @incr to i64), ptr addrspace(5) %"11", align 8 %"17" = load i64, ptr addrspace(5) %"46", align 8 %"35" = load i64, ptr addrspace(5) %"11", align 8 - %0 = inttoptr i64 %"35" to ptr - %"18" = call [2 x i32] %0(i64 %"17") + %2 = inttoptr i64 %"35" to ptr + %"18" = call [2 x i32] %2(i64 %"17") store [2 x i32] %"18", ptr addrspace(5) %"47", align 4 - %"59" = getelementptr inbounds i8, ptr addrspace(5) %"47", i64 0 - %"36" = load i64, ptr addrspace(5) %"59", align 8 + %"57" = getelementptr inbounds i8, ptr addrspace(5) %"47", i64 0 + %"36" = load i64, ptr addrspace(5) %"57", align 8 store i64 %"36", ptr addrspace(5) %"10", align 8 %"37" = load i64, ptr addrspace(5) %"9", align 8 %"38" = load i64, ptr addrspace(5) %"10", align 8 diff --git a/ptx/src/test/spirv_run/call_multi_return.ll b/ptx/src/test/spirv_run/call_multi_return.ll index 35cc5e0..5cf701b 100644 --- a/ptx/src/test/spirv_run/call_multi_return.ll +++ b/ptx/src/test/spirv_run/call_multi_return.ll @@ -4,16 +4,18 @@ target triple = "amdgcn-amd-amdhsa" %struct.i64i32 = type { i64, i32 } define private %struct.i64i32 @"1"(i32 %"39", i32 %"40") #0 { -"62": %"18" = alloca i32, align 4, addrspace(5) %"19" = alloca i32, align 4, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) 
%"17" = alloca i32, align 4, addrspace(5) %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 %"20" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 store i32 %"39", ptr addrspace(5) %"18", align 4 store i32 %"40", ptr addrspace(5) %"19", align 4 + store i1 false, ptr addrspace(5) %"22", align 1 %"42" = load i32, ptr addrspace(5) %"18", align 4 %"43" = load i32, ptr addrspace(5) %"19", align 4 %"41" = add i32 %"42", %"43" @@ -27,15 +29,13 @@ define private %struct.i64i32 @"1"(i32 %"39", i32 %"40") #0 { store i32 %"46", ptr addrspace(5) %"17", align 4 %"49" = load i64, ptr addrspace(5) %"16", align 8 %"50" = load i32, ptr addrspace(5) %"17", align 4 - %0 = insertvalue %struct.i64i32 undef, i64 %"49", 0 - %1 = insertvalue %struct.i64i32 %0, i32 %"50", 1 - ret %struct.i64i32 %1 + %2 = insertvalue %struct.i64i32 undef, i64 %"49", 0 + %3 = insertvalue %struct.i64i32 %2, i32 %"50", 1 + ret %struct.i64i32 %3 } define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i64) %"55", ptr addrspace(4) byref(i64) %"56") #0 { -"61": %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) @@ -43,6 +43,10 @@ define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i6 %"13" = alloca i64, align 8, addrspace(5) %"14" = alloca i64, align 8, addrspace(5) %"15" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"21", align 1 %"23" = load i64, ptr addrspace(4) %"55", align 8 store i64 %"23", ptr addrspace(5) %"9", align 8 %"24" = load i64, ptr addrspace(4) %"56", align 8 @@ -53,14 +57,14 @@ define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i6 store i32 %"25", ptr addrspace(5) %"11", align 4 %"28" = load i64, ptr addrspace(5) %"9", align 8 %"58" = 
inttoptr i64 %"28" to ptr addrspace(1) - %"64" = getelementptr inbounds i8, ptr addrspace(1) %"58", i64 4 - %"27" = load i32, ptr addrspace(1) %"64", align 4 + %"62" = getelementptr inbounds i8, ptr addrspace(1) %"58", i64 4 + %"27" = load i32, ptr addrspace(1) %"62", align 4 store i32 %"27", ptr addrspace(5) %"12", align 4 %"31" = load i32, ptr addrspace(5) %"11", align 4 %"32" = load i32, ptr addrspace(5) %"12", align 4 - %0 = call %struct.i64i32 @"1"(i32 %"31", i32 %"32") - %"29" = extractvalue %struct.i64i32 %0, 0 - %"30" = extractvalue %struct.i64i32 %0, 1 + %2 = call %struct.i64i32 @"1"(i32 %"31", i32 %"32") + %"29" = extractvalue %struct.i64i32 %2, 0 + %"30" = extractvalue %struct.i64i32 %2, 1 store i64 %"29", ptr addrspace(5) %"13", align 8 store i32 %"30", ptr addrspace(5) %"15", align 4 %"34" = load i32, ptr addrspace(5) %"15", align 4 @@ -73,8 +77,8 @@ define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i6 %"37" = load i64, ptr addrspace(5) %"10", align 8 %"38" = load i64, ptr addrspace(5) %"14", align 8 %"60" = inttoptr i64 %"37" to ptr addrspace(1) - %"66" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 8 - store i64 %"38", ptr addrspace(1) %"66", align 8 + %"64" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 8 + store i64 %"38", ptr addrspace(1) %"64", align 8 ret void } diff --git a/ptx/src/test/spirv_run/callprototype.ll b/ptx/src/test/spirv_run/callprototype.ll index be431ea..9cba37c 100644 --- a/ptx/src/test/spirv_run/callprototype.ll +++ b/ptx/src/test/spirv_run/callprototype.ll @@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define private i64 @incr(i64 %"33") #0 { -"54": %"20" = alloca i64, align 8, addrspace(5) %"19" = alloca i64, align 8, addrspace(5) %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 %"46" = alloca i64, align 8, addrspace(5) %"47" = alloca i64, align 8, 
addrspace(5) %"16" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 store i64 %"33", ptr addrspace(5) %"20", align 8 + store i1 false, ptr addrspace(5) %"22", align 1 %"34" = load i64, ptr addrspace(5) %"20", align 8 store i64 %"34", ptr addrspace(5) %"47", align 8 %"35" = load i64, ptr addrspace(5) %"47", align 8 @@ -27,15 +29,17 @@ define private i64 @incr(i64 %"33") #0 { } define protected amdgpu_kernel void @callprototype(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 { -"53": %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"44" = alloca i64, align 8, addrspace(5) %"45" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"21", align 1 %"23" = load i64, ptr addrspace(4) %"42", align 8 store i64 %"23", ptr addrspace(5) %"7", align 8 %"24" = load i64, ptr addrspace(4) %"43", align 8 @@ -49,8 +53,8 @@ define protected amdgpu_kernel void @callprototype(ptr addrspace(4) byref(i64) % store i64 ptrtoint (ptr @incr to i64), ptr addrspace(5) %"10", align 8 %"17" = load i64, ptr addrspace(5) %"44", align 8 %"29" = load i64, ptr addrspace(5) %"10", align 8 - %0 = inttoptr i64 %"29" to ptr - %"18" = call i64 %0(i64 %"17") + %2 = inttoptr i64 %"29" to ptr + %"18" = call i64 %2(i64 %"17") store i64 %"18", ptr addrspace(5) %"45", align 8 %"30" = load i64, ptr addrspace(5) %"45", align 8 store i64 %"30", ptr addrspace(5) %"9", align 8 diff --git a/ptx/src/test/spirv_run/carry_set_all.ll b/ptx/src/test/spirv_run/carry_set_all.ll index 8b412c1..8983b70 100644 --- a/ptx/src/test/spirv_run/carry_set_all.ll +++ b/ptx/src/test/spirv_run/carry_set_all.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = 
"amdgcn-amd-amdhsa" define protected amdgpu_kernel void @carry_set_all(ptr addrspace(4) byref(i64) %"208", ptr addrspace(4) byref(i64) %"209") #0 { -"268": %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -23,147 +21,151 @@ define protected amdgpu_kernel void @carry_set_all(ptr addrspace(4) byref(i64) % %"19" = alloca i32, align 4, addrspace(5) %"20" = alloca i32, align 4, addrspace(5) %"21" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"22", align 1 %"37" = load i64, ptr addrspace(4) %"209", align 8 store i64 %"37", ptr addrspace(5) %"5", align 8 - %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) - %"210" = extractvalue { i32, i1 } %0, 0 - %"23" = extractvalue { i32, i1 } %0, 1 + %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) + %"210" = extractvalue { i32, i1 } %2, 0 + %"23" = extractvalue { i32, i1 } %2, 1 store i32 %"210", ptr addrspace(5) %"6", align 4 %"39" = xor i1 %"23", true store i1 %"39", ptr addrspace(5) %"22", align 1 %"41" = load i1, ptr addrspace(5) %"22", align 1 - %1 = zext i1 %"41" to i32 - %"211" = add i32 0, %1 + %3 = zext i1 %"41" to i32 + %"211" = add i32 0, %3 store i32 %"211", ptr addrspace(5) %"6", align 4 %"42" = load i1, ptr addrspace(5) %"22", align 1 %"24" = xor i1 %"42", true - %2 = zext i1 %"24" to i32 - %"212" = sub i32 0, %2 + %4 = zext i1 %"24" to i32 + %"212" = sub i32 0, %4 store i32 %"212", ptr addrspace(5) %"7", align 4 - %3 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) - %"213" = extractvalue { i32, i1 } %3, 0 - %"25" = extractvalue { i32, i1 } %3, 1 + %5 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) + %"213" = extractvalue { i32, i1 } %5, 0 + %"25" = extractvalue { i32, i1 } %5, 1 store i32 %"213", ptr addrspace(5) %"8", align 4 %"45" = 
xor i1 %"25", true store i1 %"45", ptr addrspace(5) %"22", align 1 %"47" = load i1, ptr addrspace(5) %"22", align 1 - %4 = zext i1 %"47" to i32 - %"214" = add i32 0, %4 + %6 = zext i1 %"47" to i32 + %"214" = add i32 0, %6 store i32 %"214", ptr addrspace(5) %"8", align 4 %"48" = load i1, ptr addrspace(5) %"22", align 1 %"26" = xor i1 %"48", true - %5 = zext i1 %"26" to i32 - %"215" = sub i32 0, %5 + %7 = zext i1 %"26" to i32 + %"215" = sub i32 0, %7 store i32 %"215", ptr addrspace(5) %"9", align 4 - %6 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) - %"216" = extractvalue { i32, i1 } %6, 0 - %"51" = extractvalue { i32, i1 } %6, 1 + %8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"216" = extractvalue { i32, i1 } %8, 0 + %"51" = extractvalue { i32, i1 } %8, 1 store i32 %"216", ptr addrspace(5) %"10", align 4 store i1 %"51", ptr addrspace(5) %"22", align 1 %"53" = load i1, ptr addrspace(5) %"22", align 1 - %7 = zext i1 %"53" to i32 - %"217" = add i32 0, %7 + %9 = zext i1 %"53" to i32 + %"217" = add i32 0, %9 store i32 %"217", ptr addrspace(5) %"10", align 4 %"54" = load i1, ptr addrspace(5) %"22", align 1 %"27" = xor i1 %"54", true - %8 = zext i1 %"27" to i32 - %"218" = sub i32 0, %8 + %10 = zext i1 %"27" to i32 + %"218" = sub i32 0, %10 store i32 %"218", ptr addrspace(5) %"11", align 4 - %9 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) - %"219" = extractvalue { i32, i1 } %9, 0 - %"57" = extractvalue { i32, i1 } %9, 1 + %11 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) + %"219" = extractvalue { i32, i1 } %11, 0 + %"57" = extractvalue { i32, i1 } %11, 1 store i32 %"219", ptr addrspace(5) %"12", align 4 store i1 %"57", ptr addrspace(5) %"22", align 1 %"59" = load i1, ptr addrspace(5) %"22", align 1 - %10 = zext i1 %"59" to i32 - %"220" = add i32 0, %10 + %12 = zext i1 %"59" to i32 + %"220" = add i32 0, %12 store i32 %"220", ptr addrspace(5) %"12", align 4 %"60" = load i1, ptr addrspace(5) %"22", align 
1 %"28" = xor i1 %"60", true - %11 = zext i1 %"28" to i32 - %"221" = sub i32 0, %11 + %13 = zext i1 %"28" to i32 + %"221" = sub i32 0, %13 store i32 %"221", ptr addrspace(5) %"13", align 4 - %12 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) - %"222" = extractvalue { i32, i1 } %12, 0 - %"63" = extractvalue { i32, i1 } %12, 1 + %14 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"222" = extractvalue { i32, i1 } %14, 0 + %"63" = extractvalue { i32, i1 } %14, 1 store i32 %"222", ptr addrspace(5) %"14", align 4 store i1 %"63", ptr addrspace(5) %"22", align 1 %"65" = load i1, ptr addrspace(5) %"22", align 1 - %13 = zext i1 %"65" to i32 - %"223" = add i32 0, %13 + %15 = zext i1 %"65" to i32 + %"223" = add i32 0, %15 store i32 %"223", ptr addrspace(5) %"14", align 4 %"66" = load i1, ptr addrspace(5) %"22", align 1 %"29" = xor i1 %"66", true - %14 = zext i1 %"29" to i32 - %"224" = sub i32 0, %14 + %16 = zext i1 %"29" to i32 + %"224" = sub i32 0, %16 store i32 %"224", ptr addrspace(5) %"15", align 4 - %15 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) - %"225" = extractvalue { i32, i1 } %15, 0 - %"69" = extractvalue { i32, i1 } %15, 1 + %17 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) + %"225" = extractvalue { i32, i1 } %17, 0 + %"69" = extractvalue { i32, i1 } %17, 1 store i32 %"225", ptr addrspace(5) %"16", align 4 store i1 %"69", ptr addrspace(5) %"22", align 1 %"71" = load i1, ptr addrspace(5) %"22", align 1 - %16 = zext i1 %"71" to i32 - %"226" = add i32 0, %16 + %18 = zext i1 %"71" to i32 + %"226" = add i32 0, %18 store i32 %"226", ptr addrspace(5) %"16", align 4 %"72" = load i1, ptr addrspace(5) %"22", align 1 %"30" = xor i1 %"72", true - %17 = zext i1 %"30" to i32 - %"227" = sub i32 0, %17 + %19 = zext i1 %"30" to i32 + %"227" = sub i32 0, %19 store i32 %"227", ptr addrspace(5) %"17", align 4 - %18 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) - %"228" = extractvalue { i32, i1 } %18, 
0 - %"75" = extractvalue { i32, i1 } %18, 1 + %20 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"228" = extractvalue { i32, i1 } %20, 0 + %"75" = extractvalue { i32, i1 } %20, 1 store i32 %"228", ptr addrspace(5) %"18", align 4 store i1 %"75", ptr addrspace(5) %"22", align 1 %"76" = load i1, ptr addrspace(5) %"22", align 1 %"31" = xor i1 %"76", true - %19 = zext i1 %"31" to i32 - %20 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) - %21 = extractvalue { i32, i1 } %20, 0 - %22 = extractvalue { i32, i1 } %20, 1 - %23 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %21, i32 %19) - %"229" = extractvalue { i32, i1 } %23, 0 - %24 = extractvalue { i32, i1 } %23, 1 - %"32" = xor i1 %22, %24 + %21 = zext i1 %"31" to i32 + %22 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) + %23 = extractvalue { i32, i1 } %22, 0 + %24 = extractvalue { i32, i1 } %22, 1 + %25 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %23, i32 %21) + %"229" = extractvalue { i32, i1 } %25, 0 + %26 = extractvalue { i32, i1 } %25, 1 + %"32" = xor i1 %24, %26 store i32 %"229", ptr addrspace(5) %"18", align 4 %"78" = xor i1 %"32", true store i1 %"78", ptr addrspace(5) %"22", align 1 %"80" = load i1, ptr addrspace(5) %"22", align 1 - %25 = zext i1 %"80" to i32 - %"230" = add i32 0, %25 + %27 = zext i1 %"80" to i32 + %"230" = add i32 0, %27 store i32 %"230", ptr addrspace(5) %"18", align 4 %"81" = load i1, ptr addrspace(5) %"22", align 1 %"33" = xor i1 %"81", true - %26 = zext i1 %"33" to i32 - %"231" = sub i32 0, %26 + %28 = zext i1 %"33" to i32 + %"231" = sub i32 0, %28 store i32 %"231", ptr addrspace(5) %"19", align 4 - %27 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) - %"232" = extractvalue { i32, i1 } %27, 0 - %"84" = extractvalue { i32, i1 } %27, 1 + %29 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"232" = extractvalue { i32, i1 } %29, 0 + %"84" = extractvalue { i32, i1 } %29, 1 store i32 %"232", ptr addrspace(5) 
%"20", align 4 store i1 %"84", ptr addrspace(5) %"22", align 1 %"85" = load i1, ptr addrspace(5) %"22", align 1 %"34" = xor i1 %"85", true - %28 = zext i1 %"34" to i32 - %29 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) - %30 = extractvalue { i32, i1 } %29, 0 - %31 = extractvalue { i32, i1 } %29, 1 - %32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %30, i32 %28) - %"233" = extractvalue { i32, i1 } %32, 0 - %33 = extractvalue { i32, i1 } %32, 1 - %"35" = xor i1 %31, %33 + %30 = zext i1 %"34" to i32 + %31 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) + %32 = extractvalue { i32, i1 } %31, 0 + %33 = extractvalue { i32, i1 } %31, 1 + %34 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %32, i32 %30) + %"233" = extractvalue { i32, i1 } %34, 0 + %35 = extractvalue { i32, i1 } %34, 1 + %"35" = xor i1 %33, %35 store i32 %"233", ptr addrspace(5) %"20", align 4 %"87" = xor i1 %"35", true store i1 %"87", ptr addrspace(5) %"22", align 1 %"89" = load i1, ptr addrspace(5) %"22", align 1 - %34 = zext i1 %"89" to i32 - %"234" = add i32 0, %34 + %36 = zext i1 %"89" to i32 + %"234" = add i32 0, %36 store i32 %"234", ptr addrspace(5) %"20", align 4 %"90" = load i1, ptr addrspace(5) %"22", align 1 %"36" = xor i1 %"90", true - %35 = zext i1 %"36" to i32 - %"235" = sub i32 0, %35 + %37 = zext i1 %"36" to i32 + %"235" = sub i32 0, %37 store i32 %"235", ptr addrspace(5) %"21", align 4 %"92" = load i64, ptr addrspace(5) %"5", align 8 %"93" = load i32, ptr addrspace(5) %"6", align 4 @@ -172,78 +174,78 @@ define protected amdgpu_kernel void @carry_set_all(ptr addrspace(4) byref(i64) % %"94" = load i64, ptr addrspace(5) %"5", align 8 %"95" = load i32, ptr addrspace(5) %"8", align 4 %"238" = inttoptr i64 %"94" to ptr - %"270" = getelementptr inbounds i8, ptr %"238", i64 4 - store i32 %"95", ptr %"270", align 4 + %"269" = getelementptr inbounds i8, ptr %"238", i64 4 + store i32 %"95", ptr %"269", align 4 %"96" = load i64, ptr addrspace(5) %"5", align 8 
%"97" = load i32, ptr addrspace(5) %"10", align 4 %"240" = inttoptr i64 %"96" to ptr - %"272" = getelementptr inbounds i8, ptr %"240", i64 8 - store i32 %"97", ptr %"272", align 4 + %"271" = getelementptr inbounds i8, ptr %"240", i64 8 + store i32 %"97", ptr %"271", align 4 %"98" = load i64, ptr addrspace(5) %"5", align 8 %"99" = load i32, ptr addrspace(5) %"12", align 4 %"242" = inttoptr i64 %"98" to ptr - %"274" = getelementptr inbounds i8, ptr %"242", i64 12 - store i32 %"99", ptr %"274", align 4 + %"273" = getelementptr inbounds i8, ptr %"242", i64 12 + store i32 %"99", ptr %"273", align 4 %"100" = load i64, ptr addrspace(5) %"5", align 8 %"101" = load i32, ptr addrspace(5) %"14", align 4 %"244" = inttoptr i64 %"100" to ptr - %"276" = getelementptr inbounds i8, ptr %"244", i64 16 - store i32 %"101", ptr %"276", align 4 + %"275" = getelementptr inbounds i8, ptr %"244", i64 16 + store i32 %"101", ptr %"275", align 4 %"102" = load i64, ptr addrspace(5) %"5", align 8 %"103" = load i32, ptr addrspace(5) %"16", align 4 %"246" = inttoptr i64 %"102" to ptr - %"278" = getelementptr inbounds i8, ptr %"246", i64 20 - store i32 %"103", ptr %"278", align 4 + %"277" = getelementptr inbounds i8, ptr %"246", i64 20 + store i32 %"103", ptr %"277", align 4 %"104" = load i64, ptr addrspace(5) %"5", align 8 %"105" = load i32, ptr addrspace(5) %"18", align 4 %"248" = inttoptr i64 %"104" to ptr - %"280" = getelementptr inbounds i8, ptr %"248", i64 24 - store i32 %"105", ptr %"280", align 4 + %"279" = getelementptr inbounds i8, ptr %"248", i64 24 + store i32 %"105", ptr %"279", align 4 %"106" = load i64, ptr addrspace(5) %"5", align 8 %"107" = load i32, ptr addrspace(5) %"20", align 4 %"250" = inttoptr i64 %"106" to ptr - %"282" = getelementptr inbounds i8, ptr %"250", i64 28 - store i32 %"107", ptr %"282", align 4 + %"281" = getelementptr inbounds i8, ptr %"250", i64 28 + store i32 %"107", ptr %"281", align 4 %"108" = load i64, ptr addrspace(5) %"5", align 8 %"109" = load i32, ptr 
addrspace(5) %"7", align 4 %"252" = inttoptr i64 %"108" to ptr - %"284" = getelementptr inbounds i8, ptr %"252", i64 32 - store i32 %"109", ptr %"284", align 4 + %"283" = getelementptr inbounds i8, ptr %"252", i64 32 + store i32 %"109", ptr %"283", align 4 %"110" = load i64, ptr addrspace(5) %"5", align 8 %"111" = load i32, ptr addrspace(5) %"9", align 4 %"254" = inttoptr i64 %"110" to ptr - %"286" = getelementptr inbounds i8, ptr %"254", i64 36 - store i32 %"111", ptr %"286", align 4 + %"285" = getelementptr inbounds i8, ptr %"254", i64 36 + store i32 %"111", ptr %"285", align 4 %"112" = load i64, ptr addrspace(5) %"5", align 8 %"113" = load i32, ptr addrspace(5) %"11", align 4 %"256" = inttoptr i64 %"112" to ptr - %"288" = getelementptr inbounds i8, ptr %"256", i64 40 - store i32 %"113", ptr %"288", align 4 + %"287" = getelementptr inbounds i8, ptr %"256", i64 40 + store i32 %"113", ptr %"287", align 4 %"114" = load i64, ptr addrspace(5) %"5", align 8 %"115" = load i32, ptr addrspace(5) %"13", align 4 %"258" = inttoptr i64 %"114" to ptr - %"290" = getelementptr inbounds i8, ptr %"258", i64 44 - store i32 %"115", ptr %"290", align 4 + %"289" = getelementptr inbounds i8, ptr %"258", i64 44 + store i32 %"115", ptr %"289", align 4 %"116" = load i64, ptr addrspace(5) %"5", align 8 %"117" = load i32, ptr addrspace(5) %"15", align 4 %"260" = inttoptr i64 %"116" to ptr - %"292" = getelementptr inbounds i8, ptr %"260", i64 48 - store i32 %"117", ptr %"292", align 4 + %"291" = getelementptr inbounds i8, ptr %"260", i64 48 + store i32 %"117", ptr %"291", align 4 %"118" = load i64, ptr addrspace(5) %"5", align 8 %"119" = load i32, ptr addrspace(5) %"17", align 4 %"262" = inttoptr i64 %"118" to ptr - %"294" = getelementptr inbounds i8, ptr %"262", i64 52 - store i32 %"119", ptr %"294", align 4 + %"293" = getelementptr inbounds i8, ptr %"262", i64 52 + store i32 %"119", ptr %"293", align 4 %"120" = load i64, ptr addrspace(5) %"5", align 8 %"121" = load i32, ptr addrspace(5) 
%"19", align 4 %"264" = inttoptr i64 %"120" to ptr - %"296" = getelementptr inbounds i8, ptr %"264", i64 56 - store i32 %"121", ptr %"296", align 4 + %"295" = getelementptr inbounds i8, ptr %"264", i64 56 + store i32 %"121", ptr %"295", align 4 %"122" = load i64, ptr addrspace(5) %"5", align 8 %"123" = load i32, ptr addrspace(5) %"21", align 4 %"266" = inttoptr i64 %"122" to ptr - %"298" = getelementptr inbounds i8, ptr %"266", i64 60 - store i32 %"123", ptr %"298", align 4 + %"297" = getelementptr inbounds i8, ptr %"266", i64 60 + store i32 %"123", ptr %"297", align 4 ret void } diff --git a/ptx/src/test/spirv_run/clz.ll b/ptx/src/test/spirv_run/clz.ll index 31f408d..5a93145 100644 --- a/ptx/src/test/spirv_run/clz.ll +++ b/ptx/src/test/spirv_run/clz.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 @@ -17,8 +19,8 @@ define protected amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"16", ptr %"10" = load i32, ptr %"18", align 4 store i32 %"10", ptr addrspace(5) %"6", align 4 %"13" = load i32, ptr addrspace(5) %"6", align 4 - %0 = call i32 @llvm.ctlz.i32(i32 %"13", i1 false) - store i32 %0, ptr addrspace(5) %"6", align 4 + %2 = call i32 @llvm.ctlz.i32(i32 %"13", i1 false) + store i32 %2, ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"5", align 8 %"15" = load i32, ptr addrspace(5) %"6", align 4 %"19" = 
inttoptr i64 %"14" to ptr diff --git a/ptx/src/test/spirv_run/const.ll b/ptx/src/test/spirv_run/const.ll index 80fcc07..df0de94 100644 --- a/ptx/src/test/spirv_run/const.ll +++ b/ptx/src/test/spirv_run/const.ll @@ -4,15 +4,17 @@ target triple = "amdgcn-amd-amdhsa" @constparams = protected addrspace(4) externally_initialized global [4 x i16] [i16 10, i16 20, i16 30, i16 40], align 8 define protected amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { -"52": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i16, align 2, addrspace(5) %"8" = alloca i16, align 2, addrspace(5) %"9" = alloca i16, align 2, addrspace(5) %"10" = alloca i16, align 2, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"12" = load i64, ptr addrspace(4) %"38", align 8 store i64 %"12", ptr addrspace(5) %"5", align 8 %"13" = load i64, ptr addrspace(4) %"39", align 8 @@ -32,18 +34,18 @@ define protected amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"38", pt %"20" = load i64, ptr addrspace(5) %"6", align 8 %"21" = load i16, ptr addrspace(5) %"8", align 2 %"46" = inttoptr i64 %"20" to ptr - %"60" = getelementptr inbounds i8, ptr %"46", i64 2 - store i16 %"21", ptr %"60", align 2 + %"59" = getelementptr inbounds i8, ptr %"46", i64 2 + store i16 %"21", ptr %"59", align 2 %"22" = load i64, ptr addrspace(5) %"6", align 8 %"23" = load i16, ptr addrspace(5) %"9", align 2 %"48" = inttoptr i64 %"22" to ptr - %"62" = getelementptr inbounds i8, ptr %"48", i64 4 - store i16 %"23", ptr %"62", align 2 + %"61" = getelementptr inbounds i8, ptr %"48", i64 4 + store i16 %"23", ptr %"61", align 2 %"24" = load i64, ptr addrspace(5) %"6", align 8 %"25" = load i16, ptr addrspace(5) %"10", align 2 %"50" = inttoptr i64 %"24" to ptr - %"64" = getelementptr inbounds i8, ptr %"50", i64 6 
- store i16 %"25", ptr %"64", align 2 + %"63" = getelementptr inbounds i8, ptr %"50", i64 6 + store i16 %"25", ptr %"63", align 2 ret void } diff --git a/ptx/src/test/spirv_run/constant_f32.ll b/ptx/src/test/spirv_run/constant_f32.ll index e0309ea..a6558c9 100644 --- a/ptx/src/test/spirv_run/constant_f32.ll +++ b/ptx/src/test/spirv_run/constant_f32.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"18", align 8 diff --git a/ptx/src/test/spirv_run/constant_negative.ll b/ptx/src/test/spirv_run/constant_negative.ll index 337689f..c3e7e86 100644 --- a/ptx/src/test/spirv_run/constant_negative.ll +++ b/ptx/src/test/spirv_run/constant_negative.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"18", 
align 8 diff --git a/ptx/src/test/spirv_run/cos.ll b/ptx/src/test/spirv_run/cos.ll index d385e1f..da48297 100644 --- a/ptx/src/test/spirv_run/cos.ll +++ b/ptx/src/test/spirv_run/cos.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/cvt_clamp.ll b/ptx/src/test/spirv_run/cvt_clamp.ll index f2be477..b610ca9 100644 --- a/ptx/src/test/spirv_run/cvt_clamp.ll +++ b/ptx/src/test/spirv_run/cvt_clamp.ll @@ -4,12 +4,14 @@ target triple = "amdgcn-amd-amdhsa" declare float @__zluda_ptx_impl__cvt_sat_f32_f32(float) #0 define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #1 { -"56": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"46", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"47", align 8 @@ -27,8 +29,8 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46" store float %"15", ptr addrspace(1) %"49", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"50" = inttoptr i64 %"17" to 
ptr addrspace(1) - %"61" = getelementptr inbounds i8, ptr addrspace(1) %"50", i64 4 - %"16" = load float, ptr addrspace(1) %"61", align 4 + %"60" = getelementptr inbounds i8, ptr addrspace(1) %"50", i64 4 + %"16" = load float, ptr addrspace(1) %"60", align 4 store float %"16", ptr addrspace(5) %"6", align 4 %"19" = load float, ptr addrspace(5) %"6", align 4 %"18" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"19") @@ -36,12 +38,12 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46" %"20" = load i64, ptr addrspace(5) %"5", align 8 %"21" = load float, ptr addrspace(5) %"6", align 4 %"51" = inttoptr i64 %"20" to ptr addrspace(1) - %"63" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 4 - store float %"21", ptr addrspace(1) %"63", align 4 + %"62" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 4 + store float %"21", ptr addrspace(1) %"62", align 4 %"23" = load i64, ptr addrspace(5) %"4", align 8 %"52" = inttoptr i64 %"23" to ptr addrspace(1) - %"65" = getelementptr inbounds i8, ptr addrspace(1) %"52", i64 8 - %"22" = load float, ptr addrspace(1) %"65", align 4 + %"64" = getelementptr inbounds i8, ptr addrspace(1) %"52", i64 8 + %"22" = load float, ptr addrspace(1) %"64", align 4 store float %"22", ptr addrspace(5) %"6", align 4 %"25" = load float, ptr addrspace(5) %"6", align 4 %"24" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"25") @@ -49,12 +51,12 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46" %"26" = load i64, ptr addrspace(5) %"5", align 8 %"27" = load float, ptr addrspace(5) %"6", align 4 %"53" = inttoptr i64 %"26" to ptr addrspace(1) - %"67" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 8 - store float %"27", ptr addrspace(1) %"67", align 4 + %"66" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 8 + store float %"27", ptr addrspace(1) %"66", align 4 %"29" = load i64, ptr addrspace(5) %"4", align 8 %"54" = inttoptr i64 %"29" to ptr 
addrspace(1) - %"69" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 12 - %"28" = load float, ptr addrspace(1) %"69", align 4 + %"68" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 12 + %"28" = load float, ptr addrspace(1) %"68", align 4 store float %"28", ptr addrspace(5) %"6", align 4 %"31" = load float, ptr addrspace(5) %"6", align 4 %"30" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"31") @@ -62,8 +64,8 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46" %"32" = load i64, ptr addrspace(5) %"5", align 8 %"33" = load float, ptr addrspace(5) %"6", align 4 %"55" = inttoptr i64 %"32" to ptr addrspace(1) - %"71" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 12 - store float %"33", ptr addrspace(1) %"71", align 4 + %"70" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 12 + store float %"33", ptr addrspace(1) %"70", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_f32_f16.ll b/ptx/src/test/spirv_run/cvt_f32_f16.ll index e3acdb6..7379876 100644 --- a/ptx/src/test/spirv_run/cvt_f32_f16.ll +++ b/ptx/src/test/spirv_run/cvt_f32_f16.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_f32_f16(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca half, align 2, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 diff --git a/ptx/src/test/spirv_run/cvt_f32_s32.ll b/ptx/src/test/spirv_run/cvt_f32_s32.ll index 65b00ce..90b0e4a 
100644 --- a/ptx/src/test/spirv_run/cvt_f32_s32.ll +++ b/ptx/src/test/spirv_run/cvt_f32_s32.ll @@ -10,15 +10,17 @@ declare float @__zluda_ptx_impl__cvt_rp_f32_s32(i32) #0 declare float @__zluda_ptx_impl__cvt_rz_f32_s32(i32) #0 define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"49", ptr addrspace(4) byref(i64) %"50") #1 { -"75": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"49", align 8 store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"50", align 8 @@ -29,18 +31,18 @@ define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"4 store i32 %"51", ptr addrspace(5) %"6", align 4 %"16" = load i64, ptr addrspace(5) %"4", align 8 %"53" = inttoptr i64 %"16" to ptr - %"89" = getelementptr inbounds i8, ptr %"53", i64 4 - %"54" = load i32, ptr %"89", align 4 + %"88" = getelementptr inbounds i8, ptr %"53", i64 4 + %"54" = load i32, ptr %"88", align 4 store i32 %"54", ptr addrspace(5) %"7", align 4 %"18" = load i64, ptr addrspace(5) %"4", align 8 %"55" = inttoptr i64 %"18" to ptr - %"91" = getelementptr inbounds i8, ptr %"55", i64 8 - %"56" = load i32, ptr %"91", align 4 + %"90" = getelementptr inbounds i8, ptr %"55", i64 8 + %"56" = load i32, ptr %"90", align 4 store i32 %"56", ptr addrspace(5) %"8", align 4 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"57" = inttoptr i64 %"20" to ptr - %"93" = getelementptr inbounds i8, ptr %"57", i64 12 - %"58" = load i32, ptr %"93", align 4 + %"92" = getelementptr inbounds i8, ptr %"57", i64 12 + %"58" = load i32, ptr %"92", align 4 store i32 %"58", 
ptr addrspace(5) %"9", align 4 %"22" = load i32, ptr addrspace(5) %"6", align 4 %"59" = call float @__zluda_ptx_impl__cvt_rn_f32_s32(i32 %"22") @@ -66,21 +68,21 @@ define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"4 %"31" = load i64, ptr addrspace(5) %"5", align 8 %"32" = load i32, ptr addrspace(5) %"7", align 4 %"69" = inttoptr i64 %"31" to ptr addrspace(1) - %"95" = getelementptr inbounds i8, ptr addrspace(1) %"69", i64 4 + %"94" = getelementptr inbounds i8, ptr addrspace(1) %"69", i64 4 %"70" = bitcast i32 %"32" to float - store float %"70", ptr addrspace(1) %"95", align 4 + store float %"70", ptr addrspace(1) %"94", align 4 %"33" = load i64, ptr addrspace(5) %"5", align 8 %"34" = load i32, ptr addrspace(5) %"8", align 4 %"71" = inttoptr i64 %"33" to ptr addrspace(1) - %"97" = getelementptr inbounds i8, ptr addrspace(1) %"71", i64 8 + %"96" = getelementptr inbounds i8, ptr addrspace(1) %"71", i64 8 %"72" = bitcast i32 %"34" to float - store float %"72", ptr addrspace(1) %"97", align 4 + store float %"72", ptr addrspace(1) %"96", align 4 %"35" = load i64, ptr addrspace(5) %"5", align 8 %"36" = load i32, ptr addrspace(5) %"9", align 4 %"73" = inttoptr i64 %"35" to ptr addrspace(1) - %"99" = getelementptr inbounds i8, ptr addrspace(1) %"73", i64 12 + %"98" = getelementptr inbounds i8, ptr addrspace(1) %"73", i64 12 %"74" = bitcast i32 %"36" to float - store float %"74", ptr addrspace(1) %"99", align 4 + store float %"74", ptr addrspace(1) %"98", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_f64_f32.ll b/ptx/src/test/spirv_run/cvt_f64_f32.ll index 96267f4..64b4bb8 100644 --- a/ptx/src/test/spirv_run/cvt_f64_f32.ll +++ b/ptx/src/test/spirv_run/cvt_f64_f32.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": %"8" = 
alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca double, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 diff --git a/ptx/src/test/spirv_run/cvt_rni.ll b/ptx/src/test/spirv_run/cvt_rni.ll index 5eb6eaa..77d2999 100644 --- a/ptx/src/test/spirv_run/cvt_rni.ll +++ b/ptx/src/test/spirv_run/cvt_rni.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { -"33": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"27", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"27", store float %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"30" = inttoptr i64 %"14" to ptr - %"35" = getelementptr inbounds i8, ptr %"30", i64 4 - %"13" = load float, ptr %"35", align 4 + %"34" = getelementptr inbounds i8, ptr %"30", i64 4 + %"13" = load float, ptr %"34", align 4 store float %"13", ptr addrspace(5) %"7", align 4 %"16" = load float, ptr addrspace(5) %"6", align 4 %"15" = call float @llvm.rint.f32(float %"16") @@ -35,8 
+37,8 @@ define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"27", %"21" = load i64, ptr addrspace(5) %"5", align 8 %"22" = load float, ptr addrspace(5) %"7", align 4 %"32" = inttoptr i64 %"21" to ptr - %"37" = getelementptr inbounds i8, ptr %"32", i64 4 - store float %"22", ptr %"37", align 4 + %"36" = getelementptr inbounds i8, ptr %"32", i64 4 + store float %"22", ptr %"36", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_rzi.ll b/ptx/src/test/spirv_run/cvt_rzi.ll index 83783d8..e651db5 100644 --- a/ptx/src/test/spirv_run/cvt_rzi.ll +++ b/ptx/src/test/spirv_run/cvt_rzi.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { -"33": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"27", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"27", store float %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"30" = inttoptr i64 %"14" to ptr - %"35" = getelementptr inbounds i8, ptr %"30", i64 4 - %"13" = load float, ptr %"35", align 4 + %"34" = getelementptr inbounds i8, ptr %"30", i64 4 + %"13" = load float, ptr %"34", align 4 store float %"13", ptr addrspace(5) %"7", align 4 %"16" = load float, ptr addrspace(5) %"6", align 4 %"15" = call float @llvm.trunc.f32(float %"16") @@ -35,8 +37,8 @@ define protected amdgpu_kernel void @cvt_rzi(ptr 
addrspace(4) byref(i64) %"27", %"21" = load i64, ptr addrspace(5) %"5", align 8 %"22" = load float, ptr addrspace(5) %"7", align 4 %"32" = inttoptr i64 %"21" to ptr - %"37" = getelementptr inbounds i8, ptr %"32", i64 4 - store float %"22", ptr %"37", align 4 + %"36" = getelementptr inbounds i8, ptr %"32", i64 4 + store float %"22", ptr %"36", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_s16_s8.ll b/ptx/src/test/spirv_run/cvt_s16_s8.ll index 841178e..6f49cea 100644 --- a/ptx/src/test/spirv_run/cvt_s16_s8.ll +++ b/ptx/src/test/spirv_run/cvt_s16_s8.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 @@ -18,8 +20,8 @@ define protected amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"17 %"11" = load i32, ptr addrspace(1) %"19", align 4 store i32 %"11", ptr addrspace(5) %"7", align 4 %"14" = load i32, ptr addrspace(5) %"7", align 4 - %"25" = trunc i32 %"14" to i8 - %"20" = sext i8 %"25" to i16 + %"24" = trunc i32 %"14" to i8 + %"20" = sext i8 %"24" to i16 %"13" = sext i16 %"20" to i32 store i32 %"13", ptr addrspace(5) %"6", align 4 %"15" = load i64, ptr addrspace(5) %"5", align 8 diff --git a/ptx/src/test/spirv_run/cvt_s32_f32.ll b/ptx/src/test/spirv_run/cvt_s32_f32.ll index bd1b9e3..e8b8bc1 100644 --- a/ptx/src/test/spirv_run/cvt_s32_f32.ll +++ 
b/ptx/src/test/spirv_run/cvt_s32_f32.ll @@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float) #0 define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #1 { -"41": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"27", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 @@ -22,8 +24,8 @@ define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"2 store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"31" = inttoptr i64 %"14" to ptr - %"46" = getelementptr inbounds i8, ptr %"31", i64 4 - %"32" = load float, ptr %"46", align 4 + %"45" = getelementptr inbounds i8, ptr %"31", i64 4 + %"32" = load float, ptr %"45", align 4 %"13" = bitcast float %"32" to i32 store i32 %"13", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 @@ -41,8 +43,8 @@ define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"2 %"21" = load i64, ptr addrspace(5) %"5", align 8 %"22" = load i32, ptr addrspace(5) %"7", align 4 %"39" = inttoptr i64 %"21" to ptr addrspace(1) - %"48" = getelementptr inbounds i8, ptr addrspace(1) %"39", i64 4 - store i32 %"22", ptr addrspace(1) %"48", align 4 + %"47" = getelementptr inbounds i8, ptr addrspace(1) %"39", i64 4 + store i32 %"22", ptr addrspace(1) %"47", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_s64_s32.ll b/ptx/src/test/spirv_run/cvt_s64_s32.ll index 4958266..799b90a 100644 --- a/ptx/src/test/spirv_run/cvt_s64_s32.ll +++ 
b/ptx/src/test/spirv_run/cvt_s64_s32.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 diff --git a/ptx/src/test/spirv_run/cvt_sat_s_u.ll b/ptx/src/test/spirv_run/cvt_sat_s_u.ll index 3af6ef5..5e8d015 100644 --- a/ptx/src/test/spirv_run/cvt_sat_s_u.ll +++ b/ptx/src/test/spirv_run/cvt_sat_s_u.ll @@ -2,14 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 { -"34": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + %2 = alloca i32, align 4, addrspace(5) + %3 = alloca i32, align 4, addrspace(5) + br label %4 + +4: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"26", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"27", align 8 @@ -19,18 +24,15 @@ define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) 
byref(i64) %"2 %"12" = load i32, ptr %"28", align 4 store i32 %"12", ptr addrspace(5) %"6", align 4 %"15" = load i32, ptr addrspace(5) %"6", align 4 - %0 = call i32 @llvm.smax.i32(i32 %"15", i32 0) - %1 = alloca i32, align 4, addrspace(5) - store i32 %0, ptr addrspace(5) %1, align 4 + %5 = call i32 @llvm.smax.i32(i32 %"15", i32 0) + store i32 %5, ptr addrspace(5) %1, align 4 %"14" = load i32, ptr addrspace(5) %1, align 4 store i32 %"14", ptr addrspace(5) %"7", align 4 %"17" = load i32, ptr addrspace(5) %"7", align 4 - %2 = alloca i32, align 4, addrspace(5) store i32 %"17", ptr addrspace(5) %2, align 4 %"29" = load i32, ptr addrspace(5) %2, align 4 store i32 %"29", ptr addrspace(5) %"7", align 4 %"19" = load i32, ptr addrspace(5) %"6", align 4 - %3 = alloca i32, align 4, addrspace(5) store i32 %"19", ptr addrspace(5) %3, align 4 %"30" = load i32, ptr addrspace(5) %3, align 4 store i32 %"30", ptr addrspace(5) %"8", align 4 @@ -41,8 +43,8 @@ define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"2 %"22" = load i64, ptr addrspace(5) %"5", align 8 %"23" = load i32, ptr addrspace(5) %"8", align 4 %"33" = inttoptr i64 %"22" to ptr - %"36" = getelementptr inbounds i8, ptr %"33", i64 4 - store i32 %"23", ptr %"36", align 4 + %"35" = getelementptr inbounds i8, ptr %"33", i64 4 + store i32 %"23", ptr %"35", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_u32_s16.ll b/ptx/src/test/spirv_run/cvt_u32_s16.ll index 141f83f..1b868a5 100644 --- a/ptx/src/test/spirv_run/cvt_u32_s16.ll +++ b/ptx/src/test/spirv_run/cvt_u32_s16.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_u32_s16(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, 
addrspace(5) %"6" = alloca i16, align 2, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 diff --git a/ptx/src/test/spirv_run/cvta.ll b/ptx/src/test/spirv_run/cvta.ll index d5c0f73..7b73f8c 100644 --- a/ptx/src/test/spirv_run/cvta.ll +++ b/ptx/src/test/spirv_run/cvta.ll @@ -2,25 +2,27 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"26": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"19", align 8 store i64 %"9", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr addrspace(5) %"4", align 8 - %0 = inttoptr i64 %"11" to ptr - %1 = addrspacecast ptr %0 to ptr addrspace(1) - %"20" = ptrtoint ptr addrspace(1) %1 to i64 + %2 = inttoptr i64 %"11" to ptr + %3 = addrspacecast ptr %2 to ptr addrspace(1) + %"20" = ptrtoint ptr addrspace(1) %3 to i64 store i64 %"20", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(5) %"5", align 8 - %2 = inttoptr i64 %"13" to ptr - %3 = addrspacecast ptr %2 to ptr addrspace(1) - %"22" = ptrtoint ptr addrspace(1) %3 to i64 + %4 = inttoptr i64 %"13" to ptr + %5 = addrspacecast ptr %4 to ptr addrspace(1) + %"22" = ptrtoint ptr addrspace(1) %5 to i64 store i64 %"22", ptr addrspace(5) %"5", align 8 %"15" = load i64, ptr addrspace(5) %"4", align 
8 %"24" = inttoptr i64 %"15" to ptr addrspace(1) diff --git a/ptx/src/test/spirv_run/div_approx.ll b/ptx/src/test/spirv_run/div_approx.ll index 833065e..d4b889f 100644 --- a/ptx/src/test/spirv_run/div_approx.ll +++ b/ptx/src/test/spirv_run/div_approx.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"27": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"22 store float %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"29" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load float, ptr %"29", align 4 + %"28" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load float, ptr %"28", align 4 store float %"13", ptr addrspace(5) %"7", align 4 %"16" = load float, ptr addrspace(5) %"6", align 4 %"17" = load float, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/dp4a.ll b/ptx/src/test/spirv_run/dp4a.ll index 2ada6cb..97f4098 100644 --- a/ptx/src/test/spirv_run/dp4a.ll +++ b/ptx/src/test/spirv_run/dp4a.ll @@ -4,14 +4,16 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__dp4a_s32_s32(i32, i32, i32) #0 define protected amdgpu_kernel void @dp4a(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 { -"38": 
%"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"28", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 @@ -22,13 +24,13 @@ define protected amdgpu_kernel void @dp4a(ptr addrspace(4) byref(i64) %"28", ptr store i32 %"12", ptr addrspace(5) %"6", align 4 %"15" = load i64, ptr addrspace(5) %"4", align 8 %"31" = inttoptr i64 %"15" to ptr - %"45" = getelementptr inbounds i8, ptr %"31", i64 4 - %"14" = load i32, ptr %"45", align 4 + %"44" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load i32, ptr %"44", align 4 store i32 %"14", ptr addrspace(5) %"7", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"32" = inttoptr i64 %"17" to ptr - %"47" = getelementptr inbounds i8, ptr %"32", i64 8 - %"16" = load i32, ptr %"47", align 4 + %"46" = getelementptr inbounds i8, ptr %"32", i64 8 + %"16" = load i32, ptr %"46", align 4 store i32 %"16", ptr addrspace(5) %"8", align 4 %"19" = load i32, ptr addrspace(5) %"6", align 4 %"20" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/ex2.ll b/ptx/src/test/spirv_run/ex2.ll index b5e671e..aa0c1d5 100644 --- a/ptx/src/test/spirv_run/ex2.ll +++ b/ptx/src/test/spirv_run/ex2.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { -"56": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, 
addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"46", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"47", align 8 @@ -25,8 +27,8 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr store float %"15", ptr %"49", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"50" = inttoptr i64 %"17" to ptr - %"58" = getelementptr inbounds i8, ptr %"50", i64 4 - %"16" = load float, ptr %"58", align 4 + %"57" = getelementptr inbounds i8, ptr %"50", i64 4 + %"16" = load float, ptr %"57", align 4 store float %"16", ptr addrspace(5) %"6", align 4 %"19" = load float, ptr addrspace(5) %"6", align 4 %"18" = call afn float @llvm.exp2.f32(float %"19") @@ -34,12 +36,12 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr %"20" = load i64, ptr addrspace(5) %"5", align 8 %"21" = load float, ptr addrspace(5) %"6", align 4 %"51" = inttoptr i64 %"20" to ptr - %"60" = getelementptr inbounds i8, ptr %"51", i64 4 - store float %"21", ptr %"60", align 4 + %"59" = getelementptr inbounds i8, ptr %"51", i64 4 + store float %"21", ptr %"59", align 4 %"23" = load i64, ptr addrspace(5) %"4", align 8 %"52" = inttoptr i64 %"23" to ptr - %"62" = getelementptr inbounds i8, ptr %"52", i64 8 - %"22" = load float, ptr %"62", align 4 + %"61" = getelementptr inbounds i8, ptr %"52", i64 8 + %"22" = load float, ptr %"61", align 4 store float %"22", ptr addrspace(5) %"6", align 4 %"25" = load float, ptr addrspace(5) %"6", align 4 %"24" = call afn float @llvm.exp2.f32(float %"25") @@ -47,12 +49,12 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr %"26" = load i64, ptr addrspace(5) %"5", align 8 %"27" = load float, ptr addrspace(5) %"6", align 4 %"53" = inttoptr i64 %"26" to ptr - %"64" = getelementptr inbounds i8, ptr %"53", i64 8 - store float %"27", 
ptr %"64", align 4 + %"63" = getelementptr inbounds i8, ptr %"53", i64 8 + store float %"27", ptr %"63", align 4 %"29" = load i64, ptr addrspace(5) %"4", align 8 %"54" = inttoptr i64 %"29" to ptr - %"66" = getelementptr inbounds i8, ptr %"54", i64 12 - %"28" = load float, ptr %"66", align 4 + %"65" = getelementptr inbounds i8, ptr %"54", i64 12 + %"28" = load float, ptr %"65", align 4 store float %"28", ptr addrspace(5) %"6", align 4 %"31" = load float, ptr addrspace(5) %"6", align 4 %"30" = call afn float @llvm.exp2.f32(float %"31") @@ -60,8 +62,8 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr %"32" = load i64, ptr addrspace(5) %"5", align 8 %"33" = load float, ptr addrspace(5) %"6", align 4 %"55" = inttoptr i64 %"32" to ptr - %"68" = getelementptr inbounds i8, ptr %"55", i64 12 - store float %"33", ptr %"68", align 4 + %"67" = getelementptr inbounds i8, ptr %"55", i64 12 + store float %"33", ptr %"67", align 4 ret void } diff --git a/ptx/src/test/spirv_run/extern_shared.ll b/ptx/src/test/spirv_run/extern_shared.ll index eeb0d50..e7d0a21 100644 --- a/ptx/src/test/spirv_run/extern_shared.ll +++ b/ptx/src/test/spirv_run/extern_shared.ll @@ -4,12 +4,14 @@ target triple = "amdgcn-amd-amdhsa" @shared_mem = external hidden addrspace(3) global [0 x i32] define protected amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 diff --git a/ptx/src/test/spirv_run/extern_shared_call.ll b/ptx/src/test/spirv_run/extern_shared_call.ll index 
cdd37be..a2b6c10 100644 --- a/ptx/src/test/spirv_run/extern_shared_call.ll +++ b/ptx/src/test/spirv_run/extern_shared_call.ll @@ -3,28 +3,32 @@ target triple = "amdgcn-amd-amdhsa" @shared_mem = external hidden addrspace(3) global [0 x i32], align 4 -define private void @"2"(ptr addrspace(3) %"35") #0 { -"33": +define private void @"2"(ptr addrspace(3) %"33") #0 { %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"3" = alloca i64, align 8, addrspace(5) - %"12" = load i64, ptr addrspace(3) %"35", align 8 + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 + %"12" = load i64, ptr addrspace(3) %"33", align 8 store i64 %"12", ptr addrspace(5) %"3", align 8 %"14" = load i64, ptr addrspace(5) %"3", align 8 %"13" = add i64 %"14", 2 store i64 %"13", ptr addrspace(5) %"3", align 8 %"15" = load i64, ptr addrspace(5) %"3", align 8 - store i64 %"15", ptr addrspace(3) %"35", align 8 + store i64 %"15", ptr addrspace(3) %"33", align 8 ret void } define protected amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 { -"34": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"16" = load i64, ptr addrspace(4) %"25", align 8 store i64 %"16", ptr addrspace(5) %"7", align 8 %"17" = load i64, ptr addrspace(4) %"26", align 8 diff --git a/ptx/src/test/spirv_run/fma.ll b/ptx/src/test/spirv_run/fma.ll index 1dff2b8..61ef775 100644 --- a/ptx/src/test/spirv_run/fma.ll +++ b/ptx/src/test/spirv_run/fma.ll @@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"28", ptr 
addrspace(4) byref(i64) %"29") #0 { -"34": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"28", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 @@ -20,13 +22,13 @@ define protected amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"28", ptr store float %"12", ptr addrspace(5) %"6", align 4 %"15" = load i64, ptr addrspace(5) %"4", align 8 %"31" = inttoptr i64 %"15" to ptr - %"36" = getelementptr inbounds i8, ptr %"31", i64 4 - %"14" = load float, ptr %"36", align 4 + %"35" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load float, ptr %"35", align 4 store float %"14", ptr addrspace(5) %"7", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"32" = inttoptr i64 %"17" to ptr - %"38" = getelementptr inbounds i8, ptr %"32", i64 8 - %"16" = load float, ptr %"38", align 4 + %"37" = getelementptr inbounds i8, ptr %"32", i64 8 + %"16" = load float, ptr %"37", align 4 store float %"16", ptr addrspace(5) %"8", align 4 %"19" = load float, ptr addrspace(5) %"6", align 4 %"20" = load float, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/func_ptr.ll b/ptx/src/test/spirv_run/func_ptr.ll index 1160a76..ad4392b 100644 --- a/ptx/src/test/spirv_run/func_ptr.ll +++ b/ptx/src/test/spirv_run/func_ptr.ll @@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define private float @"1"(float %"15", float %"16") #0 { -"38": %"3" = alloca float, align 4, addrspace(5) %"4" = alloca float, align 4, addrspace(5) %"2" = alloca float, align 4, addrspace(5) 
%"13" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"13", align 1 + br label %1 + +1: ; preds = %0 store float %"15", ptr addrspace(5) %"3", align 4 store float %"16", ptr addrspace(5) %"4", align 4 + store i1 false, ptr addrspace(5) %"13", align 1 %"18" = load float, ptr addrspace(5) %"3", align 4 %"19" = load float, ptr addrspace(5) %"4", align 4 %"17" = fadd float %"18", %"19" @@ -19,14 +21,16 @@ define private float @"1"(float %"15", float %"16") #0 { } define protected amdgpu_kernel void @func_ptr(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { -"39": %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) %"12" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"14", align 1 %"21" = load i64, ptr addrspace(4) %"34", align 8 store i64 %"21", ptr addrspace(5) %"8", align 8 %"22" = load i64, ptr addrspace(4) %"35", align 8 diff --git a/ptx/src/test/spirv_run/generic.ll b/ptx/src/test/spirv_run/generic.ll index 312a7cd..44b4ef9 100644 --- a/ptx/src/test/spirv_run/generic.ll +++ b/ptx/src/test/spirv_run/generic.ll @@ -5,18 +5,20 @@ target triple = "amdgcn-amd-amdhsa" @bar = protected addrspace(1) externally_initialized global [4 x i64] [i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 4), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 8), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 12)] define protected amdgpu_kernel void @generic(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { -"57": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr 
addrspace(5) %"10", align 1 %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"47", align 8 store i64 %"11", ptr addrspace(5) %"7", align 8 - %0 = alloca i32, align 4, addrspace(5) - store i32 1, ptr addrspace(5) %0, align 4 - %"12" = load i32, ptr addrspace(5) %0, align 4 + store i32 1, ptr addrspace(5) %1, align 4 + %"12" = load i32, ptr addrspace(5) %1, align 4 store i32 %"12", ptr addrspace(5) %"8", align 4 %"13" = load i64, ptr addrspace(1) @bar, align 8 store i64 %"13", ptr addrspace(5) %"6", align 8 diff --git a/ptx/src/test/spirv_run/global_array.ll b/ptx/src/test/spirv_run/global_array.ll index e2ad2f2..59a66ea 100644 --- a/ptx/src/test/spirv_run/global_array.ll +++ b/ptx/src/test/spirv_run/global_array.ll @@ -5,15 +5,17 @@ target triple = "amdgcn-amd-amdhsa" @foobar = protected addrspace(1) externally_initialized global [4 x [2 x i64]] [[2 x i64] [i64 -1, i64 2], [2 x i64] [i64 3, i64 0], [2 x i64] [i64 ptrtoint (ptr addrspace(1) @asdas to i64), i64 0], [2 x i64] zeroinitializer] define protected amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"21": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) - %0 = alloca i64, align 8, addrspace(5) - store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %0, align 8 - %"10" = load i64, ptr addrspace(5) %0, align 8 + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 + store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %1, 
align 8 + %"10" = load i64, ptr addrspace(5) %1, align 8 store i64 %"10", ptr addrspace(5) %"6", align 8 %"11" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"11", ptr addrspace(5) %"7", align 8 diff --git a/ptx/src/test/spirv_run/lanemask_lt.ll b/ptx/src/test/spirv_run/lanemask_lt.ll index efa1746..cc81383 100644 --- a/ptx/src/test/spirv_run/lanemask_lt.ll +++ b/ptx/src/test/spirv_run/lanemask_lt.ll @@ -4,14 +4,17 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__sreg_lanemask_lt() #0 define protected amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #1 { -"39": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"14" = load i64, ptr addrspace(4) %"27", align 8 store i64 %"14", ptr addrspace(5) %"4", align 8 %"15" = load i64, ptr addrspace(4) %"28", align 8 @@ -24,9 +27,8 @@ define protected amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"2 %"31" = add i32 %"19", 1 store i32 %"31", ptr addrspace(5) %"7", align 4 %"11" = call i32 @__zluda_ptx_impl__sreg_lanemask_lt() - %0 = alloca i32, align 4, addrspace(5) - store i32 %"11", ptr addrspace(5) %0, align 4 - %"33" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"11", ptr addrspace(5) %1, align 4 + %"33" = load i32, ptr addrspace(5) %1, align 4 store i32 %"33", ptr addrspace(5) %"8", align 4 %"22" = load i32, ptr addrspace(5) %"7", align 4 %"23" = load i32, ptr addrspace(5) %"8", align 4 diff --git a/ptx/src/test/spirv_run/ld_st.ll b/ptx/src/test/spirv_run/ld_st.ll index 0fe06f2..4b23120 100644 --- a/ptx/src/test/spirv_run/ld_st.ll +++ 
b/ptx/src/test/spirv_run/ld_st.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { -"18": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"14", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 diff --git a/ptx/src/test/spirv_run/ld_st_implicit.ll b/ptx/src/test/spirv_run/ld_st_implicit.ll index 3ec1474..71baa92 100644 --- a/ptx/src/test/spirv_run/ld_st_implicit.ll +++ b/ptx/src/test/spirv_run/ld_st_implicit.ll @@ -2,31 +2,33 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"22": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"5", align 8 - %0 = alloca i64, align 8, addrspace(5) - store i64 81985529216486895, ptr addrspace(5) %0, align 8 - %"10" = load i64, ptr addrspace(5) %0, align 8 + store i64 81985529216486895, ptr addrspace(5) %1, align 8 + %"10" = load i64, ptr addrspace(5) 
%1, align 8 store i64 %"10", ptr addrspace(5) %"6", align 8 %"12" = load i64, ptr addrspace(5) %"4", align 8 %"19" = inttoptr i64 %"12" to ptr addrspace(1) %"18" = load float, ptr addrspace(1) %"19", align 4 - %"23" = bitcast float %"18" to i32 - %"11" = zext i32 %"23" to i64 + %"22" = bitcast float %"18" to i32 + %"11" = zext i32 %"22" to i64 store i64 %"11", ptr addrspace(5) %"6", align 8 %"13" = load i64, ptr addrspace(5) %"5", align 8 %"14" = load i64, ptr addrspace(5) %"6", align 8 %"20" = inttoptr i64 %"13" to ptr addrspace(1) - %"25" = trunc i64 %"14" to i32 - %"21" = bitcast i32 %"25" to float + %"24" = trunc i64 %"14" to i32 + %"21" = bitcast i32 %"24" to float store float %"21", ptr addrspace(1) %"20", align 4 ret void } diff --git a/ptx/src/test/spirv_run/ld_st_offset.ll b/ptx/src/test/spirv_run/ld_st_offset.ll index ee8bde6..959aa53 100644 --- a/ptx/src/test/spirv_run/ld_st_offset.ll +++ b/ptx/src/test/spirv_run/ld_st_offset.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"29": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"23", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %" store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"26" = inttoptr i64 %"14" to ptr - %"31" = getelementptr inbounds i8, ptr %"26", i64 4 - %"13" = 
load i32, ptr %"31", align 4 + %"30" = getelementptr inbounds i8, ptr %"26", i64 4 + %"13" = load i32, ptr %"30", align 4 store i32 %"13", ptr addrspace(5) %"7", align 4 %"15" = load i64, ptr addrspace(5) %"5", align 8 %"16" = load i32, ptr addrspace(5) %"7", align 4 @@ -29,8 +31,8 @@ define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %" %"17" = load i64, ptr addrspace(5) %"5", align 8 %"18" = load i32, ptr addrspace(5) %"6", align 4 %"28" = inttoptr i64 %"17" to ptr - %"33" = getelementptr inbounds i8, ptr %"28", i64 4 - store i32 %"18", ptr %"33", align 4 + %"32" = getelementptr inbounds i8, ptr %"28", i64 4 + store i32 %"18", ptr %"32", align 4 ret void } diff --git a/ptx/src/test/spirv_run/lg2.ll b/ptx/src/test/spirv_run/lg2.ll index 7dd63d6..9e4500e 100644 --- a/ptx/src/test/spirv_run/lg2.ll +++ b/ptx/src/test/spirv_run/lg2.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/local_align.ll b/ptx/src/test/spirv_run/local_align.ll index 13fbe4b..284a081 100644 --- a/ptx/src/test/spirv_run/local_align.ll +++ b/ptx/src/test/spirv_run/local_align.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"15", 
ptr addrspace(4) byref(i64) %"16") #0 { -"19": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca [8 x i8], align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"15", align 8 store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"16", align 8 diff --git a/ptx/src/test/spirv_run/mad_hi_cc.ll b/ptx/src/test/spirv_run/mad_hi_cc.ll index 6c86dbc..f9a27b4 100644 --- a/ptx/src/test/spirv_run/mad_hi_cc.ll +++ b/ptx/src/test/spirv_run/mad_hi_cc.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60", ptr addrspace(4) byref(i64) %"61") #0 { -"77": %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -15,6 +13,10 @@ define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60" %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i32, align 4, addrspace(5) %"13" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"14", align 1 %"15" = load i64, ptr addrspace(4) %"60", align 8 store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"61", align 8 @@ -25,44 +27,44 @@ define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60" store i32 %"62", ptr addrspace(5) %"8", align 4 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"64" = inttoptr i64 %"20" to ptr - %"79" = getelementptr inbounds i8, ptr %"64", i64 4 - %"65" = load i32, ptr %"79", align 4 + %"78" 
= getelementptr inbounds i8, ptr %"64", i64 4 + %"65" = load i32, ptr %"78", align 4 store i32 %"65", ptr addrspace(5) %"9", align 4 %"22" = load i64, ptr addrspace(5) %"4", align 8 %"66" = inttoptr i64 %"22" to ptr - %"81" = getelementptr inbounds i8, ptr %"66", i64 8 - %"21" = load i32, ptr %"81", align 4 + %"80" = getelementptr inbounds i8, ptr %"66", i64 8 + %"21" = load i32, ptr %"80", align 4 store i32 %"21", ptr addrspace(5) %"10", align 4 %"25" = load i32, ptr addrspace(5) %"8", align 4 %"26" = load i32, ptr addrspace(5) %"9", align 4 %"27" = load i32, ptr addrspace(5) %"10", align 4 - %0 = sext i32 %"25" to i64 - %1 = sext i32 %"26" to i64 - %2 = mul nsw i64 %0, %1 - %3 = lshr i64 %2, 32 - %4 = trunc i64 %3 to i32 - %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %4, i32 %"27") - %"23" = extractvalue { i32, i1 } %5, 0 - %"24" = extractvalue { i32, i1 } %5, 1 + %2 = sext i32 %"25" to i64 + %3 = sext i32 %"26" to i64 + %4 = mul nsw i64 %2, %3 + %5 = lshr i64 %4, 32 + %6 = trunc i64 %5 to i32 + %7 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %6, i32 %"27") + %"23" = extractvalue { i32, i1 } %7, 0 + %"24" = extractvalue { i32, i1 } %7, 1 store i32 %"23", ptr addrspace(5) %"7", align 4 store i1 %"24", ptr addrspace(5) %"14", align 1 - %6 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -2) - %"28" = extractvalue { i32, i1 } %6, 0 - %"29" = extractvalue { i32, i1 } %6, 1 + %8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -2) + %"28" = extractvalue { i32, i1 } %8, 0 + %"29" = extractvalue { i32, i1 } %8, 1 store i32 %"28", ptr addrspace(5) %"6", align 4 store i1 %"29", ptr addrspace(5) %"14", align 1 %"31" = load i1, ptr addrspace(5) %"14", align 1 - %7 = zext i1 %"31" to i32 - %"70" = add i32 0, %7 + %9 = zext i1 %"31" to i32 + %"70" = add i32 0, %9 store i32 %"70", ptr addrspace(5) %"12", align 4 - %8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -1) - %"32" = extractvalue { i32, i1 } %8, 0 - %"33" = 
extractvalue { i32, i1 } %8, 1 + %10 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -1) + %"32" = extractvalue { i32, i1 } %10, 0 + %"33" = extractvalue { i32, i1 } %10, 1 store i32 %"32", ptr addrspace(5) %"6", align 4 store i1 %"33", ptr addrspace(5) %"14", align 1 %"35" = load i1, ptr addrspace(5) %"14", align 1 - %9 = zext i1 %"35" to i32 - %"71" = add i32 0, %9 + %11 = zext i1 %"35" to i32 + %"71" = add i32 0, %11 store i32 %"71", ptr addrspace(5) %"13", align 4 %"36" = load i64, ptr addrspace(5) %"5", align 8 %"37" = load i32, ptr addrspace(5) %"7", align 4 @@ -71,13 +73,13 @@ define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60" %"38" = load i64, ptr addrspace(5) %"5", align 8 %"39" = load i32, ptr addrspace(5) %"12", align 4 %"73" = inttoptr i64 %"38" to ptr - %"83" = getelementptr inbounds i8, ptr %"73", i64 4 - store i32 %"39", ptr %"83", align 4 + %"82" = getelementptr inbounds i8, ptr %"73", i64 4 + store i32 %"39", ptr %"82", align 4 %"40" = load i64, ptr addrspace(5) %"5", align 8 %"41" = load i32, ptr addrspace(5) %"13", align 4 %"75" = inttoptr i64 %"40" to ptr - %"85" = getelementptr inbounds i8, ptr %"75", i64 8 - store i32 %"41", ptr %"85", align 4 + %"84" = getelementptr inbounds i8, ptr %"75", i64 8 + store i32 %"41", ptr %"84", align 4 ret void } diff --git a/ptx/src/test/spirv_run/mad_s32.ll b/ptx/src/test/spirv_run/mad_s32.ll index 5ab86ad..f1c15cf 100644 --- a/ptx/src/test/spirv_run/mad_s32.ll +++ b/ptx/src/test/spirv_run/mad_s32.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52", ptr addrspace(4) byref(i64) %"53") #0 { -"75": %"13" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"13", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ 
-14,6 +12,10 @@ define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52", %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"13", align 1 %"14" = load i64, ptr addrspace(4) %"52", align 8 store i64 %"14", ptr addrspace(5) %"4", align 8 %"15" = load i64, ptr addrspace(4) %"53", align 8 @@ -24,42 +26,42 @@ define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52", store i32 %"54", ptr addrspace(5) %"9", align 4 %"19" = load i64, ptr addrspace(5) %"4", align 8 %"56" = inttoptr i64 %"19" to ptr - %"77" = getelementptr inbounds i8, ptr %"56", i64 4 - %"57" = load i32, ptr %"77", align 4 + %"76" = getelementptr inbounds i8, ptr %"56", i64 4 + %"57" = load i32, ptr %"76", align 4 store i32 %"57", ptr addrspace(5) %"10", align 4 %"21" = load i64, ptr addrspace(5) %"4", align 8 %"58" = inttoptr i64 %"21" to ptr - %"79" = getelementptr inbounds i8, ptr %"58", i64 8 - %"20" = load i64, ptr %"79", align 8 + %"78" = getelementptr inbounds i8, ptr %"58", i64 8 + %"20" = load i64, ptr %"78", align 8 store i64 %"20", ptr addrspace(5) %"12", align 8 %"23" = load i64, ptr addrspace(5) %"4", align 8 %"59" = inttoptr i64 %"23" to ptr - %"81" = getelementptr inbounds i8, ptr %"59", i64 16 - %"60" = load i32, ptr %"81", align 4 + %"80" = getelementptr inbounds i8, ptr %"59", i64 16 + %"60" = load i32, ptr %"80", align 4 store i32 %"60", ptr addrspace(5) %"11", align 4 %"25" = load i32, ptr addrspace(5) %"9", align 4 %"26" = load i32, ptr addrspace(5) %"10", align 4 %"27" = load i32, ptr addrspace(5) %"11", align 4 - %0 = mul i32 %"25", %"26" - %"24" = add i32 %0, %"27" + %2 = mul i32 %"25", %"26" + %"24" = add i32 %2, %"27" store i32 %"24", ptr addrspace(5) %"6", align 4 %"29" = load i32, ptr addrspace(5) %"9", align 4 %"30" = load i32, ptr addrspace(5) %"10", align 4 %"31" = load i32, ptr addrspace(5) 
%"11", align 4 - %1 = sext i32 %"29" to i64 - %2 = sext i32 %"30" to i64 - %3 = mul nsw i64 %1, %2 - %4 = lshr i64 %3, 32 - %5 = trunc i64 %4 to i32 - %"28" = add i32 %5, %"31" + %3 = sext i32 %"29" to i64 + %4 = sext i32 %"30" to i64 + %5 = mul nsw i64 %3, %4 + %6 = lshr i64 %5, 32 + %7 = trunc i64 %6 to i32 + %"28" = add i32 %7, %"31" store i32 %"28", ptr addrspace(5) %"7", align 4 %"33" = load i32, ptr addrspace(5) %"9", align 4 %"34" = load i32, ptr addrspace(5) %"10", align 4 %"35" = load i64, ptr addrspace(5) %"12", align 8 - %6 = sext i32 %"33" to i64 - %7 = sext i32 %"34" to i64 - %8 = mul nsw i64 %6, %7 - %"67" = add i64 %8, %"35" + %8 = sext i32 %"33" to i64 + %9 = sext i32 %"34" to i64 + %10 = mul nsw i64 %8, %9 + %"67" = add i64 %10, %"35" store i64 %"67", ptr addrspace(5) %"8", align 8 %"36" = load i64, ptr addrspace(5) %"5", align 8 %"37" = load i32, ptr addrspace(5) %"6", align 4 @@ -68,13 +70,13 @@ define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52", %"38" = load i64, ptr addrspace(5) %"5", align 8 %"39" = load i32, ptr addrspace(5) %"7", align 4 %"72" = inttoptr i64 %"38" to ptr - %"83" = getelementptr inbounds i8, ptr %"72", i64 8 - store i32 %"39", ptr %"83", align 4 + %"82" = getelementptr inbounds i8, ptr %"72", i64 8 + store i32 %"39", ptr %"82", align 4 %"40" = load i64, ptr addrspace(5) %"5", align 8 %"41" = load i64, ptr addrspace(5) %"8", align 8 %"73" = inttoptr i64 %"40" to ptr - %"85" = getelementptr inbounds i8, ptr %"73", i64 16 - store i64 %"41", ptr %"85", align 8 + %"84" = getelementptr inbounds i8, ptr %"73", i64 16 + store i64 %"41", ptr %"84", align 8 ret void } diff --git a/ptx/src/test/spirv_run/madc_cc.ll b/ptx/src/test/spirv_run/madc_cc.ll index 136f320..0c9df2b 100644 --- a/ptx/src/test/spirv_run/madc_cc.ll +++ b/ptx/src/test/spirv_run/madc_cc.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected 
amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { -"54": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -12,6 +10,10 @@ define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40", %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"12" = load i64, ptr addrspace(4) %"40", align 8 store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"41", align 8 @@ -22,34 +24,34 @@ define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40", store i32 %"42", ptr addrspace(5) %"8", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"44" = inttoptr i64 %"17" to ptr - %"56" = getelementptr inbounds i8, ptr %"44", i64 4 - %"45" = load i32, ptr %"56", align 4 + %"55" = getelementptr inbounds i8, ptr %"44", i64 4 + %"45" = load i32, ptr %"55", align 4 store i32 %"45", ptr addrspace(5) %"9", align 4 %"19" = load i64, ptr addrspace(5) %"4", align 8 %"46" = inttoptr i64 %"19" to ptr - %"58" = getelementptr inbounds i8, ptr %"46", i64 8 - %"18" = load i32, ptr %"58", align 4 + %"57" = getelementptr inbounds i8, ptr %"46", i64 8 + %"18" = load i32, ptr %"57", align 4 store i32 %"18", ptr addrspace(5) %"10", align 4 %"22" = load i32, ptr addrspace(5) %"8", align 4 %"23" = load i32, ptr addrspace(5) %"9", align 4 %"24" = load i32, ptr addrspace(5) %"10", align 4 - %0 = mul i32 %"22", %"23" - %1 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %0, i32 %"24") - %"20" = extractvalue { i32, i1 } %1, 0 - %"21" = extractvalue { i32, i1 } %1, 1 + %2 = mul i32 %"22", %"23" + %3 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %2, i32 %"24") + %"20" 
= extractvalue { i32, i1 } %3, 0 + %"21" = extractvalue { i32, i1 } %3, 1 store i32 %"20", ptr addrspace(5) %"6", align 4 store i1 %"21", ptr addrspace(5) %"11", align 1 %"26" = load i1, ptr addrspace(5) %"11", align 1 %"27" = load i32, ptr addrspace(5) %"8", align 4 %"28" = load i32, ptr addrspace(5) %"9", align 4 - %2 = sext i32 %"27" to i64 - %3 = sext i32 %"28" to i64 - %4 = mul nsw i64 %2, %3 - %5 = lshr i64 %4, 32 - %6 = trunc i64 %5 to i32 - %7 = zext i1 %"26" to i32 - %8 = add i32 %6, 3 - %"25" = add i32 %8, %7 + %4 = sext i32 %"27" to i64 + %5 = sext i32 %"28" to i64 + %6 = mul nsw i64 %4, %5 + %7 = lshr i64 %6, 32 + %8 = trunc i64 %7 to i32 + %9 = zext i1 %"26" to i32 + %10 = add i32 %8, 3 + %"25" = add i32 %10, %9 store i32 %"25", ptr addrspace(5) %"7", align 4 %"29" = load i64, ptr addrspace(5) %"5", align 8 %"30" = load i32, ptr addrspace(5) %"6", align 4 @@ -58,8 +60,8 @@ define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40", %"31" = load i64, ptr addrspace(5) %"5", align 8 %"32" = load i32, ptr addrspace(5) %"7", align 4 %"53" = inttoptr i64 %"31" to ptr - %"60" = getelementptr inbounds i8, ptr %"53", i64 4 - store i32 %"32", ptr %"60", align 4 + %"59" = getelementptr inbounds i8, ptr %"53", i64 4 + store i32 %"32", ptr %"59", align 4 ret void } diff --git a/ptx/src/test/spirv_run/max.ll b/ptx/src/test/spirv_run/max.ll index 6dcc74d..ef0b39d 100644 --- a/ptx/src/test/spirv_run/max.ll +++ b/ptx/src/test/spirv_run/max.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"27": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br 
label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"22", ptr store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"29" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load i32, ptr %"29", align 4 + %"28" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"28", align 4 store i32 %"13", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 %"17" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/membar.ll b/ptx/src/test/spirv_run/membar.ll index 78f60c8..f24c0fb 100644 --- a/ptx/src/test/spirv_run/membar.ll +++ b/ptx/src/test/spirv_run/membar.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { -"19": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"14", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 diff --git a/ptx/src/test/spirv_run/min.ll b/ptx/src/test/spirv_run/min.ll index 58cb36a..b40c4db 100644 --- a/ptx/src/test/spirv_run/min.ll +++ b/ptx/src/test/spirv_run/min.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define 
protected amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"27": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"22", ptr store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"29" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load i32, ptr %"29", align 4 + %"28" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"28", align 4 store i32 %"13", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 %"17" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/mov.ll b/ptx/src/test/spirv_run/mov.ll index e24446a..d43fe68 100644 --- a/ptx/src/test/spirv_run/mov.ll +++ b/ptx/src/test/spirv_run/mov.ll @@ -2,13 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr 
addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 @@ -18,9 +21,8 @@ define protected amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"17", ptr %"11" = load i64, ptr %"19", align 8 store i64 %"11", ptr addrspace(5) %"6", align 8 %"14" = load i64, ptr addrspace(5) %"6", align 8 - %0 = alloca i64, align 8, addrspace(5) - store i64 %"14", ptr addrspace(5) %0, align 8 - %"13" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"14", ptr addrspace(5) %1, align 8 + %"13" = load i64, ptr addrspace(5) %1, align 8 store i64 %"13", ptr addrspace(5) %"7", align 8 %"15" = load i64, ptr addrspace(5) %"5", align 8 %"16" = load i64, ptr addrspace(5) %"7", align 8 diff --git a/ptx/src/test/spirv_run/mov_address.ll b/ptx/src/test/spirv_run/mov_address.ll index 656410c..42d987f 100644 --- a/ptx/src/test/spirv_run/mov_address.ll +++ b/ptx/src/test/spirv_run/mov_address.ll @@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"8", ptr addrspace(4) byref(i64) %"9") #0 { -"11": %"6" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"6", align 1 %"4" = alloca [8 x i8], align 1, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"6", align 1 %"10" = ptrtoint ptr addrspace(5) %"4" to i64 - %0 = alloca i64, align 8, addrspace(5) - store i64 %"10", ptr addrspace(5) %0, align 8 - %"7" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"10", ptr addrspace(5) %1, align 8 + %"7" = load i64, ptr addrspace(5) %1, align 8 store i64 %"7", ptr addrspace(5) %"5", align 8 ret void } diff --git a/ptx/src/test/spirv_run/mov_vector_cast.ll b/ptx/src/test/spirv_run/mov_vector_cast.ll index e65ad94..eb81724 100644 --- 
a/ptx/src/test/spirv_run/mov_vector_cast.ll +++ b/ptx/src/test/spirv_run/mov_vector_cast.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { -"49": %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) @@ -14,6 +12,12 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"10" = alloca half, align 2, addrspace(5) %"11" = alloca half, align 2, addrspace(5) %"12" = alloca half, align 2, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + %2 = alloca i64, align 8, addrspace(5) + br label %3 + +3: ; preds = %0 + store i1 false, ptr addrspace(5) %"15", align 1 %"16" = load i64, ptr addrspace(4) %"34", align 8 store i64 %"16", ptr addrspace(5) %"4", align 8 %"17" = load i64, ptr addrspace(4) %"35", align 8 @@ -23,9 +27,8 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"18" = load i64, ptr %"36", align 8 store i64 %"18", ptr addrspace(5) %"6", align 8 %"20" = load i64, ptr addrspace(5) %"6", align 8 - %0 = alloca i64, align 8, addrspace(5) - store i64 %"20", ptr addrspace(5) %0, align 8 - %"13" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"20", ptr addrspace(5) %1, align 8 + %"13" = load i64, ptr addrspace(5) %1, align 8 %"38" = bitcast i64 %"13" to <2 x i32> %"39" = extractelement <2 x i32> %"38", i32 0 %"40" = extractelement <2 x i32> %"38", i32 1 @@ -34,9 +37,8 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) store float %"21", ptr addrspace(5) %"7", align 4 store float %"22", ptr addrspace(5) %"8", align 4 %"23" = load i64, ptr addrspace(5) %"6", align 8 - %1 = alloca i64, align 8, 
addrspace(5) - store i64 %"23", ptr addrspace(5) %1, align 8 - %"14" = load i64, ptr addrspace(5) %1, align 8 + store i64 %"23", ptr addrspace(5) %2, align 8 + %"14" = load i64, ptr addrspace(5) %2, align 8 %"42" = bitcast i64 %"14" to <4 x i16> %"43" = extractelement <4 x i16> %"42", i32 0 %"44" = extractelement <4 x i16> %"42", i32 1 @@ -57,8 +59,8 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"30" = load i64, ptr addrspace(5) %"5", align 8 %"31" = load float, ptr addrspace(5) %"7", align 4 %"48" = inttoptr i64 %"30" to ptr - %"51" = getelementptr inbounds i8, ptr %"48", i64 4 - store float %"31", ptr %"51", align 4 + %"50" = getelementptr inbounds i8, ptr %"48", i64 4 + store float %"31", ptr %"50", align 4 ret void } diff --git a/ptx/src/test/spirv_run/mul_ftz.ll b/ptx/src/test/spirv_run/mul_ftz.ll index 3c32e73..38867fe 100644 --- a/ptx/src/test/spirv_run/mul_ftz.ll +++ b/ptx/src/test/spirv_run/mul_ftz.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"27": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"22", store float %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"29" = getelementptr inbounds i8, ptr %"25", i64 4 - 
%"13" = load float, ptr %"29", align 4 + %"28" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load float, ptr %"28", align 4 store float %"13", ptr addrspace(5) %"7", align 4 %"16" = load float, ptr addrspace(5) %"6", align 4 %"17" = load float, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/mul_hi.ll b/ptx/src/test/spirv_run/mul_hi.ll index 7d8ffa9..8043deb 100644 --- a/ptx/src/test/spirv_run/mul_hi.ll +++ b/ptx/src/test/spirv_run/mul_hi.ll @@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa" declare i64 @__zluda_ptx_impl__mul_hi_u64(i64, i64) #0 define protected amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #1 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/mul_lo.ll b/ptx/src/test/spirv_run/mul_lo.ll index 57a767d..9370500 100644 --- a/ptx/src/test/spirv_run/mul_lo.ll +++ b/ptx/src/test/spirv_run/mul_lo.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr 
addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/mul_non_ftz.ll b/ptx/src/test/spirv_run/mul_non_ftz.ll index e6a3cc4..89f5e9f 100644 --- a/ptx/src/test/spirv_run/mul_non_ftz.ll +++ b/ptx/src/test/spirv_run/mul_non_ftz.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"27": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"2 store float %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"29" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load float, ptr %"29", align 4 + %"28" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load float, ptr %"28", align 4 store float %"13", ptr addrspace(5) %"7", align 4 %"16" = load float, ptr addrspace(5) %"6", align 4 %"17" = load float, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/mul_wide.ll b/ptx/src/test/spirv_run/mul_wide.ll index e25a61d..a0d84f4 100644 --- a/ptx/src/test/spirv_run/mul_wide.ll +++ b/ptx/src/test/spirv_run/mul_wide.ll @@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define 
protected amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"29": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"23", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"24", align 8 @@ -20,14 +22,14 @@ define protected amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"23", store i32 %"12", ptr addrspace(5) %"6", align 4 %"15" = load i64, ptr addrspace(5) %"4", align 8 %"26" = inttoptr i64 %"15" to ptr addrspace(1) - %"31" = getelementptr inbounds i8, ptr addrspace(1) %"26", i64 4 - %"14" = load i32, ptr addrspace(1) %"31", align 4 + %"30" = getelementptr inbounds i8, ptr addrspace(1) %"26", i64 4 + %"14" = load i32, ptr addrspace(1) %"30", align 4 store i32 %"14", ptr addrspace(5) %"7", align 4 %"17" = load i32, ptr addrspace(5) %"6", align 4 %"18" = load i32, ptr addrspace(5) %"7", align 4 - %0 = sext i32 %"17" to i64 - %1 = sext i32 %"18" to i64 - %"16" = mul nsw i64 %0, %1 + %2 = sext i32 %"17" to i64 + %3 = sext i32 %"18" to i64 + %"16" = mul nsw i64 %2, %3 store i64 %"16", ptr addrspace(5) %"8", align 8 %"19" = load i64, ptr addrspace(5) %"5", align 8 %"20" = load i64, ptr addrspace(5) %"8", align 8 diff --git a/ptx/src/test/spirv_run/multireg.ll b/ptx/src/test/spirv_run/multireg.ll index 657d61f..3eb31cb 100644 --- a/ptx/src/test/spirv_run/multireg.ll +++ b/ptx/src/test/spirv_run/multireg.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @multireg(ptr addrspace(4) byref(i64) 
%"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/neg.ll b/ptx/src/test/spirv_run/neg.ll index 1e94ed1..056b0a1 100644 --- a/ptx/src/test/spirv_run/neg.ll +++ b/ptx/src/test/spirv_run/neg.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll b/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll index 69ea8d2..d0c71eb 100644 --- a/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll +++ b/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll @@ -2,21 +2,23 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"26": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr 
addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"23", align 8 store i64 %"11", ptr addrspace(5) %"5", align 8 %"12" = load i64, ptr addrspace(5) %"4", align 8 %"24" = inttoptr i64 %"12" to ptr addrspace(1) - %"28" = getelementptr inbounds i8, ptr addrspace(1) %"24", i64 8 - %"8" = load <2 x i32>, ptr addrspace(1) %"28", align 8 + %"27" = getelementptr inbounds i8, ptr addrspace(1) %"24", i64 8 + %"8" = load <2 x i32>, ptr addrspace(1) %"27", align 8 %"13" = extractelement <2 x i32> %"8", i32 0 %"14" = extractelement <2 x i32> %"8", i32 1 store i32 %"13", ptr addrspace(5) %"6", align 4 diff --git a/ptx/src/test/spirv_run/not.ll b/ptx/src/test/spirv_run/not.ll index 5e86545..7c9a557 100644 --- a/ptx/src/test/spirv_run/not.ll +++ b/ptx/src/test/spirv_run/not.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 diff --git a/ptx/src/test/spirv_run/ntid.ll b/ptx/src/test/spirv_run/ntid.ll index 53216ce..29fccca 100644 --- 
a/ptx/src/test/spirv_run/ntid.ll +++ b/ptx/src/test/spirv_run/ntid.ll @@ -4,13 +4,16 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__sreg_ntid(i8) #0 define protected amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #1 { -"29": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"15" = load i64, ptr addrspace(4) %"25", align 8 store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"26", align 8 @@ -20,9 +23,8 @@ define protected amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"25", ptr %"17" = load i32, ptr %"27", align 4 store i32 %"17", ptr addrspace(5) %"6", align 4 %"11" = call i32 @__zluda_ptx_impl__sreg_ntid(i8 0) - %0 = alloca i32, align 4, addrspace(5) - store i32 %"11", ptr addrspace(5) %0, align 4 - %"19" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"11", ptr addrspace(5) %1, align 4 + %"19" = load i32, ptr addrspace(5) %1, align 4 store i32 %"19", ptr addrspace(5) %"7", align 4 %"21" = load i32, ptr addrspace(5) %"6", align 4 %"22" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/or.ll b/ptx/src/test/spirv_run/or.ll index 7b4bd7f..f929205 100644 --- a/ptx/src/test/spirv_run/or.ll +++ b/ptx/src/test/spirv_run/or.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"30": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) 
%"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"22", ptr a store i64 %"11", ptr addrspace(5) %"6", align 8 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"32" = getelementptr inbounds i8, ptr %"25", i64 8 - %"13" = load i64, ptr %"32", align 8 + %"31" = getelementptr inbounds i8, ptr %"25", i64 8 + %"13" = load i64, ptr %"31", align 8 store i64 %"13", ptr addrspace(5) %"7", align 8 %"16" = load i64, ptr addrspace(5) %"6", align 8 %"17" = load i64, ptr addrspace(5) %"7", align 8 diff --git a/ptx/src/test/spirv_run/param_ptr.ll b/ptx/src/test/spirv_run/param_ptr.ll index cea098c..75451de 100644 --- a/ptx/src/test/spirv_run/param_ptr.ll +++ b/ptx/src/test/spirv_run/param_ptr.ll @@ -2,18 +2,20 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @param_ptr(ptr addrspace(4) byref(i64) %"21", ptr addrspace(4) byref(i64) %"22") #0 { -"28": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"24" = ptrtoint ptr addrspace(4) %"21" to i64 - %0 = alloca i64, align 8, addrspace(5) - store i64 %"24", ptr addrspace(5) %0, align 8 - %"23" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"24", 
ptr addrspace(5) %1, align 8 + %"23" = load i64, ptr addrspace(5) %1, align 8 store i64 %"23", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"12" to ptr addrspace(4) diff --git a/ptx/src/test/spirv_run/popc.ll b/ptx/src/test/spirv_run/popc.ll index be9c625..15befc4 100644 --- a/ptx/src/test/spirv_run/popc.ll +++ b/ptx/src/test/spirv_run/popc.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/pred_not.ll b/ptx/src/test/spirv_run/pred_not.ll index 69f7646..8315512 100644 --- a/ptx/src/test/spirv_run/pred_not.ll +++ b/ptx/src/test/spirv_run/pred_not.ll @@ -2,15 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { -"41": %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + %2 = alloca i64, align 8, addrspace(5) + br label %3 + +3: 
; preds = %0 + store i1 false, ptr addrspace(5) %"14", align 1 %"15" = load i64, ptr addrspace(4) %"36", align 8 store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"37", align 8 @@ -21,8 +25,8 @@ define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"36", store i64 %"17", ptr addrspace(5) %"6", align 8 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"39" = inttoptr i64 %"20" to ptr - %"43" = getelementptr inbounds i8, ptr %"39", i64 8 - %"19" = load i64, ptr %"43", align 8 + %"42" = getelementptr inbounds i8, ptr %"39", i64 8 + %"19" = load i64, ptr %"42", align 8 store i64 %"19", ptr addrspace(5) %"7", align 8 %"22" = load i64, ptr addrspace(5) %"6", align 8 %"23" = load i64, ptr addrspace(5) %"7", align 8 @@ -34,21 +38,19 @@ define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"36", %"26" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"26", label %"10", label %"11" -"10": ; preds = %"41" - %0 = alloca i64, align 8, addrspace(5) - store i64 1, ptr addrspace(5) %0, align 8 - %"27" = load i64, ptr addrspace(5) %0, align 8 +"10": ; preds = %3 + store i64 1, ptr addrspace(5) %1, align 8 + %"27" = load i64, ptr addrspace(5) %1, align 8 store i64 %"27", ptr addrspace(5) %"8", align 8 br label %"11" -"11": ; preds = %"10", %"41" +"11": ; preds = %"10", %3 %"28" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"28", label %"13", label %"12" "12": ; preds = %"11" - %1 = alloca i64, align 8, addrspace(5) - store i64 2, ptr addrspace(5) %1, align 8 - %"29" = load i64, ptr addrspace(5) %1, align 8 + store i64 2, ptr addrspace(5) %2, align 8 + %"29" = load i64, ptr addrspace(5) %2, align 8 store i64 %"29", ptr addrspace(5) %"8", align 8 br label %"13" diff --git a/ptx/src/test/spirv_run/prmt.ll b/ptx/src/test/spirv_run/prmt.ll index bdcb12d..76efedc 100644 --- a/ptx/src/test/spirv_run/prmt.ll +++ b/ptx/src/test/spirv_run/prmt.ll @@ -2,15 +2,17 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 { -"43": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"31", align 8 store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"32", align 8 @@ -21,28 +23,28 @@ define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"31", ptr store i32 %"13", ptr addrspace(5) %"6", align 4 %"16" = load i64, ptr addrspace(5) %"4", align 8 %"34" = inttoptr i64 %"16" to ptr - %"45" = getelementptr inbounds i8, ptr %"34", i64 4 - %"15" = load i32, ptr %"45", align 4 + %"44" = getelementptr inbounds i8, ptr %"34", i64 4 + %"15" = load i32, ptr %"44", align 4 store i32 %"15", ptr addrspace(5) %"7", align 4 %"18" = load i32, ptr addrspace(5) %"6", align 4 %"19" = load i32, ptr addrspace(5) %"7", align 4 - %0 = bitcast i32 %"18" to <4 x i8> - %1 = bitcast i32 %"19" to <4 x i8> - %2 = shufflevector <4 x i8> %0, <4 x i8> %1, <4 x i32> - %"35" = bitcast <4 x i8> %2 to i32 + %2 = bitcast i32 %"18" to <4 x i8> + %3 = bitcast i32 %"19" to <4 x i8> + %4 = shufflevector <4 x i8> %2, <4 x i8> %3, <4 x i32> + %"35" = bitcast <4 x i8> %4 to i32 store i32 %"35", ptr addrspace(5) %"8", align 4 %"21" = load i32, ptr addrspace(5) %"6", align 4 %"22" = load i32, ptr addrspace(5) %"7", align 4 - %3 = bitcast i32 %"21" to <4 x i8> - %4 = bitcast i32 %"22" to <4 x i8> - %5 = shufflevector <4 x i8> %3, <4 x i8> %4, <4 x i32> - %6 = extractelement <4 x i8> 
%5, i32 0 - %7 = ashr i8 %6, 7 - %8 = insertelement <4 x i8> %5, i8 %7, i32 0 - %9 = extractelement <4 x i8> %8, i32 2 - %10 = ashr i8 %9, 7 - %11 = insertelement <4 x i8> %8, i8 %10, i32 2 - %"38" = bitcast <4 x i8> %11 to i32 + %5 = bitcast i32 %"21" to <4 x i8> + %6 = bitcast i32 %"22" to <4 x i8> + %7 = shufflevector <4 x i8> %5, <4 x i8> %6, <4 x i32> + %8 = extractelement <4 x i8> %7, i32 0 + %9 = ashr i8 %8, 7 + %10 = insertelement <4 x i8> %7, i8 %9, i32 0 + %11 = extractelement <4 x i8> %10, i32 2 + %12 = ashr i8 %11, 7 + %13 = insertelement <4 x i8> %10, i8 %12, i32 2 + %"38" = bitcast <4 x i8> %13 to i32 store i32 %"38", ptr addrspace(5) %"9", align 4 %"23" = load i64, ptr addrspace(5) %"5", align 8 %"24" = load i32, ptr addrspace(5) %"8", align 4 @@ -51,8 +53,8 @@ define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"31", ptr %"25" = load i64, ptr addrspace(5) %"5", align 8 %"26" = load i32, ptr addrspace(5) %"9", align 4 %"42" = inttoptr i64 %"25" to ptr - %"47" = getelementptr inbounds i8, ptr %"42", i64 4 - store i32 %"26", ptr %"47", align 4 + %"46" = getelementptr inbounds i8, ptr %"42", i64 4 + store i32 %"26", ptr %"46", align 4 ret void } diff --git a/ptx/src/test/spirv_run/prmt_non_immediate.ll b/ptx/src/test/spirv_run/prmt_non_immediate.ll index d503917..104c56d 100644 --- a/ptx/src/test/spirv_run/prmt_non_immediate.ll +++ b/ptx/src/test/spirv_run/prmt_non_immediate.ll @@ -2,14 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @prmt_non_immediate(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 { -"33": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) 
+ %1 = alloca i32, align 4, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"25", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"26", align 8 @@ -20,19 +23,18 @@ define protected amdgpu_kernel void @prmt_non_immediate(ptr addrspace(4) byref(i store i32 %"12", ptr addrspace(5) %"6", align 4 %"15" = load i64, ptr addrspace(5) %"4", align 8 %"28" = inttoptr i64 %"15" to ptr - %"35" = getelementptr inbounds i8, ptr %"28", i64 4 - %"14" = load i32, ptr %"35", align 4 + %"34" = getelementptr inbounds i8, ptr %"28", i64 4 + %"14" = load i32, ptr %"34", align 4 store i32 %"14", ptr addrspace(5) %"7", align 4 - %0 = alloca i32, align 4, addrspace(5) - store i32 64, ptr addrspace(5) %0, align 4 - %"16" = load i32, ptr addrspace(5) %0, align 4 + store i32 64, ptr addrspace(5) %1, align 4 + %"16" = load i32, ptr addrspace(5) %1, align 4 store i32 %"16", ptr addrspace(5) %"8", align 4 %"18" = load i32, ptr addrspace(5) %"6", align 4 %"19" = load i32, ptr addrspace(5) %"7", align 4 - %1 = bitcast i32 %"18" to <4 x i8> - %2 = bitcast i32 %"19" to <4 x i8> - %3 = shufflevector <4 x i8> %1, <4 x i8> %2, <4 x i32> - %"29" = bitcast <4 x i8> %3 to i32 + %3 = bitcast i32 %"18" to <4 x i8> + %4 = bitcast i32 %"19" to <4 x i8> + %5 = shufflevector <4 x i8> %3, <4 x i8> %4, <4 x i32> + %"29" = bitcast <4 x i8> %5 to i32 store i32 %"29", ptr addrspace(5) %"7", align 4 %"20" = load i64, ptr addrspace(5) %"5", align 8 %"21" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/rcp.ll b/ptx/src/test/spirv_run/rcp.ll index 116687b..dc03416 100644 --- a/ptx/src/test/spirv_run/rcp.ll +++ b/ptx/src/test/spirv_run/rcp.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) 
byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/reg_local.ll b/ptx/src/test/spirv_run/reg_local.ll index 48c881d..52bb3d1 100644 --- a/ptx/src/test/spirv_run/reg_local.ll +++ b/ptx/src/test/spirv_run/reg_local.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"33": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca [8 x i8], align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"23", align 8 store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 @@ -22,14 +24,14 @@ define protected amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"23" %"27" = addrspacecast ptr addrspace(5) %"4" to ptr store i64 %"18", ptr %"27", align 8 %"29" = addrspacecast ptr addrspace(5) %"4" to ptr - %"37" = getelementptr inbounds i8, ptr %"29", i64 0 - %"30" = load i64, ptr %"37", align 8 + %"36" = getelementptr inbounds i8, ptr %"29", i64 0 + %"30" = load i64, ptr %"36", align 8 store i64 %"30", ptr addrspace(5) %"7", align 8 %"15" = load i64, ptr addrspace(5) %"6", align 8 %"16" = load i64, ptr addrspace(5) %"7", align 8 %"31" 
= inttoptr i64 %"15" to ptr addrspace(1) - %"39" = getelementptr inbounds i8, ptr addrspace(1) %"31", i64 0 - store i64 %"16", ptr addrspace(1) %"39", align 8 + %"38" = getelementptr inbounds i8, ptr addrspace(1) %"31", i64 0 + store i64 %"16", ptr addrspace(1) %"38", align 8 ret void } diff --git a/ptx/src/test/spirv_run/rem.ll b/ptx/src/test/spirv_run/rem.ll index 4535f49..0fb9cd8 100644 --- a/ptx/src/test/spirv_run/rem.ll +++ b/ptx/src/test/spirv_run/rem.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"27": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"22", ptr store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"29" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load i32, ptr %"29", align 4 + %"28" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"28", align 4 store i32 %"13", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 %"17" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/rsqrt.ll b/ptx/src/test/spirv_run/rsqrt.ll index 7797260..40833ac 100644 --- a/ptx/src/test/spirv_run/rsqrt.ll +++ b/ptx/src/test/spirv_run/rsqrt.ll @@ -2,12 +2,14 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca double, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 @@ -17,8 +19,8 @@ define protected amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"16", pt %"10" = load double, ptr %"18", align 8 store double %"10", ptr addrspace(5) %"6", align 8 %"13" = load double, ptr addrspace(5) %"6", align 8 - %0 = call afn double @llvm.sqrt.f64(double %"13") - %"12" = fdiv arcp afn double 1.000000e+00, %0 + %2 = call afn double @llvm.sqrt.f64(double %"13") + %"12" = fdiv arcp afn double 1.000000e+00, %2 store double %"12", ptr addrspace(5) %"6", align 8 %"14" = load i64, ptr addrspace(5) %"5", align 8 %"15" = load double, ptr addrspace(5) %"6", align 8 diff --git a/ptx/src/test/spirv_run/s64_min.ll b/ptx/src/test/spirv_run/s64_min.ll index 98eee04..a96f0a4 100644 --- a/ptx/src/test/spirv_run/s64_min.ll +++ b/ptx/src/test/spirv_run/s64_min.ll @@ -2,16 +2,18 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @s64_min(ptr addrspace(4) byref(i64) %"12", ptr addrspace(4) byref(i64) %"13") #0 { -"15": %"6" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"6", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"6", 
align 1 %"7" = load i64, ptr addrspace(4) %"13", align 8 store i64 %"7", ptr addrspace(5) %"4", align 8 - %0 = alloca i64, align 8, addrspace(5) - store i64 -9223372036854775808, ptr addrspace(5) %0, align 8 - %"8" = load i64, ptr addrspace(5) %0, align 8 + store i64 -9223372036854775808, ptr addrspace(5) %1, align 8 + %"8" = load i64, ptr addrspace(5) %1, align 8 store i64 %"8", ptr addrspace(5) %"5", align 8 %"9" = load i64, ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(5) %"5", align 8 diff --git a/ptx/src/test/spirv_run/sad.ll b/ptx/src/test/spirv_run/sad.ll index c7a5726..aa65fce 100644 --- a/ptx/src/test/spirv_run/sad.ll +++ b/ptx/src/test/spirv_run/sad.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { -"56": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -12,6 +10,10 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"12" = load i64, ptr addrspace(4) %"38", align 8 store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"39", align 8 @@ -22,31 +24,31 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr store i32 %"40", ptr addrspace(5) %"6", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"42" = inttoptr i64 %"17" to ptr - %"58" = getelementptr inbounds i8, ptr %"42", i64 4 - %"43" = load i32, ptr %"58", align 4 + %"57" = getelementptr inbounds i8, ptr %"42", i64 4 + %"43" = 
load i32, ptr %"57", align 4 store i32 %"43", ptr addrspace(5) %"7", align 4 %"19" = load i64, ptr addrspace(5) %"4", align 8 %"44" = inttoptr i64 %"19" to ptr - %"60" = getelementptr inbounds i8, ptr %"44", i64 8 - %"45" = load i32, ptr %"60", align 4 + %"59" = getelementptr inbounds i8, ptr %"44", i64 8 + %"45" = load i32, ptr %"59", align 4 store i32 %"45", ptr addrspace(5) %"8", align 4 %"21" = load i32, ptr addrspace(5) %"6", align 4 %"22" = load i32, ptr addrspace(5) %"7", align 4 %"23" = load i32, ptr addrspace(5) %"8", align 4 - %0 = icmp ugt i32 %"21", %"22" - %1 = sub i32 %"21", %"22" - %2 = sub i32 %"22", %"21" - %3 = select i1 %0, i32 %1, i32 %2 - %"46" = add i32 %"23", %3 + %2 = icmp ugt i32 %"21", %"22" + %3 = sub i32 %"21", %"22" + %4 = sub i32 %"22", %"21" + %5 = select i1 %2, i32 %3, i32 %4 + %"46" = add i32 %"23", %5 store i32 %"46", ptr addrspace(5) %"9", align 4 %"25" = load i32, ptr addrspace(5) %"6", align 4 %"26" = load i32, ptr addrspace(5) %"7", align 4 %"27" = load i32, ptr addrspace(5) %"8", align 4 - %4 = icmp sgt i32 %"25", %"26" - %5 = sub i32 %"25", %"26" - %6 = sub i32 %"26", %"25" - %7 = select i1 %4, i32 %5, i32 %6 - %"50" = add i32 %"27", %7 + %6 = icmp sgt i32 %"25", %"26" + %7 = sub i32 %"25", %"26" + %8 = sub i32 %"26", %"25" + %9 = select i1 %6, i32 %7, i32 %8 + %"50" = add i32 %"27", %9 store i32 %"50", ptr addrspace(5) %"10", align 4 %"28" = load i64, ptr addrspace(5) %"5", align 8 %"29" = load i32, ptr addrspace(5) %"9", align 4 @@ -55,8 +57,8 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr %"30" = load i64, ptr addrspace(5) %"5", align 8 %"31" = load i32, ptr addrspace(5) %"10", align 4 %"55" = inttoptr i64 %"30" to ptr - %"62" = getelementptr inbounds i8, ptr %"55", i64 4 - store i32 %"31", ptr %"62", align 4 + %"61" = getelementptr inbounds i8, ptr %"55", i64 4 + store i32 %"31", ptr %"61", align 4 ret void } diff --git a/ptx/src/test/spirv_run/selp.ll b/ptx/src/test/spirv_run/selp.ll 
index 073ec38..0e20d6d 100644 --- a/ptx/src/test/spirv_run/selp.ll +++ b/ptx/src/test/spirv_run/selp.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i16, align 2, addrspace(5) %"7" = alloca i16, align 2, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"23", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"23", ptr store i16 %"11", ptr addrspace(5) %"6", align 2 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"26" = inttoptr i64 %"14" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 2 - %"13" = load i16, ptr %"30", align 2 + %"29" = getelementptr inbounds i8, ptr %"26", i64 2 + %"13" = load i16, ptr %"29", align 2 store i16 %"13", ptr addrspace(5) %"7", align 2 %"16" = load i16, ptr addrspace(5) %"6", align 2 %"17" = load i16, ptr addrspace(5) %"7", align 2 diff --git a/ptx/src/test/spirv_run/selp_true.ll b/ptx/src/test/spirv_run/selp_true.ll index 4eda981..9b6b41a 100644 --- a/ptx/src/test/spirv_run/selp_true.ll +++ b/ptx/src/test/spirv_run/selp_true.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, 
addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i16, align 2, addrspace(5) %"7" = alloca i16, align 2, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"23", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"23" store i16 %"11", ptr addrspace(5) %"6", align 2 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"26" = inttoptr i64 %"14" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 2 - %"13" = load i16, ptr %"30", align 2 + %"29" = getelementptr inbounds i8, ptr %"26", i64 2 + %"13" = load i16, ptr %"29", align 2 store i16 %"13", ptr addrspace(5) %"7", align 2 %"16" = load i16, ptr addrspace(5) %"6", align 2 %"17" = load i16, ptr addrspace(5) %"7", align 2 diff --git a/ptx/src/test/spirv_run/set_f16x2.ll b/ptx/src/test/spirv_run/set_f16x2.ll index 2a8caf3..d6bf7e0 100644 --- a/ptx/src/test/spirv_run/set_f16x2.ll +++ b/ptx/src/test/spirv_run/set_f16x2.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { -"58": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -12,6 +10,10 @@ define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"40" %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca <2 x half>, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"12" = load i64, ptr addrspace(4) %"40", align 8 store i64 %"12", ptr 
addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"41", align 8 @@ -22,33 +24,33 @@ define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"40" store i32 %"42", ptr addrspace(5) %"6", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"44" = inttoptr i64 %"17" to ptr - %"60" = getelementptr inbounds i8, ptr %"44", i64 4 - %"45" = load i32, ptr %"60", align 4 + %"59" = getelementptr inbounds i8, ptr %"44", i64 4 + %"45" = load i32, ptr %"59", align 4 store i32 %"45", ptr addrspace(5) %"7", align 4 %"19" = load i64, ptr addrspace(5) %"4", align 8 %"46" = inttoptr i64 %"19" to ptr - %"62" = getelementptr inbounds i8, ptr %"46", i64 8 - %"47" = load i32, ptr %"62", align 4 + %"61" = getelementptr inbounds i8, ptr %"46", i64 8 + %"47" = load i32, ptr %"61", align 4 store i32 %"47", ptr addrspace(5) %"8", align 4 %"21" = load i64, ptr addrspace(5) %"4", align 8 %"48" = inttoptr i64 %"21" to ptr - %"64" = getelementptr inbounds i8, ptr %"48", i64 12 - %"49" = load i32, ptr %"64", align 4 + %"63" = getelementptr inbounds i8, ptr %"48", i64 12 + %"49" = load i32, ptr %"63", align 4 store i32 %"49", ptr addrspace(5) %"9", align 4 %"23" = load i32, ptr addrspace(5) %"6", align 4 %"24" = load i32, ptr addrspace(5) %"7", align 4 %"51" = bitcast i32 %"23" to <2 x half> %"52" = bitcast i32 %"24" to <2 x half> - %0 = fcmp ugt <2 x half> %"51", %"52" - %1 = sext <2 x i1> %0 to <2 x i16> - %"50" = bitcast <2 x i16> %1 to i32 + %2 = fcmp ugt <2 x half> %"51", %"52" + %3 = sext <2 x i1> %2 to <2 x i16> + %"50" = bitcast <2 x i16> %3 to i32 store i32 %"50", ptr addrspace(5) %"6", align 4 %"26" = load i32, ptr addrspace(5) %"8", align 4 %"27" = load i32, ptr addrspace(5) %"9", align 4 %"54" = bitcast i32 %"26" to <2 x half> %"55" = bitcast i32 %"27" to <2 x half> - %2 = fcmp oeq <2 x half> %"54", %"55" - %"53" = uitofp <2 x i1> %2 to <2 x half> + %4 = fcmp oeq <2 x half> %"54", %"55" + %"53" = uitofp <2 x i1> %4 to <2 x half> %"25" = bitcast <2 x 
half> %"53" to i32 store i32 %"25", ptr addrspace(5) %"8", align 4 %"28" = load i64, ptr addrspace(5) %"5", align 8 @@ -58,8 +60,8 @@ define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"40" %"30" = load i64, ptr addrspace(5) %"5", align 8 %"31" = load i32, ptr addrspace(5) %"8", align 4 %"57" = inttoptr i64 %"30" to ptr - %"66" = getelementptr inbounds i8, ptr %"57", i64 4 - store i32 %"31", ptr %"66", align 4 + %"65" = getelementptr inbounds i8, ptr %"57", i64 4 + store i32 %"31", ptr %"65", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp.ll b/ptx/src/test/spirv_run/setp.ll index 2f95556..1e9e1e5 100644 --- a/ptx/src/test/spirv_run/setp.ll +++ b/ptx/src/test/spirv_run/setp.ll @@ -2,15 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { -"39": %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + %2 = alloca i64, align 8, addrspace(5) + br label %3 + +3: ; preds = %0 + store i1 false, ptr addrspace(5) %"14", align 1 %"15" = load i64, ptr addrspace(4) %"34", align 8 store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"35", align 8 @@ -21,8 +25,8 @@ define protected amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"34", ptr store i64 %"17", ptr addrspace(5) %"6", align 8 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"37" = inttoptr i64 %"20" to ptr - %"41" = getelementptr inbounds i8, ptr %"37", i64 8 - %"19" = load i64, ptr %"41", align 8 + %"40" = getelementptr inbounds i8, ptr 
%"37", i64 8 + %"19" = load i64, ptr %"40", align 8 store i64 %"19", ptr addrspace(5) %"7", align 8 %"22" = load i64, ptr addrspace(5) %"6", align 8 %"23" = load i64, ptr addrspace(5) %"7", align 8 @@ -31,21 +35,19 @@ define protected amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"34", ptr %"24" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"24", label %"10", label %"11" -"10": ; preds = %"39" - %0 = alloca i64, align 8, addrspace(5) - store i64 1, ptr addrspace(5) %0, align 8 - %"25" = load i64, ptr addrspace(5) %0, align 8 +"10": ; preds = %3 + store i64 1, ptr addrspace(5) %1, align 8 + %"25" = load i64, ptr addrspace(5) %1, align 8 store i64 %"25", ptr addrspace(5) %"8", align 8 br label %"11" -"11": ; preds = %"10", %"39" +"11": ; preds = %"10", %3 %"26" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"26", label %"13", label %"12" "12": ; preds = %"11" - %1 = alloca i64, align 8, addrspace(5) - store i64 2, ptr addrspace(5) %1, align 8 - %"27" = load i64, ptr addrspace(5) %1, align 8 + store i64 2, ptr addrspace(5) %2, align 8 + %"27" = load i64, ptr addrspace(5) %2, align 8 store i64 %"27", ptr addrspace(5) %"8", align 8 br label %"13" diff --git a/ptx/src/test/spirv_run/setp_bool.ll b/ptx/src/test/spirv_run/setp_bool.ll index ac1b2bb..f0b659f 100644 --- a/ptx/src/test/spirv_run/setp_bool.ll +++ b/ptx/src/test/spirv_run/setp_bool.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @setp_bool(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 { -"50": %"16" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"16", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) @@ -13,6 +11,13 @@ define protected amdgpu_kernel void @setp_bool(ptr addrspace(4) byref(i64) %"44" %"9" = alloca i1, align 1, addrspace(5) %"10" = 
alloca i1, align 1, addrspace(5) %"11" = alloca i1, align 1, addrspace(5) + %1 = alloca i1, align 1, addrspace(5) + %2 = alloca float, align 4, addrspace(5) + %3 = alloca float, align 4, addrspace(5) + br label %4 + +4: ; preds = %0 + store i1 false, ptr addrspace(5) %"16", align 1 %"17" = load i64, ptr addrspace(4) %"44", align 8 store i64 %"17", ptr addrspace(5) %"4", align 8 %"18" = load i64, ptr addrspace(4) %"45", align 8 @@ -23,47 +28,44 @@ define protected amdgpu_kernel void @setp_bool(ptr addrspace(4) byref(i64) %"44" store float %"19", ptr addrspace(5) %"6", align 4 %"22" = load i64, ptr addrspace(5) %"4", align 8 %"47" = inttoptr i64 %"22" to ptr - %"52" = getelementptr inbounds i8, ptr %"47", i64 4 - %"21" = load float, ptr %"52", align 4 + %"51" = getelementptr inbounds i8, ptr %"47", i64 4 + %"21" = load float, ptr %"51", align 4 store float %"21", ptr addrspace(5) %"7", align 4 %"24" = load i64, ptr addrspace(5) %"4", align 8 %"48" = inttoptr i64 %"24" to ptr - %"54" = getelementptr inbounds i8, ptr %"48", i64 8 - %"23" = load float, ptr %"54", align 4 + %"53" = getelementptr inbounds i8, ptr %"48", i64 8 + %"23" = load float, ptr %"53", align 4 store float %"23", ptr addrspace(5) %"8", align 4 - %0 = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %0, align 1 - %"25" = load i1, ptr addrspace(5) %0, align 1 + store i1 false, ptr addrspace(5) %1, align 1 + %"25" = load i1, ptr addrspace(5) %1, align 1 store i1 %"25", ptr addrspace(5) %"9", align 1 %"28" = load float, ptr addrspace(5) %"6", align 4 %"29" = load float, ptr addrspace(5) %"7", align 4 %"30" = load i1, ptr addrspace(5) %"9", align 1 - %1 = fcmp ogt float %"28", %"29" - %2 = xor i1 %1, true - %"26" = and i1 %1, %"30" - %"27" = and i1 %2, %"30" + %5 = fcmp ogt float %"28", %"29" + %6 = xor i1 %5, true + %"26" = and i1 %5, %"30" + %"27" = and i1 %6, %"30" store i1 %"26", ptr addrspace(5) %"10", align 1 store i1 %"27", ptr addrspace(5) %"11", align 1 %"31" = load i1, ptr 
addrspace(5) %"10", align 1 br i1 %"31", label %"12", label %"13" -"12": ; preds = %"50" +"12": ; preds = %4 %"33" = load float, ptr addrspace(5) %"6", align 4 - %3 = alloca float, align 4, addrspace(5) - store float %"33", ptr addrspace(5) %3, align 4 - %"32" = load float, ptr addrspace(5) %3, align 4 + store float %"33", ptr addrspace(5) %2, align 4 + %"32" = load float, ptr addrspace(5) %2, align 4 store float %"32", ptr addrspace(5) %"8", align 4 br label %"13" -"13": ; preds = %"12", %"50" +"13": ; preds = %"12", %4 %"34" = load i1, ptr addrspace(5) %"11", align 1 br i1 %"34", label %"14", label %"15" "14": ; preds = %"13" %"36" = load float, ptr addrspace(5) %"7", align 4 - %4 = alloca float, align 4, addrspace(5) - store float %"36", ptr addrspace(5) %4, align 4 - %"35" = load float, ptr addrspace(5) %4, align 4 + store float %"36", ptr addrspace(5) %3, align 4 + %"35" = load float, ptr addrspace(5) %3, align 4 store float %"35", ptr addrspace(5) %"8", align 4 br label %"15" diff --git a/ptx/src/test/spirv_run/setp_gt.ll b/ptx/src/test/spirv_run/setp_gt.ll index 3a8b965..dbaf20a 100644 --- a/ptx/src/test/spirv_run/setp_gt.ll +++ b/ptx/src/test/spirv_run/setp_gt.ll @@ -2,15 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { -"39": %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) + %1 = alloca float, align 4, addrspace(5) + %2 = alloca float, align 4, addrspace(5) + br label %3 + +3: ; preds = %0 + store i1 false, ptr addrspace(5) %"14", align 1 %"15" = load i64, ptr addrspace(4) 
%"34", align 8 store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"35", align 8 @@ -21,8 +25,8 @@ define protected amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"34", store float %"17", ptr addrspace(5) %"6", align 4 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"37" = inttoptr i64 %"20" to ptr - %"41" = getelementptr inbounds i8, ptr %"37", i64 4 - %"19" = load float, ptr %"41", align 4 + %"40" = getelementptr inbounds i8, ptr %"37", i64 4 + %"19" = load float, ptr %"40", align 4 store float %"19", ptr addrspace(5) %"7", align 4 %"22" = load float, ptr addrspace(5) %"6", align 4 %"23" = load float, ptr addrspace(5) %"7", align 4 @@ -31,23 +35,21 @@ define protected amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"34", %"24" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"24", label %"10", label %"11" -"10": ; preds = %"39" +"10": ; preds = %3 %"26" = load float, ptr addrspace(5) %"6", align 4 - %0 = alloca float, align 4, addrspace(5) - store float %"26", ptr addrspace(5) %0, align 4 - %"25" = load float, ptr addrspace(5) %0, align 4 + store float %"26", ptr addrspace(5) %1, align 4 + %"25" = load float, ptr addrspace(5) %1, align 4 store float %"25", ptr addrspace(5) %"8", align 4 br label %"11" -"11": ; preds = %"10", %"39" +"11": ; preds = %"10", %3 %"27" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"27", label %"13", label %"12" "12": ; preds = %"11" %"29" = load float, ptr addrspace(5) %"7", align 4 - %1 = alloca float, align 4, addrspace(5) - store float %"29", ptr addrspace(5) %1, align 4 - %"28" = load float, ptr addrspace(5) %1, align 4 + store float %"29", ptr addrspace(5) %2, align 4 + %"28" = load float, ptr addrspace(5) %2, align 4 store float %"28", ptr addrspace(5) %"8", align 4 br label %"13" diff --git a/ptx/src/test/spirv_run/setp_leu.ll b/ptx/src/test/spirv_run/setp_leu.ll index 9699fde..d27b96a 100644 --- a/ptx/src/test/spirv_run/setp_leu.ll +++ b/ptx/src/test/spirv_run/setp_leu.ll 
@@ -2,15 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { -"39": %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) + %1 = alloca float, align 4, addrspace(5) + %2 = alloca float, align 4, addrspace(5) + br label %3 + +3: ; preds = %0 + store i1 false, ptr addrspace(5) %"14", align 1 %"15" = load i64, ptr addrspace(4) %"34", align 8 store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"35", align 8 @@ -21,8 +25,8 @@ define protected amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"34", store float %"17", ptr addrspace(5) %"6", align 4 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"37" = inttoptr i64 %"20" to ptr - %"41" = getelementptr inbounds i8, ptr %"37", i64 4 - %"19" = load float, ptr %"41", align 4 + %"40" = getelementptr inbounds i8, ptr %"37", i64 4 + %"19" = load float, ptr %"40", align 4 store float %"19", ptr addrspace(5) %"7", align 4 %"22" = load float, ptr addrspace(5) %"6", align 4 %"23" = load float, ptr addrspace(5) %"7", align 4 @@ -31,23 +35,21 @@ define protected amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"34", %"24" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"24", label %"10", label %"11" -"10": ; preds = %"39" +"10": ; preds = %3 %"26" = load float, ptr addrspace(5) %"6", align 4 - %0 = alloca float, align 4, addrspace(5) - store float %"26", ptr addrspace(5) %0, align 4 - %"25" = load float, ptr addrspace(5) %0, align 4 + store float %"26", ptr addrspace(5) %1, align 4 + %"25" = load float, ptr 
addrspace(5) %1, align 4 store float %"25", ptr addrspace(5) %"8", align 4 br label %"11" -"11": ; preds = %"10", %"39" +"11": ; preds = %"10", %3 %"27" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"27", label %"13", label %"12" "12": ; preds = %"11" %"29" = load float, ptr addrspace(5) %"7", align 4 - %1 = alloca float, align 4, addrspace(5) - store float %"29", ptr addrspace(5) %1, align 4 - %"28" = load float, ptr addrspace(5) %1, align 4 + store float %"29", ptr addrspace(5) %2, align 4 + %"28" = load float, ptr addrspace(5) %2, align 4 store float %"28", ptr addrspace(5) %"8", align 4 br label %"13" diff --git a/ptx/src/test/spirv_run/setp_nan.ll b/ptx/src/test/spirv_run/setp_nan.ll index 1368386..709ed89 100644 --- a/ptx/src/test/spirv_run/setp_nan.ll +++ b/ptx/src/test/spirv_run/setp_nan.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115", ptr addrspace(4) byref(i64) %"116") #0 { -"129": %"32" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"32", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) @@ -17,6 +15,18 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" %"13" = alloca float, align 4, addrspace(5) %"14" = alloca i32, align 4, addrspace(5) %"15" = alloca i1, align 1, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + %2 = alloca i32, align 4, addrspace(5) + %3 = alloca i32, align 4, addrspace(5) + %4 = alloca i32, align 4, addrspace(5) + %5 = alloca i32, align 4, addrspace(5) + %6 = alloca i32, align 4, addrspace(5) + %7 = alloca i32, align 4, addrspace(5) + %8 = alloca i32, align 4, addrspace(5) + br label %9 + +9: ; preds = %0 + store i1 false, ptr addrspace(5) %"32", align 1 %"33" = load i64, ptr addrspace(4) %"115", align 8 store i64 %"33", 
ptr addrspace(5) %"4", align 8 %"34" = load i64, ptr addrspace(4) %"116", align 8 @@ -27,38 +37,38 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" store float %"35", ptr addrspace(5) %"6", align 4 %"38" = load i64, ptr addrspace(5) %"4", align 8 %"118" = inttoptr i64 %"38" to ptr - %"131" = getelementptr inbounds i8, ptr %"118", i64 4 - %"37" = load float, ptr %"131", align 4 + %"130" = getelementptr inbounds i8, ptr %"118", i64 4 + %"37" = load float, ptr %"130", align 4 store float %"37", ptr addrspace(5) %"7", align 4 %"40" = load i64, ptr addrspace(5) %"4", align 8 %"119" = inttoptr i64 %"40" to ptr - %"133" = getelementptr inbounds i8, ptr %"119", i64 8 - %"39" = load float, ptr %"133", align 4 + %"132" = getelementptr inbounds i8, ptr %"119", i64 8 + %"39" = load float, ptr %"132", align 4 store float %"39", ptr addrspace(5) %"8", align 4 %"42" = load i64, ptr addrspace(5) %"4", align 8 %"120" = inttoptr i64 %"42" to ptr - %"135" = getelementptr inbounds i8, ptr %"120", i64 12 - %"41" = load float, ptr %"135", align 4 + %"134" = getelementptr inbounds i8, ptr %"120", i64 12 + %"41" = load float, ptr %"134", align 4 store float %"41", ptr addrspace(5) %"9", align 4 %"44" = load i64, ptr addrspace(5) %"4", align 8 %"121" = inttoptr i64 %"44" to ptr - %"137" = getelementptr inbounds i8, ptr %"121", i64 16 - %"43" = load float, ptr %"137", align 4 + %"136" = getelementptr inbounds i8, ptr %"121", i64 16 + %"43" = load float, ptr %"136", align 4 store float %"43", ptr addrspace(5) %"10", align 4 %"46" = load i64, ptr addrspace(5) %"4", align 8 %"122" = inttoptr i64 %"46" to ptr - %"139" = getelementptr inbounds i8, ptr %"122", i64 20 - %"45" = load float, ptr %"139", align 4 + %"138" = getelementptr inbounds i8, ptr %"122", i64 20 + %"45" = load float, ptr %"138", align 4 store float %"45", ptr addrspace(5) %"11", align 4 %"48" = load i64, ptr addrspace(5) %"4", align 8 %"123" = inttoptr i64 %"48" to ptr - %"141" = getelementptr 
inbounds i8, ptr %"123", i64 24 - %"47" = load float, ptr %"141", align 4 + %"140" = getelementptr inbounds i8, ptr %"123", i64 24 + %"47" = load float, ptr %"140", align 4 store float %"47", ptr addrspace(5) %"12", align 4 %"50" = load i64, ptr addrspace(5) %"4", align 8 %"124" = inttoptr i64 %"50" to ptr - %"143" = getelementptr inbounds i8, ptr %"124", i64 28 - %"49" = load float, ptr %"143", align 4 + %"142" = getelementptr inbounds i8, ptr %"124", i64 28 + %"49" = load float, ptr %"142", align 4 store float %"49", ptr addrspace(5) %"13", align 4 %"52" = load float, ptr addrspace(5) %"6", align 4 %"53" = load float, ptr addrspace(5) %"7", align 4 @@ -67,21 +77,19 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" %"54" = load i1, ptr addrspace(5) %"15", align 1 br i1 %"54", label %"16", label %"17" -"16": ; preds = %"129" - %0 = alloca i32, align 4, addrspace(5) - store i32 1, ptr addrspace(5) %0, align 4 - %"55" = load i32, ptr addrspace(5) %0, align 4 +"16": ; preds = %9 + store i32 1, ptr addrspace(5) %1, align 4 + %"55" = load i32, ptr addrspace(5) %1, align 4 store i32 %"55", ptr addrspace(5) %"14", align 4 br label %"17" -"17": ; preds = %"16", %"129" +"17": ; preds = %"16", %9 %"56" = load i1, ptr addrspace(5) %"15", align 1 br i1 %"56", label %"19", label %"18" "18": ; preds = %"17" - %1 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %1, align 4 - %"57" = load i32, ptr addrspace(5) %1, align 4 + store i32 0, ptr addrspace(5) %2, align 4 + %"57" = load i32, ptr addrspace(5) %2, align 4 store i32 %"57", ptr addrspace(5) %"14", align 4 br label %"19" @@ -98,9 +106,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" br i1 %"63", label %"20", label %"21" "20": ; preds = %"19" - %2 = alloca i32, align 4, addrspace(5) - store i32 1, ptr addrspace(5) %2, align 4 - %"64" = load i32, ptr addrspace(5) %2, align 4 + store i32 1, ptr addrspace(5) %3, align 4 + %"64" = load i32, 
ptr addrspace(5) %3, align 4 store i32 %"64", ptr addrspace(5) %"14", align 4 br label %"21" @@ -109,9 +116,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" br i1 %"65", label %"23", label %"22" "22": ; preds = %"21" - %3 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %3, align 4 - %"66" = load i32, ptr addrspace(5) %3, align 4 + store i32 0, ptr addrspace(5) %4, align 4 + %"66" = load i32, ptr addrspace(5) %4, align 4 store i32 %"66", ptr addrspace(5) %"14", align 4 br label %"23" @@ -119,8 +125,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" %"67" = load i64, ptr addrspace(5) %"5", align 8 %"68" = load i32, ptr addrspace(5) %"14", align 4 %"126" = inttoptr i64 %"67" to ptr - %"145" = getelementptr inbounds i8, ptr %"126", i64 4 - store i32 %"68", ptr %"145", align 4 + %"144" = getelementptr inbounds i8, ptr %"126", i64 4 + store i32 %"68", ptr %"144", align 4 %"70" = load float, ptr addrspace(5) %"10", align 4 %"71" = load float, ptr addrspace(5) %"11", align 4 %"69" = fcmp uno float %"70", %"71" @@ -129,9 +135,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" br i1 %"72", label %"24", label %"25" "24": ; preds = %"23" - %4 = alloca i32, align 4, addrspace(5) - store i32 1, ptr addrspace(5) %4, align 4 - %"73" = load i32, ptr addrspace(5) %4, align 4 + store i32 1, ptr addrspace(5) %5, align 4 + %"73" = load i32, ptr addrspace(5) %5, align 4 store i32 %"73", ptr addrspace(5) %"14", align 4 br label %"25" @@ -140,9 +145,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" br i1 %"74", label %"27", label %"26" "26": ; preds = %"25" - %5 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %5, align 4 - %"75" = load i32, ptr addrspace(5) %5, align 4 + store i32 0, ptr addrspace(5) %6, align 4 + %"75" = load i32, ptr addrspace(5) %6, align 4 store i32 %"75", ptr addrspace(5) %"14", align 4 
br label %"27" @@ -150,8 +154,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" %"76" = load i64, ptr addrspace(5) %"5", align 8 %"77" = load i32, ptr addrspace(5) %"14", align 4 %"127" = inttoptr i64 %"76" to ptr - %"147" = getelementptr inbounds i8, ptr %"127", i64 8 - store i32 %"77", ptr %"147", align 4 + %"146" = getelementptr inbounds i8, ptr %"127", i64 8 + store i32 %"77", ptr %"146", align 4 %"79" = load float, ptr addrspace(5) %"12", align 4 %"80" = load float, ptr addrspace(5) %"13", align 4 %"78" = fcmp uno float %"79", %"80" @@ -160,9 +164,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" br i1 %"81", label %"28", label %"29" "28": ; preds = %"27" - %6 = alloca i32, align 4, addrspace(5) - store i32 1, ptr addrspace(5) %6, align 4 - %"82" = load i32, ptr addrspace(5) %6, align 4 + store i32 1, ptr addrspace(5) %7, align 4 + %"82" = load i32, ptr addrspace(5) %7, align 4 store i32 %"82", ptr addrspace(5) %"14", align 4 br label %"29" @@ -171,9 +174,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" br i1 %"83", label %"31", label %"30" "30": ; preds = %"29" - %7 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %7, align 4 - %"84" = load i32, ptr addrspace(5) %7, align 4 + store i32 0, ptr addrspace(5) %8, align 4 + %"84" = load i32, ptr addrspace(5) %8, align 4 store i32 %"84", ptr addrspace(5) %"14", align 4 br label %"31" @@ -181,8 +183,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" %"85" = load i64, ptr addrspace(5) %"5", align 8 %"86" = load i32, ptr addrspace(5) %"14", align 4 %"128" = inttoptr i64 %"85" to ptr - %"149" = getelementptr inbounds i8, ptr %"128", i64 12 - store i32 %"86", ptr %"149", align 4 + %"148" = getelementptr inbounds i8, ptr %"128", i64 12 + store i32 %"86", ptr %"148", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp_num.ll 
b/ptx/src/test/spirv_run/setp_num.ll index a6254a2..bebecc4 100644 --- a/ptx/src/test/spirv_run/setp_num.ll +++ b/ptx/src/test/spirv_run/setp_num.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115", ptr addrspace(4) byref(i64) %"116") #0 { -"129": %"32" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"32", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) @@ -17,6 +15,18 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" %"13" = alloca float, align 4, addrspace(5) %"14" = alloca i32, align 4, addrspace(5) %"15" = alloca i1, align 1, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + %2 = alloca i32, align 4, addrspace(5) + %3 = alloca i32, align 4, addrspace(5) + %4 = alloca i32, align 4, addrspace(5) + %5 = alloca i32, align 4, addrspace(5) + %6 = alloca i32, align 4, addrspace(5) + %7 = alloca i32, align 4, addrspace(5) + %8 = alloca i32, align 4, addrspace(5) + br label %9 + +9: ; preds = %0 + store i1 false, ptr addrspace(5) %"32", align 1 %"33" = load i64, ptr addrspace(4) %"115", align 8 store i64 %"33", ptr addrspace(5) %"4", align 8 %"34" = load i64, ptr addrspace(4) %"116", align 8 @@ -27,38 +37,38 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" store float %"35", ptr addrspace(5) %"6", align 4 %"38" = load i64, ptr addrspace(5) %"4", align 8 %"118" = inttoptr i64 %"38" to ptr - %"131" = getelementptr inbounds i8, ptr %"118", i64 4 - %"37" = load float, ptr %"131", align 4 + %"130" = getelementptr inbounds i8, ptr %"118", i64 4 + %"37" = load float, ptr %"130", align 4 store float %"37", ptr addrspace(5) %"7", align 4 %"40" = load i64, ptr addrspace(5) %"4", align 8 %"119" = inttoptr i64 %"40" to ptr - %"133" = 
getelementptr inbounds i8, ptr %"119", i64 8 - %"39" = load float, ptr %"133", align 4 + %"132" = getelementptr inbounds i8, ptr %"119", i64 8 + %"39" = load float, ptr %"132", align 4 store float %"39", ptr addrspace(5) %"8", align 4 %"42" = load i64, ptr addrspace(5) %"4", align 8 %"120" = inttoptr i64 %"42" to ptr - %"135" = getelementptr inbounds i8, ptr %"120", i64 12 - %"41" = load float, ptr %"135", align 4 + %"134" = getelementptr inbounds i8, ptr %"120", i64 12 + %"41" = load float, ptr %"134", align 4 store float %"41", ptr addrspace(5) %"9", align 4 %"44" = load i64, ptr addrspace(5) %"4", align 8 %"121" = inttoptr i64 %"44" to ptr - %"137" = getelementptr inbounds i8, ptr %"121", i64 16 - %"43" = load float, ptr %"137", align 4 + %"136" = getelementptr inbounds i8, ptr %"121", i64 16 + %"43" = load float, ptr %"136", align 4 store float %"43", ptr addrspace(5) %"10", align 4 %"46" = load i64, ptr addrspace(5) %"4", align 8 %"122" = inttoptr i64 %"46" to ptr - %"139" = getelementptr inbounds i8, ptr %"122", i64 20 - %"45" = load float, ptr %"139", align 4 + %"138" = getelementptr inbounds i8, ptr %"122", i64 20 + %"45" = load float, ptr %"138", align 4 store float %"45", ptr addrspace(5) %"11", align 4 %"48" = load i64, ptr addrspace(5) %"4", align 8 %"123" = inttoptr i64 %"48" to ptr - %"141" = getelementptr inbounds i8, ptr %"123", i64 24 - %"47" = load float, ptr %"141", align 4 + %"140" = getelementptr inbounds i8, ptr %"123", i64 24 + %"47" = load float, ptr %"140", align 4 store float %"47", ptr addrspace(5) %"12", align 4 %"50" = load i64, ptr addrspace(5) %"4", align 8 %"124" = inttoptr i64 %"50" to ptr - %"143" = getelementptr inbounds i8, ptr %"124", i64 28 - %"49" = load float, ptr %"143", align 4 + %"142" = getelementptr inbounds i8, ptr %"124", i64 28 + %"49" = load float, ptr %"142", align 4 store float %"49", ptr addrspace(5) %"13", align 4 %"52" = load float, ptr addrspace(5) %"6", align 4 %"53" = load float, ptr addrspace(5) %"7", align 
4 @@ -67,21 +77,19 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" %"54" = load i1, ptr addrspace(5) %"15", align 1 br i1 %"54", label %"16", label %"17" -"16": ; preds = %"129" - %0 = alloca i32, align 4, addrspace(5) - store i32 2, ptr addrspace(5) %0, align 4 - %"55" = load i32, ptr addrspace(5) %0, align 4 +"16": ; preds = %9 + store i32 2, ptr addrspace(5) %1, align 4 + %"55" = load i32, ptr addrspace(5) %1, align 4 store i32 %"55", ptr addrspace(5) %"14", align 4 br label %"17" -"17": ; preds = %"16", %"129" +"17": ; preds = %"16", %9 %"56" = load i1, ptr addrspace(5) %"15", align 1 br i1 %"56", label %"19", label %"18" "18": ; preds = %"17" - %1 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %1, align 4 - %"57" = load i32, ptr addrspace(5) %1, align 4 + store i32 0, ptr addrspace(5) %2, align 4 + %"57" = load i32, ptr addrspace(5) %2, align 4 store i32 %"57", ptr addrspace(5) %"14", align 4 br label %"19" @@ -98,9 +106,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" br i1 %"63", label %"20", label %"21" "20": ; preds = %"19" - %2 = alloca i32, align 4, addrspace(5) - store i32 2, ptr addrspace(5) %2, align 4 - %"64" = load i32, ptr addrspace(5) %2, align 4 + store i32 2, ptr addrspace(5) %3, align 4 + %"64" = load i32, ptr addrspace(5) %3, align 4 store i32 %"64", ptr addrspace(5) %"14", align 4 br label %"21" @@ -109,9 +116,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" br i1 %"65", label %"23", label %"22" "22": ; preds = %"21" - %3 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %3, align 4 - %"66" = load i32, ptr addrspace(5) %3, align 4 + store i32 0, ptr addrspace(5) %4, align 4 + %"66" = load i32, ptr addrspace(5) %4, align 4 store i32 %"66", ptr addrspace(5) %"14", align 4 br label %"23" @@ -119,8 +125,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" %"67" = load 
i64, ptr addrspace(5) %"5", align 8 %"68" = load i32, ptr addrspace(5) %"14", align 4 %"126" = inttoptr i64 %"67" to ptr - %"145" = getelementptr inbounds i8, ptr %"126", i64 4 - store i32 %"68", ptr %"145", align 4 + %"144" = getelementptr inbounds i8, ptr %"126", i64 4 + store i32 %"68", ptr %"144", align 4 %"70" = load float, ptr addrspace(5) %"10", align 4 %"71" = load float, ptr addrspace(5) %"11", align 4 %"69" = fcmp ord float %"70", %"71" @@ -129,9 +135,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" br i1 %"72", label %"24", label %"25" "24": ; preds = %"23" - %4 = alloca i32, align 4, addrspace(5) - store i32 2, ptr addrspace(5) %4, align 4 - %"73" = load i32, ptr addrspace(5) %4, align 4 + store i32 2, ptr addrspace(5) %5, align 4 + %"73" = load i32, ptr addrspace(5) %5, align 4 store i32 %"73", ptr addrspace(5) %"14", align 4 br label %"25" @@ -140,9 +145,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" br i1 %"74", label %"27", label %"26" "26": ; preds = %"25" - %5 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %5, align 4 - %"75" = load i32, ptr addrspace(5) %5, align 4 + store i32 0, ptr addrspace(5) %6, align 4 + %"75" = load i32, ptr addrspace(5) %6, align 4 store i32 %"75", ptr addrspace(5) %"14", align 4 br label %"27" @@ -150,8 +154,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" %"76" = load i64, ptr addrspace(5) %"5", align 8 %"77" = load i32, ptr addrspace(5) %"14", align 4 %"127" = inttoptr i64 %"76" to ptr - %"147" = getelementptr inbounds i8, ptr %"127", i64 8 - store i32 %"77", ptr %"147", align 4 + %"146" = getelementptr inbounds i8, ptr %"127", i64 8 + store i32 %"77", ptr %"146", align 4 %"79" = load float, ptr addrspace(5) %"12", align 4 %"80" = load float, ptr addrspace(5) %"13", align 4 %"78" = fcmp ord float %"79", %"80" @@ -160,9 +164,8 @@ define protected amdgpu_kernel void @setp_num(ptr 
addrspace(4) byref(i64) %"115" br i1 %"81", label %"28", label %"29" "28": ; preds = %"27" - %6 = alloca i32, align 4, addrspace(5) - store i32 2, ptr addrspace(5) %6, align 4 - %"82" = load i32, ptr addrspace(5) %6, align 4 + store i32 2, ptr addrspace(5) %7, align 4 + %"82" = load i32, ptr addrspace(5) %7, align 4 store i32 %"82", ptr addrspace(5) %"14", align 4 br label %"29" @@ -171,9 +174,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" br i1 %"83", label %"31", label %"30" "30": ; preds = %"29" - %7 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %7, align 4 - %"84" = load i32, ptr addrspace(5) %7, align 4 + store i32 0, ptr addrspace(5) %8, align 4 + %"84" = load i32, ptr addrspace(5) %8, align 4 store i32 %"84", ptr addrspace(5) %"14", align 4 br label %"31" @@ -181,8 +183,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" %"85" = load i64, ptr addrspace(5) %"5", align 8 %"86" = load i32, ptr addrspace(5) %"14", align 4 %"128" = inttoptr i64 %"85" to ptr - %"149" = getelementptr inbounds i8, ptr %"128", i64 12 - store i32 %"86", ptr %"149", align 4 + %"148" = getelementptr inbounds i8, ptr %"128", i64 12 + store i32 %"86", ptr %"148", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp_pred2.ll b/ptx/src/test/spirv_run/setp_pred2.ll index 8220fc0..01ae23e 100644 --- a/ptx/src/test/spirv_run/setp_pred2.ll +++ b/ptx/src/test/spirv_run/setp_pred2.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { -"41": %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) @@ -12,6 +10,12 @@ define protected 
amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"36 %"8" = alloca float, align 4, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) %"10" = alloca i1, align 1, addrspace(5) + %1 = alloca float, align 4, addrspace(5) + %2 = alloca float, align 4, addrspace(5) + br label %3 + +3: ; preds = %0 + store i1 false, ptr addrspace(5) %"15", align 1 %"16" = load i64, ptr addrspace(4) %"36", align 8 store i64 %"16", ptr addrspace(5) %"4", align 8 %"17" = load i64, ptr addrspace(4) %"37", align 8 @@ -22,8 +26,8 @@ define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"36 store float %"18", ptr addrspace(5) %"6", align 4 %"21" = load i64, ptr addrspace(5) %"4", align 8 %"39" = inttoptr i64 %"21" to ptr - %"43" = getelementptr inbounds i8, ptr %"39", i64 4 - %"20" = load float, ptr %"43", align 4 + %"42" = getelementptr inbounds i8, ptr %"39", i64 4 + %"20" = load float, ptr %"42", align 4 store float %"20", ptr addrspace(5) %"7", align 4 %"24" = load float, ptr addrspace(5) %"6", align 4 %"25" = load float, ptr addrspace(5) %"7", align 4 @@ -34,23 +38,21 @@ define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"36 %"26" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"26", label %"11", label %"12" -"11": ; preds = %"41" +"11": ; preds = %3 %"28" = load float, ptr addrspace(5) %"6", align 4 - %0 = alloca float, align 4, addrspace(5) - store float %"28", ptr addrspace(5) %0, align 4 - %"27" = load float, ptr addrspace(5) %0, align 4 + store float %"28", ptr addrspace(5) %1, align 4 + %"27" = load float, ptr addrspace(5) %1, align 4 store float %"27", ptr addrspace(5) %"8", align 4 br label %"12" -"12": ; preds = %"11", %"41" +"12": ; preds = %"11", %3 %"29" = load i1, ptr addrspace(5) %"10", align 1 br i1 %"29", label %"13", label %"14" "13": ; preds = %"12" %"31" = load float, ptr addrspace(5) %"7", align 4 - %1 = alloca float, align 4, addrspace(5) - store float %"31", ptr addrspace(5) %1, align 4 - %"30" = load 
float, ptr addrspace(5) %1, align 4 + store float %"31", ptr addrspace(5) %2, align 4 + %"30" = load float, ptr addrspace(5) %2, align 4 store float %"30", ptr addrspace(5) %"8", align 4 br label %"14" diff --git a/ptx/src/test/spirv_run/shared_ptr_32.ll b/ptx/src/test/spirv_run/shared_ptr_32.ll index 8705967..f3e0269 100644 --- a/ptx/src/test/spirv_run/shared_ptr_32.ll +++ b/ptx/src/test/spirv_run/shared_ptr_32.ll @@ -4,21 +4,23 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [128 x i8] undef, align 4 define protected amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"31": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"24", align 8 store i64 %"11", ptr addrspace(5) %"5", align 8 %"12" = load i64, ptr addrspace(4) %"25", align 8 store i64 %"12", ptr addrspace(5) %"6", align 8 - %0 = alloca i32, align 4, addrspace(5) - store i32 ptrtoint (ptr addrspace(3) @"4" to i32), ptr addrspace(5) %0, align 4 - %"13" = load i32, ptr addrspace(5) %0, align 4 + store i32 ptrtoint (ptr addrspace(3) @"4" to i32), ptr addrspace(5) %1, align 4 + %"13" = load i32, ptr addrspace(5) %1, align 4 store i32 %"13", ptr addrspace(5) %"7", align 4 %"15" = load i64, ptr addrspace(5) %"5", align 8 %"27" = inttoptr i64 %"15" to ptr addrspace(1) @@ -30,8 +32,8 @@ define protected amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) % store i64 %"17", ptr addrspace(3) %"28", align 8 %"19" = load i32, ptr addrspace(5) %"7", align 4 %"29" = inttoptr i32 %"19" to ptr addrspace(3) - %"33" = getelementptr 
inbounds i8, ptr addrspace(3) %"29", i64 0 - %"18" = load i64, ptr addrspace(3) %"33", align 8 + %"32" = getelementptr inbounds i8, ptr addrspace(3) %"29", i64 0 + %"18" = load i64, ptr addrspace(3) %"32", align 8 store i64 %"18", ptr addrspace(5) %"9", align 8 %"20" = load i64, ptr addrspace(5) %"6", align 8 %"21" = load i64, ptr addrspace(5) %"9", align 8 diff --git a/ptx/src/test/spirv_run/shared_ptr_take_address.ll b/ptx/src/test/spirv_run/shared_ptr_take_address.ll index 6c430a2..fd61d71 100644 --- a/ptx/src/test/spirv_run/shared_ptr_take_address.ll +++ b/ptx/src/test/spirv_run/shared_ptr_take_address.ll @@ -4,21 +4,23 @@ target triple = "amdgcn-amd-amdhsa" @shared_mem = external hidden addrspace(3) global [0 x i8], align 4 define protected amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"29": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"11", ptr addrspace(5) %"5", align 8 %"12" = load i64, ptr addrspace(4) %"23", align 8 store i64 %"12", ptr addrspace(5) %"6", align 8 - %0 = alloca i64, align 8, addrspace(5) - store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %0, align 8 - %"13" = load i64, ptr addrspace(5) %0, align 8 + store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %1, align 8 + %"13" = load i64, ptr addrspace(5) %1, align 8 store i64 %"13", ptr addrspace(5) %"7", align 8 %"15" = load i64, ptr addrspace(5) %"5", align 8 %"25" = inttoptr i64 %"15" to ptr addrspace(1) diff --git 
a/ptx/src/test/spirv_run/shared_unify_decl.ll b/ptx/src/test/spirv_run/shared_unify_decl.ll index 4cc24fb..61d62d7 100644 --- a/ptx/src/test/spirv_run/shared_unify_decl.ll +++ b/ptx/src/test/spirv_run/shared_unify_decl.ll @@ -4,16 +4,18 @@ target triple = "amdgcn-amd-amdhsa" @shared_ex = external hidden addrspace(3) global [0 x i32] @shared_mod = private addrspace(3) global [4 x i32] undef -define private i64 @"3"(ptr addrspace(3) %"66", ptr addrspace(3) %"67") #0 { -"59": +define private i64 @"3"(ptr addrspace(3) %"63", ptr addrspace(3) %"64") #0 { %"8" = alloca i64, align 8, addrspace(5) %"20" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"20", align 1 %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) - %"23" = load i64, ptr addrspace(3) %"67", align 8 + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"20", align 1 + %"23" = load i64, ptr addrspace(3) %"64", align 8 store i64 %"23", ptr addrspace(5) %"9", align 8 - %"24" = load i64, ptr addrspace(3) %"66", align 8 + %"24" = load i64, ptr addrspace(3) %"63", align 8 store i64 %"24", ptr addrspace(5) %"10", align 8 %"26" = load i64, ptr addrspace(5) %"10", align 8 %"27" = load i64, ptr addrspace(5) %"9", align 8 @@ -23,29 +25,33 @@ define private i64 @"3"(ptr addrspace(3) %"66", ptr addrspace(3) %"67") #0 { ret i64 %"28" } -define private i64 @"5"(i64 %"29", ptr addrspace(3) %"68", ptr addrspace(3) %"69") #0 { -"60": +define private i64 @"5"(i64 %"29", ptr addrspace(3) %"65", ptr addrspace(3) %"66") #0 { %"12" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 + br label %1 + +1: ; preds = %0 store i64 %"29", ptr addrspace(5) %"12", align 8 + store i1 false, ptr addrspace(5) %"21", align 1 %"30" = load i64, ptr addrspace(5) %"12", align 8 - store i64 %"30", ptr addrspace(3) %"68", align 8 - %"31" = call i64 @"3"(ptr 
addrspace(3) %"68", ptr addrspace(3) %"69") + store i64 %"30", ptr addrspace(3) %"65", align 8 + %"31" = call i64 @"3"(ptr addrspace(3) %"65", ptr addrspace(3) %"66") store i64 %"31", ptr addrspace(5) %"11", align 8 %"32" = load i64, ptr addrspace(5) %"11", align 8 ret i64 %"32" } define protected amdgpu_kernel void @shared_unify_decl(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { -"61": %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 %"16" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) %"18" = alloca i64, align 8, addrspace(5) %"19" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"22", align 1 %"33" = load i64, ptr addrspace(4) %"46", align 8 store i64 %"33", ptr addrspace(5) %"16", align 8 %"34" = load i64, ptr addrspace(4) %"47", align 8 @@ -56,8 +62,8 @@ define protected amdgpu_kernel void @shared_unify_decl(ptr addrspace(4) byref(i6 store i64 %"35", ptr addrspace(5) %"18", align 8 %"38" = load i64, ptr addrspace(5) %"16", align 8 %"54" = inttoptr i64 %"38" to ptr addrspace(1) - %"71" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 8 - %"37" = load i64, ptr addrspace(1) %"71", align 8 + %"68" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 8 + %"37" = load i64, ptr addrspace(1) %"68", align 8 store i64 %"37", ptr addrspace(5) %"19", align 8 %"39" = load i64, ptr addrspace(5) %"19", align 8 store i64 %"39", ptr addrspace(3) @shared_mod, align 8 diff --git a/ptx/src/test/spirv_run/shared_unify_extern.ll b/ptx/src/test/spirv_run/shared_unify_extern.ll index 819e8a1..769fd9f 100644 --- a/ptx/src/test/spirv_run/shared_unify_extern.ll +++ b/ptx/src/test/spirv_run/shared_unify_extern.ll @@ -4,16 +4,18 @@ target triple = "amdgcn-amd-amdhsa" @shared_ex = external hidden addrspace(3) global [0 x i32] @shared_mod = private addrspace(3) global [4 x i32] undef -define private i64 @"3"(ptr 
addrspace(3) %"59", ptr addrspace(3) %"60") #0 { -"56": +define private i64 @"3"(ptr addrspace(3) %"56", ptr addrspace(3) %"57") #0 { %"4" = alloca i64, align 8, addrspace(5) %"17" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"17", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) - %"20" = load i64, ptr addrspace(3) %"60", align 8 + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"17", align 1 + %"20" = load i64, ptr addrspace(3) %"57", align 8 store i64 %"20", ptr addrspace(5) %"5", align 8 - %"21" = load i64, ptr addrspace(3) %"59", align 8 + %"21" = load i64, ptr addrspace(3) %"56", align 8 store i64 %"21", ptr addrspace(5) %"6", align 8 %"23" = load i64, ptr addrspace(5) %"6", align 8 %"24" = load i64, ptr addrspace(5) %"5", align 8 @@ -23,29 +25,33 @@ define private i64 @"3"(ptr addrspace(3) %"59", ptr addrspace(3) %"60") #0 { ret i64 %"25" } -define private i64 @"7"(i64 %"26", ptr addrspace(3) %"61", ptr addrspace(3) %"62") #0 { -"57": +define private i64 @"7"(i64 %"26", ptr addrspace(3) %"58", ptr addrspace(3) %"59") #0 { %"9" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"18" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"18", align 1 + br label %1 + +1: ; preds = %0 store i64 %"26", ptr addrspace(5) %"9", align 8 + store i1 false, ptr addrspace(5) %"18", align 1 %"27" = load i64, ptr addrspace(5) %"9", align 8 - store i64 %"27", ptr addrspace(3) %"61", align 8 - %"28" = call i64 @"3"(ptr addrspace(3) %"61", ptr addrspace(3) %"62") + store i64 %"27", ptr addrspace(3) %"58", align 8 + %"28" = call i64 @"3"(ptr addrspace(3) %"58", ptr addrspace(3) %"59") store i64 %"28", ptr addrspace(5) %"8", align 8 %"29" = load i64, ptr addrspace(5) %"8", align 8 ret i64 %"29" } define protected amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #0 { -"58": %"19" = alloca 
i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"19", align 1 %"13" = alloca i64, align 8, addrspace(5) %"14" = alloca i64, align 8, addrspace(5) %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"19", align 1 %"30" = load i64, ptr addrspace(4) %"43", align 8 store i64 %"30", ptr addrspace(5) %"13", align 8 %"31" = load i64, ptr addrspace(4) %"44", align 8 @@ -56,8 +62,8 @@ define protected amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref( store i64 %"32", ptr addrspace(5) %"15", align 8 %"35" = load i64, ptr addrspace(5) %"13", align 8 %"51" = inttoptr i64 %"35" to ptr addrspace(1) - %"64" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 8 - %"34" = load i64, ptr addrspace(1) %"64", align 8 + %"61" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 8 + %"34" = load i64, ptr addrspace(1) %"61", align 8 store i64 %"34", ptr addrspace(5) %"16", align 8 %"36" = load i64, ptr addrspace(5) %"16", align 8 store i64 %"36", ptr addrspace(3) @shared_mod, align 8 diff --git a/ptx/src/test/spirv_run/shared_unify_local.ll b/ptx/src/test/spirv_run/shared_unify_local.ll index b98b280..522e0f5 100644 --- a/ptx/src/test/spirv_run/shared_unify_local.ll +++ b/ptx/src/test/spirv_run/shared_unify_local.ll @@ -4,19 +4,21 @@ target triple = "amdgcn-amd-amdhsa" @shared_ex = external hidden addrspace(3) global [0 x i32] @"5" = private addrspace(3) global i64 undef, align 4 -define private i64 @"2"(i64 %"21", ptr addrspace(3) %"62", ptr addrspace(3) %"63") #0 { -"59": +define private i64 @"2"(i64 %"21", ptr addrspace(3) %"59", ptr addrspace(3) %"60") #0 { %"4" = alloca i64, align 8, addrspace(5) %"3" = alloca i64, align 8, addrspace(5) %"18" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"18", align 1 %"6" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 store i64 %"21", ptr addrspace(5) %"4", align 8 + 
store i1 false, ptr addrspace(5) %"18", align 1 %"22" = load i64, ptr addrspace(5) %"4", align 8 - store i64 %"22", ptr addrspace(3) %"63", align 8 - %"23" = load i64, ptr addrspace(3) %"63", align 8 + store i64 %"22", ptr addrspace(3) %"60", align 8 + %"23" = load i64, ptr addrspace(3) %"60", align 8 store i64 %"23", ptr addrspace(5) %"6", align 8 - %"24" = load i64, ptr addrspace(3) %"62", align 8 + %"24" = load i64, ptr addrspace(3) %"59", align 8 store i64 %"24", ptr addrspace(5) %"4", align 8 %"26" = load i64, ptr addrspace(5) %"4", align 8 %"27" = load i64, ptr addrspace(5) %"6", align 8 @@ -26,32 +28,36 @@ define private i64 @"2"(i64 %"21", ptr addrspace(3) %"62", ptr addrspace(3) %"63 ret i64 %"28" } -define private i64 @"7"(i64 %"29", i64 %"30", ptr addrspace(3) %"64", ptr addrspace(3) %"65") #0 { -"60": +define private i64 @"7"(i64 %"29", i64 %"30", ptr addrspace(3) %"61", ptr addrspace(3) %"62") #0 { %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"19" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"19", align 1 + br label %1 + +1: ; preds = %0 store i64 %"29", ptr addrspace(5) %"9", align 8 store i64 %"30", ptr addrspace(5) %"10", align 8 + store i1 false, ptr addrspace(5) %"19", align 1 %"31" = load i64, ptr addrspace(5) %"9", align 8 - store i64 %"31", ptr addrspace(3) %"64", align 8 + store i64 %"31", ptr addrspace(3) %"61", align 8 %"33" = load i64, ptr addrspace(5) %"10", align 8 - %"32" = call i64 @"2"(i64 %"33", ptr addrspace(3) %"64", ptr addrspace(3) %"65") + %"32" = call i64 @"2"(i64 %"33", ptr addrspace(3) %"61", ptr addrspace(3) %"62") store i64 %"32", ptr addrspace(5) %"8", align 8 %"34" = load i64, ptr addrspace(5) %"8", align 8 ret i64 %"34" } define protected amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"48", ptr addrspace(4) byref(i64) %"49") #0 { -"61": %"20" = alloca i1, align 1, addrspace(5) - store i1 false, 
ptr addrspace(5) %"20", align 1 %"14" = alloca i64, align 8, addrspace(5) %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"20", align 1 %"35" = load i64, ptr addrspace(4) %"48", align 8 store i64 %"35", ptr addrspace(5) %"14", align 8 %"36" = load i64, ptr addrspace(4) %"49", align 8 @@ -62,8 +68,8 @@ define protected amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i store i64 %"37", ptr addrspace(5) %"16", align 8 %"40" = load i64, ptr addrspace(5) %"14", align 8 %"55" = inttoptr i64 %"40" to ptr addrspace(1) - %"67" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 8 - %"39" = load i64, ptr addrspace(1) %"67", align 8 + %"64" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 8 + %"39" = load i64, ptr addrspace(1) %"64", align 8 store i64 %"39", ptr addrspace(5) %"17", align 8 %"42" = load i64, ptr addrspace(5) %"16", align 8 %"43" = load i64, ptr addrspace(5) %"17", align 8 diff --git a/ptx/src/test/spirv_run/shared_variable.ll b/ptx/src/test/spirv_run/shared_variable.ll index 859a767..ac1e519 100644 --- a/ptx/src/test/spirv_run/shared_variable.ll +++ b/ptx/src/test/spirv_run/shared_variable.ll @@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [128 x i8] undef, align 4 define protected amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr 
addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/shf.ll b/ptx/src/test/spirv_run/shf.ll index 22be32a..317a60f 100644 --- a/ptx/src/test/spirv_run/shf.ll +++ b/ptx/src/test/spirv_run/shf.ll @@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @shf(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"32": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"24", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"25", align 8 @@ -20,8 +22,8 @@ define protected amdgpu_kernel void @shf(ptr addrspace(4) byref(i64) %"24", ptr store i32 %"12", ptr addrspace(5) %"6", align 4 %"15" = load i64, ptr addrspace(5) %"4", align 8 %"27" = inttoptr i64 %"15" to ptr - %"34" = getelementptr inbounds i8, ptr %"27", i64 4 - %"14" = load i32, ptr %"34", align 4 + %"33" = getelementptr inbounds i8, ptr %"27", i64 4 + %"14" = load i32, ptr %"33", align 4 store i32 %"14", ptr addrspace(5) %"7", align 4 %"17" = load i32, ptr addrspace(5) %"6", align 4 %"18" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/shl.ll b/ptx/src/test/spirv_run/shl.ll index 40c3365..9f9b609 100644 --- a/ptx/src/test/spirv_run/shl.ll +++ b/ptx/src/test/spirv_run/shl.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": %"8" = alloca 
i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 @@ -18,8 +20,8 @@ define protected amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"18", ptr %"11" = load i64, ptr %"20", align 8 store i64 %"11", ptr addrspace(5) %"6", align 8 %"14" = load i64, ptr addrspace(5) %"6", align 8 - %0 = shl i64 %"14", 2 - %"21" = select i1 false, i64 0, i64 %0 + %2 = shl i64 %"14", 2 + %"21" = select i1 false, i64 0, i64 %2 store i64 %"21", ptr addrspace(5) %"7", align 8 %"15" = load i64, ptr addrspace(5) %"5", align 8 %"16" = load i64, ptr addrspace(5) %"7", align 8 diff --git a/ptx/src/test/spirv_run/shl_link_hack.ll b/ptx/src/test/spirv_run/shl_link_hack.ll index 9ac3883..29d1c74 100644 --- a/ptx/src/test/spirv_run/shl_link_hack.ll +++ b/ptx/src/test/spirv_run/shl_link_hack.ll @@ -4,14 +4,16 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr, i32) #0 define protected amdgpu_kernel void @shl_link_hack(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #1 { -"29": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"23", align 8 @@ -25,8 +27,8 @@ 
define protected amdgpu_kernel void @shl_link_hack(ptr addrspace(4) byref(i64) % %"14" = load i64, ptr %"25", align 8 store i64 %"14", ptr addrspace(5) %"6", align 8 %"17" = load i64, ptr addrspace(5) %"6", align 8 - %0 = shl i64 %"17", 2 - %"26" = select i1 false, i64 0, i64 %0 + %2 = shl i64 %"17", 2 + %"26" = select i1 false, i64 0, i64 %2 store i64 %"26", ptr addrspace(5) %"7", align 8 %"18" = load i64, ptr addrspace(5) %"5", align 8 %"19" = load i64, ptr addrspace(5) %"7", align 8 diff --git a/ptx/src/test/spirv_run/shl_overflow.ll b/ptx/src/test/spirv_run/shl_overflow.ll index 80d4871..86178d8 100644 --- a/ptx/src/test/spirv_run/shl_overflow.ll +++ b/ptx/src/test/spirv_run/shl_overflow.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #0 { -"62": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -12,6 +10,10 @@ define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %" %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"12" = load i64, ptr addrspace(4) %"47", align 8 store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"48", align 8 @@ -22,24 +24,24 @@ define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %" store i32 %"14", ptr addrspace(5) %"6", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"50" = inttoptr i64 %"17" to ptr - %"64" = getelementptr inbounds i8, ptr %"50", i64 4 - %"16" = load i32, ptr %"64", align 4 + %"63" = getelementptr inbounds i8, 
ptr %"50", i64 4 + %"16" = load i32, ptr %"63", align 4 store i32 %"16", ptr addrspace(5) %"8", align 4 %"19" = load i64, ptr addrspace(5) %"4", align 8 %"51" = inttoptr i64 %"19" to ptr - %"66" = getelementptr inbounds i8, ptr %"51", i64 8 - %"18" = load i32, ptr %"66", align 4 + %"65" = getelementptr inbounds i8, ptr %"51", i64 8 + %"18" = load i32, ptr %"65", align 4 store i32 %"18", ptr addrspace(5) %"9", align 4 %"21" = load i64, ptr addrspace(5) %"4", align 8 %"52" = inttoptr i64 %"21" to ptr - %"68" = getelementptr inbounds i8, ptr %"52", i64 12 - %"20" = load i32, ptr %"68", align 4 + %"67" = getelementptr inbounds i8, ptr %"52", i64 12 + %"20" = load i32, ptr %"67", align 4 store i32 %"20", ptr addrspace(5) %"10", align 4 %"23" = load i32, ptr addrspace(5) %"6", align 4 %"24" = load i32, ptr addrspace(5) %"8", align 4 - %0 = icmp ugt i32 %"24", 31 - %1 = shl i32 %"23", %"24" - %"53" = select i1 %0, i32 0, i32 %1 + %2 = icmp ugt i32 %"24", 31 + %3 = shl i32 %"23", %"24" + %"53" = select i1 %2, i32 0, i32 %3 store i32 %"53", ptr addrspace(5) %"7", align 4 %"25" = load i64, ptr addrspace(5) %"5", align 8 %"26" = load i32, ptr addrspace(5) %"7", align 4 @@ -47,26 +49,26 @@ define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %" store i32 %"26", ptr %"55", align 4 %"28" = load i32, ptr addrspace(5) %"6", align 4 %"29" = load i32, ptr addrspace(5) %"9", align 4 - %2 = icmp ugt i32 %"29", 31 - %3 = shl i32 %"28", %"29" - %"56" = select i1 %2, i32 0, i32 %3 + %4 = icmp ugt i32 %"29", 31 + %5 = shl i32 %"28", %"29" + %"56" = select i1 %4, i32 0, i32 %5 store i32 %"56", ptr addrspace(5) %"7", align 4 %"30" = load i64, ptr addrspace(5) %"5", align 8 %"31" = load i32, ptr addrspace(5) %"7", align 4 %"58" = inttoptr i64 %"30" to ptr - %"70" = getelementptr inbounds i8, ptr %"58", i64 4 - store i32 %"31", ptr %"70", align 4 + %"69" = getelementptr inbounds i8, ptr %"58", i64 4 + store i32 %"31", ptr %"69", align 4 %"33" = load i32, ptr 
addrspace(5) %"6", align 4 %"34" = load i32, ptr addrspace(5) %"10", align 4 - %4 = icmp ugt i32 %"34", 31 - %5 = shl i32 %"33", %"34" - %"59" = select i1 %4, i32 0, i32 %5 + %6 = icmp ugt i32 %"34", 31 + %7 = shl i32 %"33", %"34" + %"59" = select i1 %6, i32 0, i32 %7 store i32 %"59", ptr addrspace(5) %"7", align 4 %"35" = load i64, ptr addrspace(5) %"5", align 8 %"36" = load i32, ptr addrspace(5) %"7", align 4 %"61" = inttoptr i64 %"35" to ptr - %"72" = getelementptr inbounds i8, ptr %"61", i64 8 - store i32 %"36", ptr %"72", align 4 + %"71" = getelementptr inbounds i8, ptr %"61", i64 8 + store i32 %"36", ptr %"71", align 4 ret void } diff --git a/ptx/src/test/spirv_run/shr_s32.ll b/ptx/src/test/spirv_run/shr_s32.ll index 77c71f9..a6a6d98 100644 --- a/ptx/src/test/spirv_run/shr_s32.ll +++ b/ptx/src/test/spirv_run/shr_s32.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @shr_s32(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"28": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,14 +21,14 @@ define protected amdgpu_kernel void @shr_s32(ptr addrspace(4) byref(i64) %"22", store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"30" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load i32, ptr %"30", align 4 + %"29" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"29", align 4 
store i32 %"13", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 %"17" = load i32, ptr addrspace(5) %"7", align 4 - %0 = icmp ugt i32 %"17", 31 - %1 = ashr i32 %"16", %"17" - %"15" = select i1 %0, i32 -1, i32 %1 + %2 = icmp ugt i32 %"17", 31 + %3 = ashr i32 %"16", %"17" + %"15" = select i1 %2, i32 -1, i32 %3 store i32 %"15", ptr addrspace(5) %"6", align 4 %"18" = load i64, ptr addrspace(5) %"5", align 8 %"19" = load i32, ptr addrspace(5) %"6", align 4 diff --git a/ptx/src/test/spirv_run/shr_u32.ll b/ptx/src/test/spirv_run/shr_u32.ll index 22c8761..52153d9 100644 --- a/ptx/src/test/spirv_run/shr_u32.ll +++ b/ptx/src/test/spirv_run/shr_u32.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { -"45": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -12,6 +10,10 @@ define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"36", %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"12" = load i64, ptr addrspace(4) %"36", align 8 store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"37", align 8 @@ -22,25 +24,25 @@ define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"36", store i32 %"14", ptr addrspace(5) %"6", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"39" = inttoptr i64 %"17" to ptr - %"47" = getelementptr inbounds i8, ptr %"39", i64 4 - %"16" = load i32, ptr %"47", align 4 + %"46" = getelementptr inbounds i8, ptr %"39", i64 4 + %"16" = 
load i32, ptr %"46", align 4 store i32 %"16", ptr addrspace(5) %"7", align 4 %"19" = load i64, ptr addrspace(5) %"4", align 8 %"40" = inttoptr i64 %"19" to ptr - %"49" = getelementptr inbounds i8, ptr %"40", i64 8 - %"18" = load i32, ptr %"49", align 4 + %"48" = getelementptr inbounds i8, ptr %"40", i64 8 + %"18" = load i32, ptr %"48", align 4 store i32 %"18", ptr addrspace(5) %"8", align 4 %"21" = load i32, ptr addrspace(5) %"6", align 4 %"22" = load i32, ptr addrspace(5) %"7", align 4 - %0 = icmp ugt i32 %"22", 31 - %1 = lshr i32 %"21", %"22" - %"20" = select i1 %0, i32 0, i32 %1 + %2 = icmp ugt i32 %"22", 31 + %3 = lshr i32 %"21", %"22" + %"20" = select i1 %2, i32 0, i32 %3 store i32 %"20", ptr addrspace(5) %"9", align 4 %"24" = load i32, ptr addrspace(5) %"6", align 4 %"25" = load i32, ptr addrspace(5) %"8", align 4 - %2 = icmp ugt i32 %"25", 31 - %3 = lshr i32 %"24", %"25" - %"23" = select i1 %2, i32 0, i32 %3 + %4 = icmp ugt i32 %"25", 31 + %5 = lshr i32 %"24", %"25" + %"23" = select i1 %4, i32 0, i32 %5 store i32 %"23", ptr addrspace(5) %"10", align 4 %"26" = load i64, ptr addrspace(5) %"5", align 8 %"27" = load i32, ptr addrspace(5) %"9", align 4 @@ -49,8 +51,8 @@ define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"36", %"28" = load i64, ptr addrspace(5) %"5", align 8 %"29" = load i32, ptr addrspace(5) %"10", align 4 %"44" = inttoptr i64 %"28" to ptr - %"51" = getelementptr inbounds i8, ptr %"44", i64 4 - store i32 %"29", ptr %"51", align 4 + %"50" = getelementptr inbounds i8, ptr %"44", i64 4 + store i32 %"29", ptr %"50", align 4 ret void } diff --git a/ptx/src/test/spirv_run/sign_extend.ll b/ptx/src/test/spirv_run/sign_extend.ll index ef26261..98494e3 100644 --- a/ptx/src/test/spirv_run/sign_extend.ll +++ b/ptx/src/test/spirv_run/sign_extend.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @sign_extend(ptr 
addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { -"19": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"14", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 diff --git a/ptx/src/test/spirv_run/sin.ll b/ptx/src/test/spirv_run/sin.ll index f38aedd..33f510c 100644 --- a/ptx/src/test/spirv_run/sin.ll +++ b/ptx/src/test/spirv_run/sin.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/sqrt.ll b/ptx/src/test/spirv_run/sqrt.ll index c8e4ec0..f86753e 100644 --- a/ptx/src/test/spirv_run/sqrt.ll +++ b/ptx/src/test/spirv_run/sqrt.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @sqrt(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, 
addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/sub.ll b/ptx/src/test/spirv_run/sub.ll index 83fec5f..24a12bd 100644 --- a/ptx/src/test/spirv_run/sub.ll +++ b/ptx/src/test/spirv_run/sub.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/subc_cc.ll b/ptx/src/test/spirv_run/subc_cc.ll index 0101b83..cdd5c0b 100644 --- a/ptx/src/test/spirv_run/subc_cc.ll +++ b/ptx/src/test/spirv_run/subc_cc.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #0 { -"72": %"13" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"13", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -14,6 +12,10 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57", %"10" = alloca i32, align 4, 
addrspace(5) %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"13", align 1 %"18" = load i64, ptr addrspace(4) %"57", align 8 store i64 %"18", ptr addrspace(5) %"4", align 8 %"19" = load i64, ptr addrspace(4) %"58", align 8 @@ -24,24 +26,24 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57", store i32 %"59", ptr addrspace(5) %"9", align 4 %"23" = load i64, ptr addrspace(5) %"4", align 8 %"61" = inttoptr i64 %"23" to ptr - %"74" = getelementptr inbounds i8, ptr %"61", i64 4 - %"62" = load i32, ptr %"74", align 4 + %"73" = getelementptr inbounds i8, ptr %"61", i64 4 + %"62" = load i32, ptr %"73", align 4 store i32 %"62", ptr addrspace(5) %"10", align 4 %"25" = load i64, ptr addrspace(5) %"4", align 8 %"63" = inttoptr i64 %"25" to ptr - %"76" = getelementptr inbounds i8, ptr %"63", i64 8 - %"24" = load i32, ptr %"76", align 4 + %"75" = getelementptr inbounds i8, ptr %"63", i64 8 + %"24" = load i32, ptr %"75", align 4 store i32 %"24", ptr addrspace(5) %"11", align 4 %"27" = load i64, ptr addrspace(5) %"4", align 8 %"64" = inttoptr i64 %"27" to ptr - %"78" = getelementptr inbounds i8, ptr %"64", i64 12 - %"26" = load i32, ptr %"78", align 4 + %"77" = getelementptr inbounds i8, ptr %"64", i64 12 + %"26" = load i32, ptr %"77", align 4 store i32 %"26", ptr addrspace(5) %"12", align 4 %"29" = load i32, ptr addrspace(5) %"9", align 4 %"30" = load i32, ptr addrspace(5) %"10", align 4 - %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"29", i32 %"30") - %"28" = extractvalue { i32, i1 } %0, 0 - %"14" = extractvalue { i32, i1 } %0, 1 + %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"29", i32 %"30") + %"28" = extractvalue { i32, i1 } %2, 0 + %"14" = extractvalue { i32, i1 } %2, 1 store i32 %"28", ptr addrspace(5) %"6", align 4 %"31" = xor i1 %"14", true store i1 %"31", ptr addrspace(5) %"13", align 1 @@ -49,14 +51,14 @@ define 
protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57", %"15" = xor i1 %"32", true %"34" = load i32, ptr addrspace(5) %"6", align 4 %"35" = load i32, ptr addrspace(5) %"11", align 4 - %1 = zext i1 %"15" to i32 - %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"34", i32 %"35") - %3 = extractvalue { i32, i1 } %2, 0 - %4 = extractvalue { i32, i1 } %2, 1 - %5 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %3, i32 %1) - %"33" = extractvalue { i32, i1 } %5, 0 - %6 = extractvalue { i32, i1 } %5, 1 - %"16" = xor i1 %4, %6 + %3 = zext i1 %"15" to i32 + %4 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"34", i32 %"35") + %5 = extractvalue { i32, i1 } %4, 0 + %6 = extractvalue { i32, i1 } %4, 1 + %7 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %5, i32 %3) + %"33" = extractvalue { i32, i1 } %7, 0 + %8 = extractvalue { i32, i1 } %7, 1 + %"16" = xor i1 %6, %8 store i32 %"33", ptr addrspace(5) %"7", align 4 %"36" = xor i1 %"16", true store i1 %"36", ptr addrspace(5) %"13", align 1 @@ -64,9 +66,9 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57", %"17" = xor i1 %"37", true %"39" = load i32, ptr addrspace(5) %"7", align 4 %"40" = load i32, ptr addrspace(5) %"12", align 4 - %7 = zext i1 %"17" to i32 - %8 = sub i32 %"39", %"40" - %"38" = sub i32 %8, %7 + %9 = zext i1 %"17" to i32 + %10 = sub i32 %"39", %"40" + %"38" = sub i32 %10, %9 store i32 %"38", ptr addrspace(5) %"8", align 4 %"41" = load i64, ptr addrspace(5) %"5", align 8 %"42" = load i32, ptr addrspace(5) %"6", align 4 @@ -75,13 +77,13 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57", %"43" = load i64, ptr addrspace(5) %"5", align 8 %"44" = load i32, ptr addrspace(5) %"7", align 4 %"70" = inttoptr i64 %"43" to ptr - %"80" = getelementptr inbounds i8, ptr %"70", i64 4 - store i32 %"44", ptr %"80", align 4 + %"79" = getelementptr inbounds i8, ptr %"70", i64 4 + store i32 %"44", ptr %"79", align 4 %"45" = load i64, ptr 
addrspace(5) %"5", align 8 %"46" = load i32, ptr addrspace(5) %"8", align 4 %"71" = inttoptr i64 %"45" to ptr - %"82" = getelementptr inbounds i8, ptr %"71", i64 8 - store i32 %"46", ptr %"82", align 4 + %"81" = getelementptr inbounds i8, ptr %"71", i64 8 + store i32 %"46", ptr %"81", align 4 ret void } diff --git a/ptx/src/test/spirv_run/vector.ll b/ptx/src/test/spirv_run/vector.ll index b60aaec..f311be7 100644 --- a/ptx/src/test/spirv_run/vector.ll +++ b/ptx/src/test/spirv_run/vector.ll @@ -2,69 +2,74 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define private <2 x i32> @"1"(<2 x i32> %"18") #0 { -"50": %"3" = alloca <2 x i32>, align 8, addrspace(5) %"2" = alloca <2 x i32>, align 8, addrspace(5) %"16" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"16", align 1 %"4" = alloca <2 x i32>, align 8, addrspace(5) %"5" = alloca i32, align 4, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) - store <2 x i32> %"18", ptr addrspace(5) %"3", align 8 - %0 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 0 - %"20" = load i32, ptr addrspace(5) %0, align 4 %1 = alloca i32, align 4, addrspace(5) + %2 = alloca i32, align 4, addrspace(5) + %3 = alloca i32, align 4, addrspace(5) + %4 = alloca i32, align 4, addrspace(5) + %5 = alloca i32, align 4, addrspace(5) + %6 = alloca <2 x i32>, align 8, addrspace(5) + br label %7 + +7: ; preds = %0 + store <2 x i32> %"18", ptr addrspace(5) %"3", align 8 + store i1 false, ptr addrspace(5) %"16", align 1 + %8 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 0 + %"20" = load i32, ptr addrspace(5) %8, align 4 store i32 %"20", ptr addrspace(5) %1, align 4 %"19" = load i32, ptr addrspace(5) %1, align 4 store i32 %"19", ptr addrspace(5) %"5", align 4 - %2 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 1 - %"22" = load i32, ptr addrspace(5) %2, align 4 - %3 = alloca i32, align 4, 
addrspace(5) - store i32 %"22", ptr addrspace(5) %3, align 4 - %"21" = load i32, ptr addrspace(5) %3, align 4 + %9 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 1 + %"22" = load i32, ptr addrspace(5) %9, align 4 + store i32 %"22", ptr addrspace(5) %2, align 4 + %"21" = load i32, ptr addrspace(5) %2, align 4 store i32 %"21", ptr addrspace(5) %"6", align 4 %"24" = load i32, ptr addrspace(5) %"5", align 4 %"25" = load i32, ptr addrspace(5) %"6", align 4 %"23" = add i32 %"24", %"25" store i32 %"23", ptr addrspace(5) %"6", align 4 %"27" = load i32, ptr addrspace(5) %"6", align 4 - %4 = alloca i32, align 4, addrspace(5) - store i32 %"27", ptr addrspace(5) %4, align 4 - %"26" = load i32, ptr addrspace(5) %4, align 4 - %5 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 0 - store i32 %"26", ptr addrspace(5) %5, align 4 - %"29" = load i32, ptr addrspace(5) %"6", align 4 - %6 = alloca i32, align 4, addrspace(5) - store i32 %"29", ptr addrspace(5) %6, align 4 - %"28" = load i32, ptr addrspace(5) %6, align 4 - %7 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1 - store i32 %"28", ptr addrspace(5) %7, align 4 - %8 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1 - %"31" = load i32, ptr addrspace(5) %8, align 4 - %9 = alloca i32, align 4, addrspace(5) - store i32 %"31", ptr addrspace(5) %9, align 4 - %"30" = load i32, ptr addrspace(5) %9, align 4 + store i32 %"27", ptr addrspace(5) %3, align 4 + %"26" = load i32, ptr addrspace(5) %3, align 4 %10 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 0 - store i32 %"30", ptr addrspace(5) %10, align 4 + store i32 %"26", ptr addrspace(5) %10, align 4 + %"29" = load i32, ptr addrspace(5) %"6", align 4 + store i32 %"29", ptr addrspace(5) %4, align 4 + %"28" = load i32, ptr addrspace(5) %4, align 4 + %11 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1 + store i32 %"28", ptr addrspace(5) %11, align 4 + %12 = 
getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1 + %"31" = load i32, ptr addrspace(5) %12, align 4 + store i32 %"31", ptr addrspace(5) %5, align 4 + %"30" = load i32, ptr addrspace(5) %5, align 4 + %13 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 0 + store i32 %"30", ptr addrspace(5) %13, align 4 %"33" = load <2 x i32>, ptr addrspace(5) %"4", align 8 - %11 = alloca <2 x i32>, align 8, addrspace(5) - store <2 x i32> %"33", ptr addrspace(5) %11, align 8 - %"32" = load <2 x i32>, ptr addrspace(5) %11, align 8 + store <2 x i32> %"33", ptr addrspace(5) %6, align 8 + %"32" = load <2 x i32>, ptr addrspace(5) %6, align 8 store <2 x i32> %"32", ptr addrspace(5) %"2", align 8 %"34" = load <2 x i32>, ptr addrspace(5) %"2", align 8 ret <2 x i32> %"34" } define protected amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 { -"51": %"17" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"17", align 1 %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) %"12" = alloca <2 x i32>, align 8, addrspace(5) %"13" = alloca i32, align 4, addrspace(5) %"14" = alloca i32, align 4, addrspace(5) %"15" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"17", align 1 %"35" = load i64, ptr addrspace(4) %"45", align 8 store i64 %"35", ptr addrspace(5) %"10", align 8 %"36" = load i64, ptr addrspace(4) %"46", align 8 @@ -78,9 +83,8 @@ define protected amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"45", p store <2 x i32> %"39", ptr addrspace(5) %"12", align 8 %"42" = load <2 x i32>, ptr addrspace(5) %"12", align 8 %"48" = bitcast <2 x i32> %"42" to i64 - %0 = alloca i64, align 8, addrspace(5) - store i64 %"48", ptr addrspace(5) %0, align 8 - %"41" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"48", ptr addrspace(5) %1, align 8 + %"41" = load 
i64, ptr addrspace(5) %1, align 8 store i64 %"41", ptr addrspace(5) %"15", align 8 %"43" = load i64, ptr addrspace(5) %"11", align 8 %"44" = load <2 x i32>, ptr addrspace(5) %"12", align 8 diff --git a/ptx/src/test/spirv_run/vector4.ll b/ptx/src/test/spirv_run/vector4.ll index 494b1af..7d92885 100644 --- a/ptx/src/test/spirv_run/vector4.ll +++ b/ptx/src/test/spirv_run/vector4.ll @@ -2,13 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca <4 x i32>, align 16, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 @@ -17,9 +20,8 @@ define protected amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"17", %"19" = inttoptr i64 %"12" to ptr %"11" = load <4 x i32>, ptr %"19", align 16 store <4 x i32> %"11", ptr addrspace(5) %"6", align 16 - %0 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %"6", i32 0, i32 3 - %"14" = load i32, ptr addrspace(5) %0, align 4 - %1 = alloca i32, align 4, addrspace(5) + %3 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %"6", i32 0, i32 3 + %"14" = load i32, ptr addrspace(5) %3, align 4 store i32 %"14", ptr addrspace(5) %1, align 4 %"20" = load i32, ptr addrspace(5) %1, align 4 store i32 %"20", ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/vector_extract.ll b/ptx/src/test/spirv_run/vector_extract.ll index d877dc7..ea2e2db 100644 --- 
a/ptx/src/test/spirv_run/vector_extract.ll +++ b/ptx/src/test/spirv_run/vector_extract.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"48", ptr addrspace(4) byref(i64) %"49") #0 { -"60": %"17" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"17", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i16, align 2, addrspace(5) @@ -12,6 +10,13 @@ define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"8" = alloca i16, align 2, addrspace(5) %"9" = alloca i16, align 2, addrspace(5) %"10" = alloca <4 x i16>, align 8, addrspace(5) + %1 = alloca <4 x i16>, align 8, addrspace(5) + %2 = alloca <4 x i16>, align 8, addrspace(5) + %3 = alloca <4 x i16>, align 8, addrspace(5) + br label %4 + +4: ; preds = %0 + store i1 false, ptr addrspace(5) %"17", align 1 %"18" = load i64, ptr addrspace(4) %"48", align 8 store i64 %"18", ptr addrspace(5) %"4", align 8 %"19" = load i64, ptr addrspace(4) %"49", align 8 @@ -35,18 +40,16 @@ define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"26" = load i16, ptr addrspace(5) %"8", align 2 %"27" = load i16, ptr addrspace(5) %"9", align 2 %"28" = load i16, ptr addrspace(5) %"6", align 2 - %0 = insertelement <4 x i16> undef, i16 %"25", i32 0 - %1 = insertelement <4 x i16> %0, i16 %"26", i32 1 - %2 = insertelement <4 x i16> %1, i16 %"27", i32 2 - %"12" = insertelement <4 x i16> %2, i16 %"28", i32 3 - %3 = alloca <4 x i16>, align 8, addrspace(5) - store <4 x i16> %"12", ptr addrspace(5) %3, align 8 - %"29" = load <4 x i16>, ptr addrspace(5) %3, align 8 + %5 = insertelement <4 x i16> undef, i16 %"25", i32 0 + %6 = insertelement <4 x i16> %5, i16 %"26", i32 1 + %7 = insertelement <4 x i16> %6, i16 %"27", i32 2 + %"12" = insertelement <4 x i16> %7, i16 %"28", 
i32 3 + store <4 x i16> %"12", ptr addrspace(5) %1, align 8 + %"29" = load <4 x i16>, ptr addrspace(5) %1, align 8 store <4 x i16> %"29", ptr addrspace(5) %"10", align 8 %"30" = load <4 x i16>, ptr addrspace(5) %"10", align 8 - %4 = alloca <4 x i16>, align 8, addrspace(5) - store <4 x i16> %"30", ptr addrspace(5) %4, align 8 - %"13" = load <4 x i16>, ptr addrspace(5) %4, align 8 + store <4 x i16> %"30", ptr addrspace(5) %2, align 8 + %"13" = load <4 x i16>, ptr addrspace(5) %2, align 8 %"31" = extractelement <4 x i16> %"13", i32 0 %"32" = extractelement <4 x i16> %"13", i32 1 %"33" = extractelement <4 x i16> %"13", i32 2 @@ -59,13 +62,12 @@ define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"36" = load i16, ptr addrspace(5) %"9", align 2 %"37" = load i16, ptr addrspace(5) %"6", align 2 %"38" = load i16, ptr addrspace(5) %"7", align 2 - %5 = insertelement <4 x i16> undef, i16 %"35", i32 0 - %6 = insertelement <4 x i16> %5, i16 %"36", i32 1 - %7 = insertelement <4 x i16> %6, i16 %"37", i32 2 - %"15" = insertelement <4 x i16> %7, i16 %"38", i32 3 - %8 = alloca <4 x i16>, align 8, addrspace(5) - store <4 x i16> %"15", ptr addrspace(5) %8, align 8 - %"14" = load <4 x i16>, ptr addrspace(5) %8, align 8 + %8 = insertelement <4 x i16> undef, i16 %"35", i32 0 + %9 = insertelement <4 x i16> %8, i16 %"36", i32 1 + %10 = insertelement <4 x i16> %9, i16 %"37", i32 2 + %"15" = insertelement <4 x i16> %10, i16 %"38", i32 3 + store <4 x i16> %"15", ptr addrspace(5) %3, align 8 + %"14" = load <4 x i16>, ptr addrspace(5) %3, align 8 %"39" = extractelement <4 x i16> %"14", i32 0 %"40" = extractelement <4 x i16> %"14", i32 1 %"41" = extractelement <4 x i16> %"14", i32 2 @@ -82,10 +84,10 @@ define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"56" = trunc i16 %"44" to i8 %"57" = trunc i16 %"45" to i8 %"58" = trunc i16 %"46" to i8 - %9 = insertelement <4 x i8> undef, i8 %"55", i32 0 - %10 = insertelement <4 x i8> %9, i8 %"56", 
i32 1 - %11 = insertelement <4 x i8> %10, i8 %"57", i32 2 - %"16" = insertelement <4 x i8> %11, i8 %"58", i32 3 + %11 = insertelement <4 x i8> undef, i8 %"55", i32 0 + %12 = insertelement <4 x i8> %11, i8 %"56", i32 1 + %13 = insertelement <4 x i8> %12, i8 %"57", i32 2 + %"16" = insertelement <4 x i8> %13, i8 %"58", i32 3 %"47" = load i64, ptr addrspace(5) %"5", align 8 %"59" = inttoptr i64 %"47" to ptr addrspace(1) store <4 x i8> %"16", ptr addrspace(1) %"59", align 4 diff --git a/ptx/src/test/spirv_run/vote_ballot.ll b/ptx/src/test/spirv_run/vote_ballot.ll index fd31f1a..efba70a 100644 --- a/ptx/src/test/spirv_run/vote_ballot.ll +++ b/ptx/src/test/spirv_run/vote_ballot.ll @@ -4,15 +4,17 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1, i32) #0 define protected amdgpu_kernel void @vote_ballot(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 { -"50": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"41", align 8 store i64 %"11", ptr addrspace(5) %"5", align 8 %"42" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 true, i32 1) @@ -26,23 +28,23 @@ define protected amdgpu_kernel void @vote_ballot(ptr addrspace(4) byref(i64) %"4 %"16" = load i64, ptr addrspace(5) %"5", align 8 %"17" = load i32, ptr addrspace(5) %"6", align 4 %"46" = inttoptr i64 %"16" to ptr - %"56" = getelementptr inbounds i8, ptr %"46", i64 0 - store i32 %"17", ptr %"56", align 4 + %"55" = getelementptr inbounds i8, ptr %"46", i64 0 + store i32 %"17", ptr %"55", align 4 %"18" = load i64, ptr addrspace(5) %"5", align 8 
%"19" = load i32, ptr addrspace(5) %"7", align 4 %"47" = inttoptr i64 %"18" to ptr - %"58" = getelementptr inbounds i8, ptr %"47", i64 4 - store i32 %"19", ptr %"58", align 4 + %"57" = getelementptr inbounds i8, ptr %"47", i64 4 + store i32 %"19", ptr %"57", align 4 %"20" = load i64, ptr addrspace(5) %"5", align 8 %"21" = load i32, ptr addrspace(5) %"8", align 4 %"48" = inttoptr i64 %"20" to ptr - %"60" = getelementptr inbounds i8, ptr %"48", i64 8 - store i32 %"21", ptr %"60", align 4 + %"59" = getelementptr inbounds i8, ptr %"48", i64 8 + store i32 %"21", ptr %"59", align 4 %"22" = load i64, ptr addrspace(5) %"5", align 8 %"23" = load i32, ptr addrspace(5) %"9", align 4 %"49" = inttoptr i64 %"22" to ptr - %"62" = getelementptr inbounds i8, ptr %"49", i64 12 - store i32 %"23", ptr %"62", align 4 + %"61" = getelementptr inbounds i8, ptr %"49", i64 12 + store i32 %"23", ptr %"61", align 4 ret void } diff --git a/ptx/src/test/spirv_run/vshr.ll b/ptx/src/test/spirv_run/vshr.ll index 4433bf2..3d24770 100644 --- a/ptx/src/test/spirv_run/vshr.ll +++ b/ptx/src/test/spirv_run/vshr.ll @@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @vshr(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 { -"38": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"29", align 8 store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"30", align 8 @@ -21,21 +23,21 @@ define protected amdgpu_kernel void @vshr(ptr addrspace(4) 
byref(i64) %"29", ptr store i32 %"31", ptr addrspace(5) %"7", align 4 %"16" = load i64, ptr addrspace(5) %"4", align 8 %"33" = inttoptr i64 %"16" to ptr - %"40" = getelementptr inbounds i8, ptr %"33", i64 4 - %"34" = load i32, ptr %"40", align 4 + %"39" = getelementptr inbounds i8, ptr %"33", i64 4 + %"34" = load i32, ptr %"39", align 4 store i32 %"34", ptr addrspace(5) %"8", align 4 %"18" = load i64, ptr addrspace(5) %"4", align 8 %"35" = inttoptr i64 %"18" to ptr - %"42" = getelementptr inbounds i8, ptr %"35", i64 8 - %"36" = load i32, ptr %"42", align 4 + %"41" = getelementptr inbounds i8, ptr %"35", i64 8 + %"36" = load i32, ptr %"41", align 4 store i32 %"36", ptr addrspace(5) %"9", align 4 %"20" = load i32, ptr addrspace(5) %"7", align 4 %"21" = load i32, ptr addrspace(5) %"8", align 4 %"22" = load i32, ptr addrspace(5) %"9", align 4 - %0 = icmp ugt i32 %"21", 31 - %1 = lshr i32 %"20", %"21" - %2 = select i1 %0, i32 0, i32 %1 - %"19" = add i32 %2, %"22" + %2 = icmp ugt i32 %"21", 31 + %3 = lshr i32 %"20", %"21" + %4 = select i1 %2, i32 0, i32 %3 + %"19" = add i32 %4, %"22" store i32 %"19", ptr addrspace(5) %"6", align 4 %"23" = load i64, ptr addrspace(5) %"5", align 8 %"24" = load i32, ptr addrspace(5) %"6", align 4 diff --git a/ptx/src/test/spirv_run/xor.ll b/ptx/src/test/spirv_run/xor.ll index 96b2914..bc0ad26 100644 --- a/ptx/src/test/spirv_run/xor.ll +++ b/ptx/src/test/spirv_run/xor.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"27": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) 
%"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"22", ptr store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"29" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load i32, ptr %"29", align 4 + %"28" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"28", align 4 store i32 %"13", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 %"17" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 1085258..b06fa52 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -2526,58 +2526,6 @@ fn insert_implicit_conversions2_impl<'input>( Ok(result) } -fn normalize_labels<'input>( - module: TranslationModule<'input, ExpandedArgParams>, -) -> Result, TranslateError> { - convert_methods_simple(module, normalize_labels2_impl) -} - -fn normalize_labels2_impl<'input>( - id_defs: &mut IdNameMapBuilder<'input>, - fn_body: Vec, -) -> Result, TranslateError> { - let mut labels_in_use = FxHashSet::default(); - for statement in fn_body.iter() { - match statement { - Statement::Instruction(i) => { - if let Some(target) = i.jump_target() { - labels_in_use.insert(target); - } - } - Statement::Conditional(cond) => { - labels_in_use.insert(cond.if_true); - labels_in_use.insert(cond.if_false); - } - Statement::Call(..) - | Statement::Variable(..) - | Statement::LoadVar(..) - | Statement::StoreVar(..) - | Statement::RetValue(..) - | Statement::Conversion(..) - | Statement::Constant(..) - | Statement::Label(..) - | Statement::PtrAccess { .. } - | Statement::RepackVector(..) - | Statement::MadC(..) - | Statement::MadCC(..) - | Statement::AddC(..) - | Statement::AddCC(..) - | Statement::SubC(..) 
- | Statement::SubCC(..) - | Statement::AsmVolatile { .. } - | Statement::FunctionPointer(..) => {} - } - } - Ok( - iter::once(Statement::Label(id_defs.register_intermediate(None))) - .chain(fn_body.into_iter().filter(|s| match s { - Statement::Label(i) => labels_in_use.contains(i), - _ => true, - })) - .collect::>(), - ) -} - fn hoist_globals<'input, P: ast::ArgParams>( module: TranslationModule<'input, P>, ) -> TranslationModule<'input, P> { @@ -3410,9 +3358,7 @@ fn to_llvm_module_impl2<'a, 'input>( } let translation_module = insert_implicit_conversions(translation_module)?; let translation_module = insert_compilation_mode_prologue(translation_module); - let translation_module = normalize_labels(translation_module)?; let translation_module = hoist_globals(translation_module); - let translation_module = move_variables_to_start(translation_module)?; let mut translation_module = replace_instructions_with_builtins(translation_module)?; if raytracing.is_some() { translation_module = raytracing::replace_tex_builtins_hack(translation_module)?; @@ -3439,49 +3385,6 @@ fn to_llvm_module_impl2<'a, 'input>( }) } -// From "Performance Tips for Frontend Authors" (https://llvm.org/docs/Frontend/PerformanceTips.html): -// "The SROA (Scalar Replacement Of Aggregates) and Mem2Reg passes only attempt to eliminate alloca -// instructions that are in the entry basic block. Given SSA is the canonical form expected by much -// of the optimizer; if allocas can not be eliminated by Mem2Reg or SROA, the optimizer is likely to -// be less effective than it could be." -// Empirically, this is true. 
Moving allocas to the start gives us less spill-happy assembly -fn move_variables_to_start<'input, P: ast::ArgParams>( - module: TranslationModule<'input, P>, -) -> Result, TranslateError> { - convert_methods_simple(module, move_variables_to_start_impl) -} - -fn move_variables_to_start_impl<'input, P: ast::ArgParams>( - _: &mut IdNameMapBuilder<'input>, - fn_body: Vec, P>>, -) -> Result, P>>, TranslateError> { - if fn_body.is_empty() { - return Ok(fn_body); - } - let mut result = (0..fn_body.len()) - .into_iter() - .map(|_| mem::MaybeUninit::<_>::uninit()) - .collect::>(); - let variables_count = fn_body.iter().fold(0, |acc, statement| { - acc + matches!(statement, Statement::Variable(..)) as usize - }); - let mut variable = 1usize; - let mut non_variable = variables_count + 1; - // methods always start with an entry label - let mut statements = fn_body.into_iter(); - let start_label = statements.next().ok_or_else(TranslateError::unreachable)?; - unsafe { result.get_unchecked_mut(0).write(start_label) }; - for statement in statements { - let index = match statement { - Statement::Variable(_) => &mut variable, - _ => &mut non_variable, - }; - unsafe { result.get_unchecked_mut(*index).write(statement) }; - *index += 1; - } - Ok(unsafe { mem::transmute(result) }) -} - // PTX definition of param state space does not translate cleanly into AMDGPU notion of an address space: //  .param in kernel arguments matches AMDGPU constant address space // .param in function arguments and variables matches AMDGPU private address space @@ -6901,15 +6804,6 @@ pub(crate) enum TypeKind { Struct, } -impl> ast::Instruction { - fn jump_target(&self) -> Option { - match self { - ast::Instruction::Bra(_, a) => Some(a.src), - _ => None, - } - } -} - impl ast::Instruction { // .wide instructions don't support ftz, so it's enough to just look at the // type declared by the instruction From f0c905db15b287a629b96a67c246ec6317f871a8 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Wed, 8 May 
2024 15:19:59 +0200 Subject: [PATCH 13/14] Fix trap instruction codegen, don't fail build with older Rust versions (#229) --- ptx/src/emit.rs | 3 --- xtask/src/main.rs | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/ptx/src/emit.rs b/ptx/src/emit.rs index 7388203..e2d00d9 100644 --- a/ptx/src/emit.rs +++ b/ptx/src/emit.rs @@ -1337,9 +1337,6 @@ fn emit_int_trap(ctx: &mut EmitContext) -> Result<(), TranslateError> { 0, LLVM_UNNAMED, ); - // llvm.trap is not a terminator, - // LLVM might fail with an unterminated basic block if we don't insert unreachable - LLVMBuildUnreachable(builder); } Ok(()) } diff --git a/xtask/src/main.rs b/xtask/src/main.rs index 3f1f224..d47659f 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -214,6 +214,7 @@ fn build_impl(is_debug: bool) -> Result { let workspace = Workspace::open(is_debug)?; let mut command = workspace.cargo_command(); command.arg("build"); + command.arg("--locked"); workspace .projects .iter() From fcd7a57888cd91abb9849f681c5fe753e8315972 Mon Sep 17 00:00:00 2001 From: NyanCatTW1 <17372086+NyanCatTW1@users.noreply.github.com> Date: Thu, 16 May 2024 06:38:52 +0800 Subject: [PATCH 14/14] Fix + improve vprintf implementation (#211) --- ptx/lib/zluda_ptx_impl.bc | Bin 232076 -> 232464 bytes ptx/lib/zluda_ptx_impl.cpp | 56 ++++++++++++++++++++++++++++++------- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/ptx/lib/zluda_ptx_impl.bc b/ptx/lib/zluda_ptx_impl.bc index 1edcbd5cc43abb641e28b19d37651f905d1e0986..fec881a63b2759bcad291163e4ff3b21393bbd7b 100644 GIT binary patch delta 1748 zcmeB~$~R#K-vkvV*0mee4)HRxonYDgnl~zl>EZd!?g_apjE2oyQ`)ztFmB(P!ZcZ@ zKAw?*L6{{?fSZMZL56{W;fMq0l>h(#|7YKrW5dA67{I|A=#?txz$3ya7{DhQD8S+n zD9D%^z}Xsbf>BjA;1%Oj*?

tkMC;7>~*X^e`@!3Fu;cDihGhs45fC#F#1<@PP5C ztOI{P!xaX>fD4SRvJMPW7zG2ESsXYS7#QjqJ{mAE2r)1)D7l}Q5Xj}i?83;vkdvR6 zUYwC%RKfsq+=IVr91MTf3gkT!596@-$SvJ6YeOgRVV@67TM{pD&TTN`F*(S6p{-H9 zf{BTNL79zJrbR(wMR?r}=XH%NR^9?UAs$QyEV2xW5zImi9EW)wWf~6ZGz+l&WE0A$ z7vg!sS|lKTps`Ovp+KN1!JxBa63a`6LIwwkJdO_xJj$o1XkusMb3!XPbxCqzJ6nf<6k>lVYj z0G<#75XXSs!Q{O=+rRB43@nLaeGaTU7(^kKFL9jUDB{h?Sg*{?_NXVUL6Di9&q;u1 zm1$ssQvml?2%6Cu$&|LBsiNIbVOk@XrrfauQEXZ>DkPYb6&V{0ycaBSY2Yblb7AOA z_|TK^VM4-(DG48DBz%~Y@L@s1hb0LQRwO)Flki|e!h~vYMfKB!cs*@4^Gp;eVPIJ$ zlg7C%k#Et39D^Bo1}m%>oKpEj7zB(I9NIcN9E1-lNFP*CKB%C5P{H`1g7ra#*#{eD zsh+T6V44O~wiBf6fYiYQN(T>U9Xw!k@IbxQ!2?bQA9R6KE9^eVFkAIR8s{dyh7D7h zSs7Y*I3^@2DP3?eIN@e+!o%Q%m%#}igA;xRC(;Z~uxT+n$mvw=$Z1yU@3c|-G~+}v zm~neW&f-nHXUq(4AIM?e3}LWoF(0te`Ml#yvr+#^8@10f&Ll$^E9%cI-o$xE&0zL{ z9Hz|>MjGdqMzMy;JGu^c^vq_9V)!=U#^NOYo)b5crSdr5G4cc@C|EHtNwUZ?EaZ8U zI4zOKENKCgLvzPPHZ2w74xZc{jxX2^W~53m>z!z9*kQVYb%z7Pl5Ry7KaP?atPY19 z*qizO+5GTg_d<~idm{bIt8gvZQ=46L~engcs| z1(~N_NRyN>l3>f?xTsK}B*^P*!j{65;3T=C((s4E1La2k!?tWcB$$5$uw|)SP&lG= zk((K$BE?B^MWRRZh0Yceo=XxF3@31=PFu*+mI!rFR&3MF0RFEqbmvC>9gYr01_mhx z28QT1C5Lw1!-_YLOk1c}kaRkM^~HwN150M#FiK&x%8|5qskV(LN#MniX%BgrN^Te} zX-pB0X0zj{N<8hr1yYcr2-2?0@?t~ki&h5NZ9Lxu795_|D9<$ShEa#h1@36JDI6tC z3=E2f3=GT+3=G^HiVP>%at;bGB(rU|pURXdQ@`mnQ$^N<-8u~`*bba#T9H+-N2g&m z+qu(BH?jiu>NKoldvKbGBfDUqPQymFFQ=I-vLBq#Y1qQH@(fc&cEMSlh8=8w&oIr% zHn^qJu#4^FS*91+3ts3n>}Okcj!7ct!7H7H!))8mF-7EDc&*cLjP39_rjDEiZ*&?? 
zv0XdIbRtKg{+&+4S+>vTn11AZc(2oNk*)hYlSOXAPo0LVYzxmbW#k_CrPFYoZTESm zj@*DhIt@43E}v&wk$d2;PQz`scjuW-n+abYAOf&K}=;$`QX1j8U Q=|&)M&stKDeYTR7`JatVVbN{ z|Br!zL6{|0M}UEWL4tvS;fMq0l>h(#|7YKrW5dA67{I|A=;h*|$UddSVI}*Na)*QL zA~_C{>>|YuhHO)c95mUc6gyb5O(}5TWSdgy(8w-Q&R}c96u^AMK~R8^RW{%T;}Hi* z0Y*~?hA9jn@X>&QL6CugLCO8Zgg`ErdS+He28QB{{GyVa{JeCqV;=lf<6!u+Rv_<@ zxEY6OqOf$^%m=I)hbIVIGUiHTG>I?ZJ?!^}g@u8EL79zJrbR(wMR?5)=e|Z3D{ldw z5Dz8;7Fh;G31%S%j>Ei;QVj=nngv)Y*@QBLc%HCk35YLf?2}L^5NJv;c-cOwp5>uK zCWC`S9>)U)o@LTk82FlAvYAFOCr(R{SfLQ%Jfl5#fzzj!svB%t67DP7*D@<`6ge<6 zB+g*w*z*1Mhkgadh6#*}MrR}i4o<9Z`zQH3{lol=^=dlM1W6#N>#2pCrT1qbg#P zBYcuP8yO^@DKTmOm-$=#?fgUg<^N^=7yoI3KJV#n1mnW=ynZ^9unQZ{5go^AT+A041zsqeQ0S^(+R!kOk=f9D!qdYJ zB@8V#97YV)nh6bOgi>q*4slFR3Y_XL+j_90k(phQgYD{^GmH@&MT+7OWNqlZfOm?( z8-oqZhxmjh@=o!1Veo=!5&I;$6lPT(Yo0d^8=6y6HnZGzU`@>UWWW=$M)8IEV&ce7*rC&~k!|v6CX4I~yL1}1u$?%~RFR#q zU#DRQ+nh5@GqM%V=rrtNdvS*8MfQYSIt}~Tj-F+b$hmMwr{OT$)w4_yIS20QG#q1l zbe5?jXTm+5hEr@`&oZ6J5vYHm({Pq;(mAFdIS*dyG+bm`b&km*H{i8S!&SBe=a@2b zH@wklxXyO#98*WG!#kaZn{1!XF|Ekm@Ls3kHe2s`rW3ghKXn@JvMoQ)^deW`w@$-- zwu9%HIPy09(P? 
cvt.h && /opt/rocm/llvm/bin/clang -std=c++17 -Xclang -no-opaque-pointers -Wall -Wextra -Wsign-compare -Wconversion -x hip zluda_ptx_impl.cpp -S -emit-llvm --cuda-device-only -nogpulib -O3 -Xclang -fallow-half-arguments-and-returns -o - | sed -e 's/define/define linkonce_odr/g' | sed -e '/@llvm.used/d' | sed -e 's/\"target-cpu\"=\"[^\"]*\"//g' | sed -e 's/\"target-features\"=\"[^\"]*\"//g' | sed -e 's/\"denormal-fp-math-f32\"=\"[^\"]*\"//g' | sed -e 's/!llvm.module.flags = !{!0, !1, !2, !3, !4}/!llvm.module.flags = !{ }/g' | sed -e 's/memory(none)/readnone/g' | sed -e 's/memory(argmem: readwrite, inaccessiblemem: readwrite)/inaccessiblemem_or_argmemonly/g' | sed -e 's/memory(read)/readonly/g' | sed -e 's/memory(argmem: readwrite)/argmemonly/g' | llvm-as-13 -o zluda_ptx_impl.bc && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc +// python3 ./cvt.py > cvt.h && /opt/rocm/llvm/bin/clang -std=c++20 -Xclang -no-opaque-pointers -Wall -Wextra -Wsign-compare -Wconversion -x hip zluda_ptx_impl.cpp -S -emit-llvm --cuda-device-only -nogpulib -O3 -Xclang -fallow-half-arguments-and-returns -o - | sed -e 's/define/define linkonce_odr/g' | sed -e '/@llvm.used/d' | sed -e 's/\"target-cpu\"=\"[^\"]*\"//g' | sed -e 's/\"target-features\"=\"[^\"]*\"//g' | sed -e 's/\"denormal-fp-math-f32\"=\"[^\"]*\"//g' | sed -e 's/!llvm.module.flags = !{!0, !1, !2, !3, !4}/!llvm.module.flags = !{ }/g' | sed -e 's/memory(none)/readnone/g' | sed -e 's/memory(argmem: readwrite, inaccessiblemem: readwrite)/inaccessiblemem_or_argmemonly/g' | sed -e 's/memory(read)/readonly/g' | sed -e 's/memory(argmem: readwrite)/argmemonly/g' | llvm-as-13 -o zluda_ptx_impl.bc && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc // Compile to binary: // /opt/rocm/llvm/bin/clang -x ir -target amdgcn-amd-amdhsa -Xlinker --no-undefined zluda_ptx_impl.bc -mno-wavefrontsize64 -mcpu=gfx1030 // Decompile: @@ -1260,6 +1260,7 @@ extern "C" default: return 0; } + return 2; case 'l': switch (s[1]) { @@ -1289,17 +1290,18 @@ extern "C" case 
'X': case 'n': len = 8; - return 2; + break; default: return 0; } + return 3; default: return 0; } + return 2; default: return 0; } - return 1; } __device__ static bool parse_printf_specifier(const char *s, uint8_t &len) @@ -1393,8 +1395,36 @@ extern "C" char c = *(s++); if (c == 0) break; - if (c == '%') + if (c != '%') + continue; + + // %% requires no additional handling + if (*s == '%') { + s++; + continue; + } + + // %s uses __ockl_printf_append_string_n + // https://github.com/ROCm/ROCm-Device-Libs/blob/rocm-5.7.x/ockl/src/services.cl#L343 + if (*s == 's') + { + s++; + const char *value = (const char *)read_valist(valist_ptr, valist_offset, 8); + handle = __ockl_printf_append_string_n(handle, value, strlen_plus_one(value), 0); + continue; + } + + // Keep scanning until we figure out the length of this specifier or if we reach the end of the string + while (*s != 0) { + // "The width is not specified in the format string, but as an additional integer value argument preceding the argument that has to be formatted." + if (*s == '*') { + s++; + uint64_t value = read_valist(valist_ptr, valist_offset, 4); + handle = __ockl_printf_append_args(handle, 1, value, 0, 0, 0, 0, 0, 0, 0); + continue; + } + uint8_t len = 0; if (parse_printf_specifier(s, len)) { @@ -1406,16 +1436,22 @@ extern "C" if (specifier_with_length) { s += specifier_with_length; - } - if (len > 0) - { - uint64_t value = read_valist(valist_ptr, valist_offset, len); - handle = __ockl_printf_append_args(handle, 1, value, 0, 0, 0, 0, 0, 0, 0); + } else { + // Assume the unknown character is a sub-specifier and move on + s++; + continue; } } + + if (len > 0) + { + uint64_t value = read_valist(valist_ptr, valist_offset, len); + handle = __ockl_printf_append_args(handle, 1, value, 0, 0, 0, 0, 0, 0, 0); + } + break; } } - return (uint32_t)__ockl_printf_append_args(handle, 0, 0, 0, 0, 0, 0, 0, 0, 1); + __ockl_printf_append_args(handle, 0, 0, 0, 0, 0, 0, 0, 0, 1); return 1; }