diff --git a/doc/NOTES.md b/doc/NOTES.md index eac1007..be91c4c 100644 --- a/doc/NOTES.md +++ b/doc/NOTES.md @@ -1,3 +1,5 @@ +Parser generators in Rust: +-------------------------- I'm convinced nobody actually uses parser generators in Rust: * pomelo can't generate lexer (understandable, as it is a port of lemon and lemon can't do this either) * pest can't do parse actions, you have to convert your parse tree to ast manually @@ -9,15 +11,17 @@ I'm convinced nobody actually uses parser generators in Rust: * no library supports island grammars What to emit? +------------- * SPIR-V * Better library support, easier to emit * Can by optimized by IGC * Can't do some things (not sure what exactly yet) - * But we can work around things with inline VISA + * But we can work around them with inline VISA * VISA * Quicker compilation A64 vs BTS +---------- * How to force A64: -cl-intel-greater-than-4GB-buffer-required * PTX made a baffling desing choice: global pointers are represented as untyped 64bit integers * Consequently, there's no 100% certain way to know which argument is a surface and which is a scalar @@ -30,3 +34,18 @@ A64 vs BTS * Potential solution: compile only during the dispatch, when type of arguments is known? * Can't do, the set of arguments passed to cuLaunchKernel is untyped * Solution: treat all arguments as untyped integers and say goodbye to BTS access + +Implicit conversions +-------------------- +* PTX support for implicit conversions is completely degenerate, docs say: +_For convenience, ld, st, and cvt instructions permit source and destination data operands to be wider than the instruction-type size, so that narrow values may be loaded, stored, and converted using regular-width registers. For example, 8-bit or 16-bit values may be held directly in 32-bit or 64-bit registers when being loaded, stored, or converted to other types and sizes_ +Which is sensible, but completely untrue. 
In reality ptxas compiles silly code like this:
+  ```
+  param.f32 param_1
+  ...
+  .reg.s32 %r1
+  ld.param.b16 %r1, [param_1];
+  ```
+* Surprise, surprise, there are two kinds of implicit conversions at play in the example above:
+  * "Relaxed type-checking rules": this is the conversion of b16 operation type to s32 dst register
+  * Undocumented type coercion when dereferencing param_1. The PTX behaviour is to coerce **every** type. It's something like `[param_1] = *(b16*)param_1`
\ No newline at end of file
diff --git a/ptx/tools/implicit_ld_dst.py b/ptx/tools/implicit_ld_dst.py
new file mode 100644
index 0000000..ea95199
--- /dev/null
+++ b/ptx/tools/implicit_ld_dst.py
@@ -0,0 +1,40 @@
+"""Probe which register types ptxas accepts as the destination of ld.param.
+
+For every (instruction type, register type) pair a minimal kernel is written
+to a scratch directory and assembled with ptxas. Each pair is printed as
+"<op_type> <output_type>", prefixed with "[INVALID]" when ptxas exits non-zero.
+"""
+import os
+import subprocess
+import tempfile
+
+types = ["b8", "b16", "b32", "b64", "u8", "u16", "u32", "u64", "s8", "s16", "s32", "s64", "f32", "f64"]
+
+for op_type in types:
+    for output_type in types:
+        # Fresh scratch directory per combination; removed when the block exits.
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            f_name = os.path.join(tmp_dir, 'ptx')
+            out_name = os.path.join(tmp_dir, 'out')
+            with open(f_name, 'w') as f:
+                f.write(
+                    f"""
+                    .version 6.5
+                    .target sm_30
+                    .address_size 64
+                    .visible .entry VecAdd_kernel(
+                        .param .{op_type} input
+                    )
+                    {{
+                        .reg.{output_type} r1;
+                        ld.param.{op_type} r1, [input];
+                        ret;
+                    }}
+                    """)
+            # Pass argv as a list: without shell=True a single command string is
+            # taken as the program name on POSIX, so ptxas would never be started.
+            result = subprocess.run(["ptxas", f_name, "-o", out_name], capture_output=True)
+            if result.returncode == 0:
+                print(f"{op_type} {output_type}")
+            else:
+                print(f"[INVALID] {op_type} {output_type}")
diff --git a/ptx/tools/implicit_ld_src.py b/ptx/tools/implicit_ld_src.py
new file mode 100644
index 0000000..cb6780e
--- /dev/null
+++ b/ptx/tools/implicit_ld_src.py
@@ -0,0 +1,41 @@
+"""Probe which ld.param instruction types ptxas accepts for each .param type.
+
+For every (parameter type, instruction type) pair a minimal kernel is written
+to a scratch directory and assembled with ptxas. Each pair is printed as
+"<op_type> <input_type>", prefixed with "[INVALID]" when ptxas exits non-zero.
+"""
+import os
+import subprocess
+import tempfile
+
+types = ["b8", "b16", "b32", "b64", "u8", "u16", "u32", "u64", "s8", "s16", "s32", "s64", "f32", "f64"]
+
+for input_type in types:
+    for op_type in types:
+        # Fresh scratch directory per combination; removed when the block exits.
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            f_name = os.path.join(tmp_dir, 'ptx')
+            out_name = os.path.join(tmp_dir, 'out')
+            with open(f_name, 'w') as f:
+                f.write(
+                    f"""
+                    .version 6.5
+                    .target sm_30
+                    .address_size 64
+                    .visible .entry VecAdd_kernel(
+                        .param .{input_type} input
+                    )
+                    {{
+                        .reg.{op_type} r1;
+                        ld.param.{op_type} r1, [input];
+                        ret;
+                    }}
+                    """)
+            # Pass argv as a list: without shell=True a single command string is
+            # taken as the program name on POSIX, so ptxas would never be started.
+            # Output is captured so ptxas diagnostics do not interleave with results.
+            result = subprocess.run(["ptxas", f_name, "-o", out_name], capture_output=True)
+            if result.returncode == 0:
+                print(f"{op_type} {input_type}")
+            else:
+                print(f"[INVALID] {op_type} {input_type}")