diff --git a/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/Makefile b/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/Makefile index f2ac0cb5f415d2e40e5dbd99f6baca7d1833f67f..36920387c4cfcce5f769d1e900652af63d20228d 100644 --- a/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/Makefile +++ b/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/Makefile @@ -5,3 +5,4 @@ SRCDIR_OBJS=main.o lbm.o ocl.o APP_CUDALDFLAGS=-lm APP_CFLAGS=-ffast-math -g3 -O3 APP_CXXFLAGS=-ffast-math -g3 -O3 +KERNEL_OBJS=kernel_offline.nvptx.s diff --git a/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/kernel_offline.nvptx.s b/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/kernel_offline.nvptx.s deleted file mode 100644 index 7e130799aa9f47a3866d08596d832f5954c658ad..0000000000000000000000000000000000000000 --- a/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/kernel_offline.nvptx.s +++ /dev/null @@ -1,261 +0,0 @@ -// -// Generated by LLVM NVPTX Back-End -// - -.version 3.1 -.target sm_20, texmode_independent -.address_size 32 - - // .globl performStreamCollide_kernel - -.entry performStreamCollide_kernel( - .param .u32 .ptr .global .align 4 performStreamCollide_kernel_param_0, - .param .u32 .ptr .global .align 4 performStreamCollide_kernel_param_1 -) -{ - .reg .pred %p<3>; - .reg .f32 %f<197>; - .reg .s32 %r<38>; - - ld.param.u32 %r5, [performStreamCollide_kernel_param_0]; - mov.u32 %r6, %tid.x; - ld.param.u32 %r4, [performStreamCollide_kernel_param_1]; - mov.u32 %r7, %ctaid.x; - mov.u32 %r8, %ctaid.y; - shl.b32 %r9, %r7, 7; - add.s32 %r10, %r9, %r6; - mad.lo.s32 %r1, %r8, 15360, %r10; - add.s32 %r2, %r1, 30720; - shl.b32 %r11, %r2, 2; - add.s32 %r12, %r5, %r11; - ld.global.f32 %f1, [%r12]; - shl.b32 %r13, %r1, 2; - add.s32 %r14, %r13, %r5; - ld.global.f32 %f2, [%r14+9584128]; - ld.global.f32 %f3, [%r14+19046912]; - add.s32 %r15, %r10, -1; - mad.lo.s32 %r16, %r8, 15360, %r15; - shl.b32 %r17, %r16, 2; - add.s32 %r18, %r17, %r5; - ld.global.f32 %f4, [%r18+28508160]; - add.s32 %r19, %r10, 1; - mad.lo.s32 %r20, %r8, 15360, %r19; - shl.b32 %r21, %r20, 2; - add.s32 %r22, %r21, %r5; - ld.global.f32 %f5, [%r22+37969920]; - ld.global.f32 %f6, [%r14+47370240]; - mad.lo.s32 %r23, %r8, 15360, 15360; - add.s32 %r24, %r10, %r23; - shl.b32 %r25, %r24, 2; - add.s32 %r26, %r25, %r5; - ld.global.f32 %f7, [%r26+56893440]; - ld.global.f32 %f8, [%r14+66354684]; - ld.global.f32 %f9, [%r14+75816452]; - ld.global.f32 %f10, [%r14+85279228]; - ld.global.f32 %f11, [%r14+94740996]; - ld.global.f32 %f12, [%r14+104140288]; - ld.global.f32 %f13, [%r14+113724928]; - ld.global.f32 %f14, [%r14+123064832]; - ld.global.f32 %f15, [%r14+132649472]; - ld.global.f32 %f16, [%r14+141987836]; - add.s32 %r27, %r15, %r23; - shl.b32 %r28, %r27, 2; - add.s32 %r29, %r28, %r5; - ld.global.f32 %f17, [%r29+151511040]; - ld.global.f32 %f18, [%r14+160911364]; - add.s32 %r30, %r19, %r23; - shl.b32 %r31, %r30, 2; - add.s32 %r32, %r31, %r5; - ld.global.f32 %f19, [%r32+170434560]; - ld.global.u32 %r3, [%r14+179896320]; - { - .reg .b32 temp; - and.b32 temp, %r3, 1; - setp.b32.eq %p1, temp, 1; - } - @%p1 bra BB0_2; - add.f32 %f58, %f1, %f2; - add.f32 %f59, %f58, %f3; - add.f32 %f60, %f59, %f4; - add.f32 %f61, %f60, %f5; - add.f32 %f62, %f61, %f6; - add.f32 %f63, %f62, %f7; - add.f32 %f64, %f63, %f8; - add.f32 %f65, %f64, %f9; - add.f32 %f66, %f65, %f10; - add.f32 %f67, %f66, %f11; - add.f32 %f68, %f67, %f12; - add.f32 %f69, %f68, %f13; - add.f32 %f70, %f69, %f14; - add.f32 %f71, %f70, %f15; - add.f32 %f72, %f71, %f16; - add.f32 %f73, %f72, %f17; - add.f32 %f74, %f73, %f18; - add.f32 %f75, %f74, %f19; - sub.f32 %f76, %f4, %f5; - add.f32 %f77, %f76, %f8; - sub.f32 %f78, %f77, %f9; - add.f32 %f79, %f78, %f10; - sub.f32 %f80, %f79, %f11; - add.f32 %f81, %f80, %f16; - add.f32 %f82, %f81, %f17; - sub.f32 %f83, %f82, %f18; - sub.f32 %f84, %f83, %f19; - sub.f32 %f85, %f2, %f3; - add.f32 %f86, %f85, %f8; - add.f32 %f87, %f86, %f9; - sub.f32 %f88, %f87, %f10; - sub.f32 %f89, %f88, %f11; - add.f32 %f90, %f89, %f12; - add.f32 %f91, %f90, %f13; - sub.f32 %f92, %f91, %f14; - sub.f32 %f93, %f92, %f15; - sub.f32 %f94, %f6, %f7; - add.f32 %f95, %f94, %f12; - sub.f32 %f96, %f95, %f13; - add.f32 %f97, %f96, %f14; - sub.f32 %f98, %f97, %f15; - add.f32 %f99, %f98, %f16; - sub.f32 %f100, %f99, %f17; - add.f32 %f101, %f100, %f18; - sub.f32 %f102, %f101, %f19; - div.rn.f32 %f103, %f84, %f75; - div.rn.f32 %f104, %f93, %f75; - div.rn.f32 %f105, %f102, %f75; - and.b32 %r33, %r3, 2; - setp.eq.s32 %p2, %r33, 0; - selp.f32 %f106, %f103, 0f3BA3D70A, %p2; - selp.f32 %f107, %f104, 0f3B03126F, %p2; - selp.f32 %f108, %f105, 0f00000000, %p2; - mul.f32 %f109, %f107, %f107; - fma.rn.f32 %f110, %f106, %f106, %f109; - fma.rn.f32 %f111, %f108, %f108, %f110; - fma.rn.f32 %f112, %f111, 0f3FC00000, 0fBF800000; - mul.f32 %f113, %f75, 0f3FF9999A; - mul.f32 %f114, %f113, 0fBEAAAAAB; - mul.f32 %f115, %f114, %f112; - fma.rn.f32 %f20, %f1, 0fBF733334, %f115; - mul.f32 %f116, %f113, 0f3D638E39; - fma.rn.f32 %f117, %f107, 0f40900000, 0f40400000; - neg.f32 %f118, %f112; - fma.rn.f32 %f119, %f107, %f117, %f118; - mul.f32 %f120, %f116, %f119; - fma.rn.f32 %f21, %f2, 0fBF733334, %f120; - fma.rn.f32 %f121, %f107, 0f40900000, 0fC0400000; - fma.rn.f32 %f122, %f107, %f121, %f118; - mul.f32 %f123, %f116, %f122; - fma.rn.f32 %f2, %f3, 0fBF733334, %f123; - fma.rn.f32 %f124, %f108, 0f40900000, 0f40400000; - fma.rn.f32 %f125, %f108, %f124, %f118; - mul.f32 %f126, %f116, %f125; - fma.rn.f32 %f23, %f6, 0fBF733334, %f126; - fma.rn.f32 %f127, %f108, 0f40900000, 0fC0400000; - fma.rn.f32 %f128, %f108, %f127, %f118; - mul.f32 %f129, %f116, %f128; - fma.rn.f32 %f6, %f7, 0fBF733334, %f129; - fma.rn.f32 %f130, %f106, 0f40900000, 0f40400000; - fma.rn.f32 %f131, %f106, %f130, %f118; - mul.f32 %f132, %f116, %f131; - fma.rn.f32 %f25, %f4, 0fBF733334, %f132; - fma.rn.f32 %f133, %f106, 0f40900000, 0fC0400000; - fma.rn.f32 %f134, %f106, %f133, %f118; - mul.f32 %f135, %f116, %f134; - fma.rn.f32 %f4, %f5, 0fBF733334, %f135; - mul.f32 %f136, %f113, 0f3CE38E39; - add.f32 %f137, %f107, %f108; - fma.rn.f32 %f138, %f137, 0f40900000, 0f40400000; - fma.rn.f32 %f139, %f137, %f138, %f118; - mul.f32 %f140, %f136, %f139; - fma.rn.f32 %f27, %f12, 0fBF733334, %f140; - sub.f32 %f141, %f107, %f108; - fma.rn.f32 %f142, %f141, 0f40900000, 0f40400000; - fma.rn.f32 %f143, %f141, %f142, %f118; - mul.f32 %f144, %f136, %f143; - fma.rn.f32 %f28, %f13, 0fBF733334, %f144; - neg.f32 %f145, %f107; - sub.f32 %f146, %f108, %f107; - fma.rn.f32 %f147, %f146, 0f40900000, 0f40400000; - fma.rn.f32 %f148, %f146, %f147, %f118; - mul.f32 %f149, %f136, %f148; - fma.rn.f32 %f13, %f14, 0fBF733334, %f149; - sub.f32 %f150, %f145, %f108; - fma.rn.f32 %f151, %f150, 0f40900000, 0f40400000; - fma.rn.f32 %f152, %f150, %f151, %f118; - mul.f32 %f153, %f136, %f152; - fma.rn.f32 %f12, %f15, 0fBF733334, %f153; - add.f32 %f154, %f106, %f107; - fma.rn.f32 %f155, %f154, 0f40900000, 0f40400000; - fma.rn.f32 %f156, %f154, %f155, %f118; - mul.f32 %f157, %f136, %f156; - fma.rn.f32 %f31, %f8, 0fBF733334, %f157; - sub.f32 %f158, %f106, %f107; - fma.rn.f32 %f159, %f158, 0f40900000, 0f40400000; - fma.rn.f32 %f160, %f158, %f159, %f118; - mul.f32 %f161, %f136, %f160; - fma.rn.f32 %f32, %f10, 0fBF733334, %f161; - add.f32 %f162, %f106, %f108; - fma.rn.f32 %f163, %f162, 0f40900000, 0f40400000; - fma.rn.f32 %f164, %f162, %f163, %f118; - mul.f32 %f165, %f136, %f164; - fma.rn.f32 %f33, %f16, 0fBF733334, %f165; - sub.f32 %f166, %f106, %f108; - fma.rn.f32 %f167, %f166, 0f40900000, 0f40400000; - fma.rn.f32 %f168, %f166, %f167, %f118; - mul.f32 %f169, %f136, %f168; - fma.rn.f32 %f34, %f17, 0fBF733334, %f169; - neg.f32 %f170, %f106; - sub.f32 %f171, %f107, %f106; - fma.rn.f32 %f172, %f171, 0f40900000, 0f40400000; - fma.rn.f32 %f173, %f171, %f172, %f118; - mul.f32 %f174, %f136, %f173; - fma.rn.f32 %f10, %f9, 0fBF733334, %f174; - sub.f32 %f175, %f170, %f107; - fma.rn.f32 %f176, %f175, 0f40900000, 0f40400000; - fma.rn.f32 %f177, %f175, %f176, %f118; - mul.f32 %f178, %f136, %f177; - fma.rn.f32 %f8, %f11, 0fBF733334, %f178; - sub.f32 %f179, %f108, %f106; - fma.rn.f32 %f180, %f179, 0f40900000, 0f40400000; - fma.rn.f32 %f181, %f179, %f180, %f118; - mul.f32 %f182, %f136, %f181; - fma.rn.f32 %f17, %f18, 0fBF733334, %f182; - sub.f32 %f183, %f170, %f108; - fma.rn.f32 %f184, %f183, 0f40900000, 0f40400000; - fma.rn.f32 %f185, %f183, %f184, %f118; - mul.f32 %f186, %f136, %f185; - fma.rn.f32 %f16, %f19, 0fBF733334, %f186; - mov.f32 %f5, %f25; - mov.f32 %f7, %f23; - mov.f32 %f11, %f31; - mov.f32 %f9, %f32; - mov.f32 %f15, %f27; - mov.f32 %f14, %f28; - mov.f32 %f19, %f33; - mov.f32 %f18, %f34; - mov.f32 %f3, %f21; - mov.f32 %f1, %f20; -BB0_2: - add.s32 %r35, %r4, %r11; - st.global.f32 [%r35], %f1; - add.s32 %r37, %r13, %r4; - st.global.f32 [%r37+9584640], %f3; - st.global.f32 [%r37+19046400], %f2; - st.global.f32 [%r37+28508160], %f5; - st.global.f32 [%r37+37969920], %f4; - st.global.f32 [%r37+47431680], %f7; - st.global.f32 [%r37+56893440], %f6; - st.global.f32 [%r37+66355200], %f11; - st.global.f32 [%r37+75816960], %f10; - st.global.f32 [%r37+85278720], %f9; - st.global.f32 [%r37+94740480], %f8; - st.global.f32 [%r37+104202240], %f15; - st.global.f32 [%r37+113664000], %f14; - st.global.f32 [%r37+123125760], %f13; - st.global.f32 [%r37+132587520], %f12; - st.global.f32 [%r37+142049280], %f19; - st.global.f32 [%r37+151511040], %f18; - st.global.f32 [%r37+160972800], %f17; - st.global.f32 [%r37+170434560], %f16; - ret; -} - diff --git a/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c b/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c index 128acb0298e093c7315366ff231a95e47de95154..074e5ca195383a19b7900df0779de80697b7d217 100644 --- a/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c +++ b/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c @@ -230,5 +230,5 @@ void OpenCL_initialize(OpenCL_Param* prm) //free((void*)clSource[0]); - pb_CreateAndBuildKernelFromBinary("src/opencl_nvidia/kernel_offline.nvptx.s", "performStreamCollide_kernel", &prm->clContext, &prm->clDevice, &prm->clProgram, &prm->clKernel); + pb_CreateAndBuildKernelFromBinary("build/opencl_nvidia/kernel_offline.nvptx.s", "performStreamCollide_kernel", &prm->clContext, &prm->clDevice, &prm->clProgram, &prm->clKernel); } diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/Makefile b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/Makefile index d9c2b594b136f56c0903b8bddd2dab2482d89e55..36b421ec6f1359114ea0035d21048ab0b95bf30e 100644 --- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/Makefile +++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/Makefile @@ -5,3 +5,4 @@ SRCDIR_OBJS=main.o io.o #compute_gold.o APP_CUDALDFLAGS=-lm -lstdc++ APP_CFLAGS=-ffast-math -O3 APP_CXXFLAGS=-ffast-math -O3 +KERNEL_OBJS=kernel_offline.nvptx.s diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/kernel_offline.nvptx.s b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/kernel_offline.nvptx.s deleted file mode 100644 index 13cfd2d24547432388d9d3de6ef92eca164fc822..0000000000000000000000000000000000000000 --- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/kernel_offline.nvptx.s +++ /dev/null @@ -1,79 +0,0 @@ -// -// Generated by LLVM NVPTX Back-End -// - -.version 3.1 -.target sm_20, texmode_independent -.address_size 32 - - // .globl mysgemmNT - -.entry mysgemmNT( - .param .u32 .ptr .global .align 4 mysgemmNT_param_0, - .param .u32 mysgemmNT_param_1, - .param .u32 .ptr .global .align 4 mysgemmNT_param_2, - .param .u32 mysgemmNT_param_3, - .param .u32 .ptr .global .align 4 mysgemmNT_param_4, - .param .u32 mysgemmNT_param_5, - .param .u32 mysgemmNT_param_6, - .param .f32 mysgemmNT_param_7, - .param .f32 mysgemmNT_param_8 -) -{ - .reg .pred %p<3>; - .reg .f32 %f<14>; - .reg .s32 %r<36>; - - mov.u32 %r1, %ctaid.x; - mov.u32 %r2, %ntid.x; - mov.u32 %r3, %tid.x; - mad.lo.s32 %r4, %r2, %r1, %r3; - ld.param.u32 %r23, [mysgemmNT_param_4]; - mov.u32 %r5, %ctaid.y; - ld.param.u32 %r24, [mysgemmNT_param_5]; - mov.u32 %r6, %ntid.y; - ld.param.u32 %r25, [mysgemmNT_param_6]; - ld.param.f32 %f4, [mysgemmNT_param_7]; - mov.u32 %r7, %tid.y; - ld.param.f32 %f5, [mysgemmNT_param_8]; - mad.lo.s32 %r8, %r6, %r5, %r7; - setp.lt.s32 %p1, %r25, 1; - @%p1 bra BB0_1; - ld.param.u32 %r19, [mysgemmNT_param_0]; - ld.param.u32 %r20, [mysgemmNT_param_1]; - ld.param.u32 %r21, [mysgemmNT_param_2]; - ld.param.u32 %r22, [mysgemmNT_param_3]; - shl.b32 %r27, %r8, 2; - add.s32 %r9, %r21, %r27; - shl.b32 %r10, %r22, 2; - shl.b32 %r29, %r4, 2; - add.s32 %r11, %r19, %r29; - shl.b32 %r12, %r20, 2; - mov.f32 %f7, 0f00000000; -BB0_3: - ld.global.f32 %f8, [%r11]; - ld.global.f32 %f9, [%r9]; - fma.rn.f32 %f2, %f8, %f9, %f7; - add.s32 %r16, %r25, -1; - add.s32 %r17, %r9, %r10; - add.s32 %r18, %r11, %r12; - setp.ne.s32 %p2, %r16, 0; - mov.u32 %r11, %r18; - mov.u32 %r9, %r17; - mov.u32 %r25, %r16; - mov.f32 %f7, %f2; - @%p2 bra BB0_3; - bra.uni BB0_4; -BB0_1: - mov.f32 %f7, 0f00000000; -BB0_4: - mad.lo.s32 %r30, %r8, %r24, %r4; - shl.b32 %r31, %r30, 2; - add.s32 %r32, %r23, %r31; - ld.global.f32 %f10, [%r32]; - mul.f32 %f11, %f7, %f4; - fma.rn.f32 %f12, %f10, %f5, %f11; - st.global.f32 [%r32], %f12; - ret; -} - diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc index 8c915dd7f8e4c561fd83169fa72d9ab507a278da..025a430321e5a470bc94d6d123cc806efbde0fa1 100644 --- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc +++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc @@ -125,7 +125,7 @@ int main (int argc, char *argv[]) { // cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus); cl_kernel clKernel; cl_program clProgram; - pb_CreateAndBuildKernelFromBinary("src/opencl_base/kernel_offline.nvptx.s", "mysgemmNT", &clContext, &clDevice, &clProgram, &clKernel); + pb_CreateAndBuildKernelFromBinary("build/opencl_base/kernel_offline.nvptx.s", "mysgemmNT", &clContext, &clDevice, &clProgram, &clKernel); //cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus); //CHECK_ERROR("clCreateProgramWithSource") diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/Makefile b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/Makefile index 164a219b2521cb9c2e288354094eff7d937aeea0..1de7315205e2616d393b0a8d9c9b7502994f2229 100644 --- a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/Makefile +++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/Makefile @@ -6,5 +6,6 @@ SRCDIR_OBJS=main.o gpu_info.o file.o ocl.o APP_CUDALDFLAGS=-lm APP_CFLAGS=-ffast-math -g3 -O3 -I$(TOOLS_SRC) APP_CXXFLAGS=-ffast-math -g3 -O3 +KERNEL_OBJS=kernel_offline.nvptx.s include $(TOOLS_SRC)/commontools.mk diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/kernel_offline.nvptx.s b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/kernel_offline.nvptx.s deleted file mode 100644 index e65b07e90982d517648dd4103e5b40a69a843ae8..0000000000000000000000000000000000000000 --- a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/kernel_offline.nvptx.s +++ /dev/null @@ -1,123 +0,0 @@ -// -// Generated by LLVM NVPTX Back-End -// - -.version 3.1 -.target sm_20, texmode_independent -.address_size 32 - - // .globl spmv_jds - -.entry spmv_jds( - .param .u32 .ptr .global .align 4 spmv_jds_param_0, - .param .u32 .ptr .global .align 4 spmv_jds_param_1, - .param .u32 .ptr .global .align 4 spmv_jds_param_2, - .param .u32 .ptr .global .align 4 spmv_jds_param_3, - .param .u32 .ptr .global .align 4 spmv_jds_param_4, - .param .u32 spmv_jds_param_5, - .param .u32 .ptr .global .align 4 spmv_jds_param_6, - .param .u32 .ptr .global .align 4 spmv_jds_param_7 -) -{ - .reg .pred %p<5>; - .reg .f32 %f<39>; - .reg .s32 %r<56>; - - mov.u32 %r20, %ctaid.x; - mov.u32 %r21, %ntid.x; - mov.u32 %r22, %tid.x; - mad.lo.s32 %r1, %r21, %r20, %r22; - ld.param.u32 %r23, [spmv_jds_param_5]; - setp.ge.s32 %p1, %r1, %r23; - @%p1 bra BB0_9; - ld.param.u32 %r13, [spmv_jds_param_0]; - ld.param.u32 %r14, [spmv_jds_param_1]; - ld.param.u32 %r15, [spmv_jds_param_2]; - ld.param.u32 %r16, [spmv_jds_param_3]; - ld.param.u32 %r17, [spmv_jds_param_4]; - ld.param.u32 %r18, [spmv_jds_param_6]; - ld.param.u32 %r19, [spmv_jds_param_7]; - shr.s32 %r24, %r1, 5; - shl.b32 %r25, %r24, 2; - add.s32 %r26, %r19, %r25; - ld.global.u32 %r2, [%r26]; - ld.global.u32 %r27, [%r18]; - add.s32 %r28, %r27, %r1; - shl.b32 %r29, %r28, 2; - add.s32 %r30, %r14, %r29; - ld.global.f32 %f1, [%r30]; - add.s32 %r31, %r15, %r29; - ld.global.u32 %r32, [%r31]; - shl.b32 %r33, %r32, 2; - add.s32 %r34, %r17, %r33; - ld.global.f32 %f2, [%r34]; - setp.lt.s32 %p2, %r2, 2; - @%p2 bra BB0_2; - ld.global.u32 %r35, [%r18+4]; - add.s32 %r36, %r35, %r1; - shl.b32 %r37, %r36, 2; - add.s32 %r38, %r15, %r37; - ld.global.u32 %r3, [%r38]; - add.s32 %r39, %r14, %r37; - ld.global.f32 %f3, [%r39]; - setp.lt.s32 %p3, %r2, 3; - @%p3 bra BB0_4; - add.s32 %r4, %r2, -2; - add.s32 %r5, %r18, 8; - mov.f32 %f22, 0f00000000; - mov.f32 %f24, %f3; - mov.f32 %f29, %f1; -BB0_6: - mov.f32 %f7, %f29; - mov.f32 %f35, %f24; - shl.b32 %r40, %r3, 2; - add.s32 %r41, %r17, %r40; - ld.global.f32 %f8, [%r41]; - fma.rn.f32 %f9, %f2, %f7, %f22; - ld.global.u32 %r42, [%r5]; - add.s32 %r43, %r42, %r1; - shl.b32 %r44, %r43, 2; - add.s32 %r45, %r15, %r44; - ld.global.u32 %r3, [%r45]; - add.s32 %r46, %r14, %r44; - ld.global.f32 %f10, [%r46]; - add.s32 %r10, %r4, -1; - add.s32 %r11, %r5, 4; - setp.ne.s32 %p4, %r10, 0; - mov.u32 %r5, %r11; - mov.u32 %r4, %r10; - mov.f32 %f24, %f10; - mov.f32 %f22, %f9; - mov.f32 %f2, %f8; - mov.f32 %f29, %f35; - mov.f32 %f32, %f10; - @%p4 bra BB0_6; - bra.uni BB0_7; -BB0_2: - mov.f32 %f20, 0f00000000; - mov.f32 %f37, %f1; - bra.uni BB0_8; -BB0_4: - mov.f32 %f22, 0f00000000; - mov.f32 %f32, %f3; - mov.f32 %f35, %f1; -BB0_7: - mov.f32 %f14, %f35; - mov.f32 %f37, %f32; - shl.b32 %r47, %r3, 2; - add.s32 %r48, %r17, %r47; - ld.global.f32 %f15, [%r48]; - fma.rn.f32 %f20, %f2, %f14, %f22; - mov.f32 %f2, %f15; -BB0_8: - fma.rn.f32 %f23, %f37, %f2, %f20; - shl.b32 %r49, %r1, 2; - add.s32 %r50, %r16, %r49; - ld.global.u32 %r51, [%r50]; - shl.b32 %r52, %r51, 2; - add.s32 %r53, %r13, %r52; - st.global.f32 [%r53], %f23; -BB0_9: - ret; -} - diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c index 155dcb157579fb08f7a9ac8b552df5a39107ba28..62bee2cf96a29364fcea2243c263477599cd7e67 100644 --- a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c +++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c @@ -78,7 +78,7 @@ int main(int argc, char** argv) { //CHECK_ERROR("clCreateKernel") cl_kernel clKernel; cl_program clProgram; - pb_CreateAndBuildKernelFromBinary("src/opencl_nvidia/kernel_offline.nvptx.s", "spmv_jds", &clContext, &clDevice, &clProgram, &clKernel); + pb_CreateAndBuildKernelFromBinary("build/opencl_nvidia/kernel_offline.nvptx.s", "spmv_jds", &clContext, &clDevice, &clProgram, &clKernel); pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); //parameters declaration diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/Makefile b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/Makefile index 141a47f74153bf464f6e50d2b8567ab6ad9549da..d5f6d9708d0589222d80f654586af06ef491a12c 100644 --- a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/Makefile +++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/Makefile @@ -5,3 +5,4 @@ SRCDIR_OBJS=main.o file.o APP_CUDALDFLAGS=-lm APP_CFLAGS=-ffast-math -g3 -O3 APP_CXXFLAGS=-ffast-math -g3 -O3 +KERNEL_OBJS=kernel_offline.nvptx.s diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/kernel_offline.nvptx.s b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/kernel_offline.nvptx.s deleted file mode 100644 index b925d6459752a5b890ed71e90e466ceafb876494..0000000000000000000000000000000000000000 --- a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/kernel_offline.nvptx.s +++ /dev/null @@ -1,93 +0,0 @@ -// -// Generated by LLVM NVPTX Back-End -// - -.version 3.1 -.target sm_20, texmode_independent -.address_size 32 - - // .globl naive_kernel - -.entry naive_kernel( - .param .f32 naive_kernel_param_0, - .param .f32 naive_kernel_param_1, - .param .u32 .ptr .global .align 4 naive_kernel_param_2, - .param .u32 .ptr .global .align 4 naive_kernel_param_3, - .param .u32 naive_kernel_param_4, - .param .u32 naive_kernel_param_5, - .param .u32 naive_kernel_param_6 -) -{ - .reg .pred %p<2>; - .reg .f32 %f<18>; - .reg .s32 %r<46>; - - mov.u32 %r10, %ctaid.x; - mov.u32 %r11, %ntid.x; - mov.u32 %r12, %tid.x; - mad.lo.s32 %r1, %r11, %r10, %r12; - ld.param.u32 %r8, [naive_kernel_param_4]; - add.s32 %r2, %r1, 1; - add.s32 %r19, %r8, -1; - setp.ge.s32 %p1, %r2, %r19; - @%p1 bra BB0_2; - ld.param.f32 %f1, [naive_kernel_param_0]; - ld.param.f32 %f2, [naive_kernel_param_1]; - ld.param.u32 %r6, [naive_kernel_param_2]; - ld.param.u32 %r7, [naive_kernel_param_3]; - ld.param.u32 %r9, [naive_kernel_param_5]; - mov.u32 %r13, %ctaid.y; - mov.u32 %r14, %ntid.y; - mov.u32 %r15, %tid.y; - mad.lo.s32 %r3, %r14, %r13, %r15; - add.s32 %r4, %r3, 1; - mov.u32 %r16, %ctaid.z; - mov.u32 %r17, %ntid.z; - mov.u32 %r18, %tid.z; - mad.lo.s32 %r5, %r17, %r16, %r18; - add.s32 %r20, %r5, 1; - add.s32 %r21, %r5, 2; - mad.lo.s32 %r22, %r21, %r9, %r4; - mad.lo.s32 %r23, %r22, %r8, %r2; - shl.b32 %r24, %r23, 2; - add.s32 %r25, %r6, %r24; - ld.global.f32 %f3, [%r25]; - mad.lo.s32 %r26, %r5, %r9, %r4; - mad.lo.s32 %r27, %r26, %r8, %r2; - shl.b32 %r28, %r27, 2; - add.s32 %r29, %r6, %r28; - ld.global.f32 %f4, [%r29]; - add.f32 %f5, %f3, %f4; - mad.lo.s32 %r30, %r20, %r9, %r3; - add.s32 %r31, %r30, 2; - mad.lo.s32 %r32, %r31, %r8, %r2; - shl.b32 %r33, %r32, 2; - add.s32 %r34, %r6, %r33; - ld.global.f32 %f6, [%r34]; - add.f32 %f7, %f5, %f6; - mad.lo.s32 %r35, %r30, %r8, %r2; - shl.b32 %r36, %r35, 2; - add.s32 %r37, %r6, %r36; - ld.global.f32 %f8, [%r37]; - add.f32 %f9, %f7, %f8; - mad.lo.s32 %r38, %r20, %r9, %r4; - mad.lo.s32 %r39, %r38, %r8, %r1; - shl.b32 %r40, %r39, 2; - add.s32 %r41, %r6, %r40; - ld.global.f32 %f10, [%r41+8]; - add.f32 %f11, %f9, %f10; - ld.global.f32 %f12, [%r41]; - add.f32 %f13, %f11, %f12; - mad.lo.s32 %r42, %r38, %r8, %r2; - shl.b32 %r43, %r42, 2; - add.s32 %r44, %r6, %r43; - ld.global.f32 %f14, [%r44]; - mul.f32 %f15, %f2, %f13; - neg.f32 %f16, %f14; - fma.rn.f32 %f17, %f16, %f1, %f15; - add.s32 %r45, %r7, %r43; - st.global.f32 [%r45], %f17; -BB0_2: - ret; -} - diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c index 427fab549b4b8d34ac24fb44ce63d9418bd6d1ee..92584fa10fa62bd475ae4f16996d25397a72fc56 100644 --- a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c +++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c @@ -106,7 +106,7 @@ int main(int argc, char** argv) { cl_program clProgram; cl_kernel clKernel; - pb_CreateAndBuildKernelFromBinary("src/opencl_base/kernel_offline.nvptx.s", "naive_kernel", &clContext, &clDevice, &clProgram, &clKernel); + pb_CreateAndBuildKernelFromBinary("build/opencl_base/kernel_offline.nvptx.s", "naive_kernel", &clContext, &clDevice, &clProgram, &clKernel); //const char* clSource[] = {readFile("src/opencl_base/kernel.cl")}; //cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus); //CHECK_ERROR("clCreateProgramWithSource")