diff --git a/llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll b/llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll index 36e772486444c3428b3cafb24b8fb0bd2b2ea4a8..4621c021f3738f5d91fd01414f30716b5e8f6b03 100644 --- a/llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll +++ b/llvm/test/VISC/gemm_opencl/matrixMul/visc_gemm_opencl.ll @@ -17,16 +17,6 @@ target triple = "x86_64-unknown-linux-gnu" %struct._cl_kernel = type opaque %struct._cl_event = type opaque -; Return Type of VISC Compute Matrix Mul -%rtype = type {float*, i32} -%struct.arg = type { float*, i32, float*, i32, float*, i32, i32, i32, i32, %rtype } - -; Function Attrs: nounwind -declare i8* @llvm.visc.launch(i8*, i8*) #0 - -; Function Attrs: nounwind -declare void @llvm.visc.wait(i8*) #0 - @.str = private unnamed_addr constant [3 x i8] c"rb\00", align 1 @.str1 = private unnamed_addr constant [45 x i8] c"Mismatch at %d,%d --- C = %f and goldC = %f\0A\00", align 1 @.str2 = private unnamed_addr constant [35 x i8] c"Failure to get number of platforms\00", align 1 @@ -80,64 +70,6 @@ for.end: ; preds = %for.body, %entry ; Function Attrs: nounwind declare i32 @rand() #1 -; Function Attrs: nounwind uwtable -define noalias i8* @LoadProgSource(i8* nocapture %cFilename, i64* %szFinalLength) #0 { -entry: - %call = tail call %struct._IO_FILE* @fopen(i8* %cFilename, i8* getelementptr inbounds ([3 x i8]* @.str, i64 0, i64 0)) #4 - %cmp = icmp eq %struct._IO_FILE* %call, null - br i1 %cmp, label %return, label %if.end - -if.end: ; preds = %entry - %call1 = tail call i32 @fseek(%struct._IO_FILE* %call, i64 0, i32 2) #4 - %call2 = tail call i64 @ftell(%struct._IO_FILE* %call) #4 - %call3 = tail call i32 @fseek(%struct._IO_FILE* %call, i64 0, i32 0) #4 - %add = add i64 %call2, 1 - %call4 = tail call noalias i8* @malloc(i64 %add) #4 - %call5 = tail call i64 @fread(i8* %call4, i64 %call2, i64 1, %struct._IO_FILE* %call) #4 - %cmp6 = icmp eq i64 %call5, 1 - %call8 = tail call i32 @fclose(%struct._IO_FILE* %call) #4 - br i1 %cmp6, label %if.end9, label %if.then7 - -if.then7: ; preds = %if.end - tail call void @free(i8* %call4) #4 - br label %return - -if.end9: ; preds = %if.end - %cmp11 = icmp eq i64* %szFinalLength, null - br i1 %cmp11, label %if.end13, label %if.then12 - -if.then12: ; preds = %if.end9 - store i64 %call2, i64* %szFinalLength, align 8, !tbaa !3 - br label %if.end13 - -if.end13: ; preds = %if.end9, %if.then12 - %arrayidx = getelementptr inbounds i8* %call4, i64 %call2 - store i8 0, i8* %arrayidx, align 1, !tbaa !1 - br label %return - -return: ; preds = %entry, %if.end13, %if.then7 - %retval.0 = phi i8* [ null, %if.then7 ], [ %call4, %if.end13 ], [ null, %entry ] - ret i8* %retval.0 -} - -; Function Attrs: nounwind -declare noalias %struct._IO_FILE* @fopen(i8* nocapture, i8* nocapture) #1 - -; Function Attrs: nounwind -declare i32 @fseek(%struct._IO_FILE* nocapture, i64, i32) #1 - -; Function Attrs: nounwind -declare i64 @ftell(%struct._IO_FILE* nocapture) #1 - -; Function Attrs: nounwind -declare noalias i8* @malloc(i64) #1 - -; Function Attrs: nounwind -declare i64 @fread(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #1 - -; Function Attrs: nounwind -declare i32 @fclose(%struct._IO_FILE* nocapture) #1 - ; Function Attrs: nounwind declare void @free(i8* nocapture) #1 @@ -223,330 +155,76 @@ return: ; preds = %for.inc50, %if.then ; Function Attrs: nounwind declare i32 @printf(i8* nocapture, ...) #1 -; Function Attrs: nounwind uwtable -define void @computeMatrixMul(float* %h_A, i32 %bytes_A, float* %h_B, i32 %bytes_B, float* %h_C, i32 %bytes_C) #0 { -entry: - %dataBytes = alloca i64, align 8 - %errcode = alloca i32, align 4 - %d_A = alloca %struct._cl_mem*, align 8 - %d_B = alloca %struct._cl_mem*, align 8 - %d_C = alloca %struct._cl_mem*, align 8 - %numPlatforms = alloca i32, align 4 - %buffer = alloca [10240 x i8], align 16 - %properties = alloca [3 x i64], align 16 - %binaryLength = alloca i64, align 8 - %clMatrixMul = alloca i8*, align 8 - %binaryStatus = alloca i32, align 4 - %localWorkSize = alloca [2 x i64], align 16 - %globalWorkSize = alloca [2 x i64], align 16 - %wA = alloca i32, align 4 - %wC = alloca i32, align 4 - %call = call i32 @clGetPlatformIDs(i32 0, %struct._cl_platform_id** null, i32* %numPlatforms) #4 - store i32 %call, i32* %errcode, align 4, !tbaa !4 - %cmp.i = icmp eq i32 %call, 0 - br i1 %cmp.i, label %checkErr.exit, label %if.then.i - -if.then.i: ; preds = %entry - %0 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 - %call.i = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([35 x i8]* @.str2, i64 0, i64 0)) #4 - call void @exit(i32 1) #7 - unreachable - -checkErr.exit: ; preds = %entry - %1 = load i32* %numPlatforms, align 4, !tbaa !4 - %2 = zext i32 %1 to i64 - %vla = alloca %struct._cl_platform_id*, i64 %2, align 16 - %call1 = call i32 @clGetPlatformIDs(i32 %1, %struct._cl_platform_id** %vla, i32* null) #4 - store i32 %call1, i32* %errcode, align 4, !tbaa !4 - %cmp.i105 = icmp eq i32 %call1, 0 - br i1 %cmp.i105, label %for.cond.preheader, label %if.then.i107 - -for.cond.preheader: ; preds = %checkErr.exit - %3 = load i32* %numPlatforms, align 4, !tbaa !4 - %cmp148 = icmp eq i32 %3, 0 - br i1 %cmp148, label %for.end, label %for.body.lr.ph - -for.body.lr.ph: ; preds = %for.cond.preheader - %4 = getelementptr inbounds [10240 x i8]* %buffer, i64 0, i64 0 - br label %for.body - -if.then.i107: ; preds = %checkErr.exit - %5 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 - %call.i106 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %5, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([28 x i8]* @.str3, i64 0, i64 0)) #4 - call void @exit(i32 1) #7 - unreachable - -for.body: ; preds = %for.body.lr.ph, %for.body - %i.0149 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] - call void @llvm.lifetime.start(i64 10240, i8* %4) #4 - %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str4, i64 0, i64 0), i32 %i.0149) #4 - %idxprom = zext i32 %i.0149 to i64 - %arrayidx = getelementptr inbounds %struct._cl_platform_id** %vla, i64 %idxprom - %6 = load %struct._cl_platform_id** %arrayidx, align 8, !tbaa !5 - %call3 = call i32 @clGetPlatformInfo(%struct._cl_platform_id* %6, i32 2304, i64 10240, i8* %4, i64* null) #4 - %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str5, i64 0, i64 0), i8* %4) #4 - %7 = load %struct._cl_platform_id** %arrayidx, align 8, !tbaa !5 - %call9 = call i32 @clGetPlatformInfo(%struct._cl_platform_id* %7, i32 2305, i64 10240, i8* %4, i64* null) #4 - %call11 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str6, i64 0, i64 0), i8* %4) #4 - %8 = load %struct._cl_platform_id** %arrayidx, align 8, !tbaa !5 - %call15 = call i32 @clGetPlatformInfo(%struct._cl_platform_id* %8, i32 2306, i64 10240, i8* %4, i64* null) #4 - %call17 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str7, i64 0, i64 0), i8* %4) #4 - %9 = load %struct._cl_platform_id** %arrayidx, align 8, !tbaa !5 - %call21 = call i32 @clGetPlatformInfo(%struct._cl_platform_id* %9, i32 2307, i64 10240, i8* %4, i64* null) #4 - %call23 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str8, i64 0, i64 0), i8* %4) #4 - %10 = load %struct._cl_platform_id** %arrayidx, align 8, !tbaa !5 - %call27 = call i32 @clGetPlatformInfo(%struct._cl_platform_id* %10, i32 2308, i64 10240, i8* %4, i64* null) #4 - %call29 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([19 x i8]* @.str9, i64 0, i64 0), i8* %4) #4 - call void @llvm.lifetime.end(i64 10240, i8* %4) #4 - %inc = add i32 %i.0149, 1 - %11 = load i32* %numPlatforms, align 4, !tbaa !4 - %cmp = icmp ult i32 %inc, %11 - br i1 %cmp, label %for.body, label %for.end - -for.end: ; preds = %for.body, %for.cond.preheader - %arrayinit.begin = getelementptr inbounds [3 x i64]* %properties, i64 0, i64 0 - store i64 4228, i64* %arrayinit.begin, align 16, !tbaa !3 - %arrayinit.element = getelementptr inbounds [3 x i64]* %properties, i64 0, i64 1 - %12 = load %struct._cl_platform_id** %vla, align 16, !tbaa !5 - %13 = ptrtoint %struct._cl_platform_id* %12 to i64 - %sext = shl i64 %13, 32 - %conv = ashr exact i64 %sext, 32 - store i64 %conv, i64* %arrayinit.element, align 8, !tbaa !3 - %arrayinit.element31 = getelementptr inbounds [3 x i64]* %properties, i64 0, i64 2 - store i64 0, i64* %arrayinit.element31, align 16, !tbaa !3 - %call33 = call %struct._cl_context* @clCreateContextFromType(i64* %arrayinit.begin, i64 4, void (i8*, i8*, i64, i8*)* null, i8* null, i32* %errcode) #4 - %14 = load i32* %errcode, align 4, !tbaa !4 - %cmp.i109 = icmp eq i32 %14, 0 - br i1 %cmp.i109, label %checkErr.exit112, label %if.then.i111 - -if.then.i111: ; preds = %for.end - %15 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 - %call.i110 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %15, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([30 x i8]* @.str10, i64 0, i64 0)) #4 - call void @exit(i32 1) #7 - unreachable - -checkErr.exit112: ; preds = %for.end - %call34 = call i32 @clGetContextInfo(%struct._cl_context* %call33, i32 4225, i64 0, i8* null, i64* %dataBytes) #4 - store i32 %call34, i32* %errcode, align 4, !tbaa !4 - %16 = load i64* %dataBytes, align 8, !tbaa !3 - %call35 = call noalias i8* @malloc(i64 %16) #4 - %17 = bitcast i8* %call35 to %struct._cl_device_id** - %call36 = call i32 @clGetContextInfo(%struct._cl_context* %call33, i32 4225, i64 %16, i8* %call35, i64* null) #4 - %18 = load i32* %errcode, align 4, !tbaa !4 - %or = or i32 %18, %call36 - store i32 %or, i32* %errcode, align 4, !tbaa !4 - %cmp.i113 = icmp eq i32 %or, 0 - br i1 %cmp.i113, label %checkErr.exit116, label %if.then.i115 - -if.then.i115: ; preds = %checkErr.exit112 - %19 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 - %call.i114 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %19, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([28 x i8]* @.str11, i64 0, i64 0)) #4 - call void @exit(i32 1) #7 - unreachable - -checkErr.exit116: ; preds = %checkErr.exit112 - %20 = load %struct._cl_device_id** %17, align 8, !tbaa !5 - %call38 = call %struct._cl_command_queue* @clCreateCommandQueue(%struct._cl_context* %call33, %struct._cl_device_id* %20, i64 0, i32* %errcode) #4 - %21 = load i32* %errcode, align 4, !tbaa !4 - %cmp.i117 = icmp eq i32 %21, 0 - br i1 %cmp.i117, label %checkErr.exit120, label %if.then.i119 - -if.then.i119: ; preds = %checkErr.exit116 - %22 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 - %call.i118 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %22, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([32 x i8]* @.str12, i64 0, i64 0)) #4 - call void @exit(i32 1) #7 - unreachable - -checkErr.exit120: ; preds = %checkErr.exit116 - %conv39 = zext i32 %bytes_C to i64 - %call40 = call %struct._cl_mem* @clCreateBuffer(%struct._cl_context* %call33, i64 1, i64 %conv39, i8* null, i32* %errcode) #4 - store %struct._cl_mem* %call40, %struct._cl_mem** %d_C, align 8, !tbaa !5 - %conv41 = zext i32 %bytes_A to i64 - %23 = bitcast float* %h_A to i8* - %call42 = call %struct._cl_mem* @clCreateBuffer(%struct._cl_context* %call33, i64 33, i64 %conv41, i8* %23, i32* %errcode) #4 - store %struct._cl_mem* %call42, %struct._cl_mem** %d_A, align 8, !tbaa !5 - %conv43 = zext i32 %bytes_B to i64 - %24 = bitcast float* %h_B to i8* - %call44 = call %struct._cl_mem* @clCreateBuffer(%struct._cl_context* %call33, i64 33, i64 %conv43, i8* %24, i32* %errcode) #4 - store %struct._cl_mem* %call44, %struct._cl_mem** %d_B, align 8, !tbaa !5 - %call45 = call i8* @LoadProgSource(i8* getelementptr inbounds ([18 x i8]* @.str13, i64 0, i64 0), i64* %binaryLength) - store i8* %call45, i8** %clMatrixMul, align 8, !tbaa !5 - %cmp46 = icmp eq i8* %call45, null - br i1 %cmp46, label %if.then.i122, label %checkErr.exit123 - -if.then.i122: ; preds = %checkErr.exit120 - %25 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 - %call.i121 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %25, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([31 x i8]* @.str14, i64 0, i64 0)) #4 - call void @exit(i32 1) #7 - unreachable - -checkErr.exit123: ; preds = %checkErr.exit120 - %call49 = call %struct._cl_program* @clCreateProgramWithBinary(%struct._cl_context* %call33, i32 1, %struct._cl_device_id** %17, i64* %binaryLength, i8** %clMatrixMul, i32* %binaryStatus, i32* %errcode) #4 - %26 = load i32* %errcode, align 4, !tbaa !4 - %cmp.i124 = icmp eq i32 %26, 0 - br i1 %cmp.i124, label %checkErr.exit127, label %if.then.i126 - -if.then.i126: ; preds = %checkErr.exit123 - %27 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 - %call.i125 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %27, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([38 x i8]* @.str15, i64 0, i64 0)) #4 - call void @exit(i32 1) #7 - unreachable - -checkErr.exit127: ; preds = %checkErr.exit123 - %call50 = call i32 @clBuildProgram(%struct._cl_program* %call49, i32 0, %struct._cl_device_id** null, i8* null, void (%struct._cl_program*, i8*)* null, i8* null) #4 - store i32 %call50, i32* %errcode, align 4, !tbaa !4 - %cmp.i128 = icmp eq i32 %call50, 0 - br i1 %cmp.i128, label %checkErr.exit131, label %if.then.i130 - -if.then.i130: ; preds = %checkErr.exit127 - %28 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 - %call.i129 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %28, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([25 x i8]* @.str16, i64 0, i64 0)) #4 - call void @exit(i32 1) #7 - unreachable - -checkErr.exit131: ; preds = %checkErr.exit127 - %call51 = call %struct._cl_kernel* @clCreateKernel(%struct._cl_program* %call49, i8* getelementptr inbounds ([10 x i8]* @.str17, i64 0, i64 0), i32* %errcode) #4 - %29 = load i32* %errcode, align 4, !tbaa !4 - %cmp.i132 = icmp eq i32 %29, 0 - br i1 %cmp.i132, label %checkErr.exit135, label %if.then.i134 - -if.then.i134: ; preds = %checkErr.exit131 - %30 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 - %call.i133 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %30, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([25 x i8]* @.str18, i64 0, i64 0)) #4 - call void @exit(i32 1) #7 - unreachable - -checkErr.exit135: ; preds = %checkErr.exit131 - store i32 1024, i32* %wA, align 4, !tbaa !4 - store i32 1024, i32* %wC, align 4, !tbaa !4 - %31 = bitcast %struct._cl_mem** %d_C to i8* - %call52 = call i32 @clSetKernelArg(%struct._cl_kernel* %call51, i32 0, i64 8, i8* %31) #4 - store i32 %call52, i32* %errcode, align 4, !tbaa !4 - %32 = bitcast %struct._cl_mem** %d_A to i8* - %call53 = call i32 @clSetKernelArg(%struct._cl_kernel* %call51, i32 1, i64 8, i8* %32) #4 - %33 = load i32* %errcode, align 4, !tbaa !4 - %or54 = or i32 %33, %call53 - store i32 %or54, i32* %errcode, align 4, !tbaa !4 - %34 = bitcast %struct._cl_mem** %d_B to i8* - %call55 = call i32 @clSetKernelArg(%struct._cl_kernel* %call51, i32 2, i64 8, i8* %34) #4 - %35 = load i32* %errcode, align 4, !tbaa !4 - %or56 = or i32 %35, %call55 - store i32 %or56, i32* %errcode, align 4, !tbaa !4 - %36 = bitcast i32* %wA to i8* - %call57 = call i32 @clSetKernelArg(%struct._cl_kernel* %call51, i32 3, i64 4, i8* %36) #4 - %37 = load i32* %errcode, align 4, !tbaa !4 - %or58 = or i32 %37, %call57 - store i32 %or58, i32* %errcode, align 4, !tbaa !4 - %38 = bitcast i32* %wC to i8* - %call59 = call i32 @clSetKernelArg(%struct._cl_kernel* %call51, i32 4, i64 4, i8* %38) #4 - %39 = load i32* %errcode, align 4, !tbaa !4 - %or60 = or i32 %39, %call59 - store i32 %or60, i32* %errcode, align 4, !tbaa !4 - %cmp.i136 = icmp eq i32 %or60, 0 - br i1 %cmp.i136, label %checkErr.exit139, label %if.then.i138 - -if.then.i138: ; preds = %checkErr.exit135 - %40 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 - %call.i137 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %40, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([32 x i8]* @.str19, i64 0, i64 0)) #4 - call void @exit(i32 1) #7 - unreachable - -checkErr.exit139: ; preds = %checkErr.exit135 - %arrayidx61 = getelementptr inbounds [2 x i64]* %localWorkSize, i64 0, i64 0 - store i64 16, i64* %arrayidx61, align 16, !tbaa !3 - %arrayidx62 = getelementptr inbounds [2 x i64]* %localWorkSize, i64 0, i64 1 - store i64 16, i64* %arrayidx62, align 8, !tbaa !3 - %arrayidx63 = getelementptr inbounds [2 x i64]* %globalWorkSize, i64 0, i64 0 - store i64 1024, i64* %arrayidx63, align 16, !tbaa !3 - %arrayidx64 = getelementptr inbounds [2 x i64]* %globalWorkSize, i64 0, i64 1 - store i64 1024, i64* %arrayidx64, align 8, !tbaa !3 - %call67 = call i32 @clEnqueueNDRangeKernel(%struct._cl_command_queue* %call38, %struct._cl_kernel* %call51, i32 2, i64* null, i64* %arrayidx63, i64* %arrayidx61, i32 0, %struct._cl_event** null, %struct._cl_event** null) #4 - store i32 %call67, i32* %errcode, align 4, !tbaa !4 - %cmp.i140 = icmp eq i32 %call67, 0 - br i1 %cmp.i140, label %checkErr.exit143, label %if.then.i142 - -if.then.i142: ; preds = %checkErr.exit139 - %41 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 - %call.i141 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %41, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([26 x i8]* @.str20, i64 0, i64 0)) #4 - call void @exit(i32 1) #7 - unreachable - -checkErr.exit143: ; preds = %checkErr.exit139 - %42 = load %struct._cl_mem** %d_C, align 8, !tbaa !5 - %43 = bitcast float* %h_C to i8* - %call69 = call i32 @clEnqueueReadBuffer(%struct._cl_command_queue* %call38, %struct._cl_mem* %42, i32 1, i64 0, i64 %conv39, i8* %43, i32 0, %struct._cl_event** null, %struct._cl_event** null) #4 - store i32 %call69, i32* %errcode, align 4, !tbaa !4 - %cmp.i144 = icmp eq i32 %call69, 0 - br i1 %cmp.i144, label %checkErr.exit147, label %if.then.i146 - -if.then.i146: ; preds = %checkErr.exit143 - %44 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5 - %call.i145 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %44, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([23 x i8]* @.str21, i64 0, i64 0)) #4 - call void @exit(i32 1) #7 - unreachable - -checkErr.exit147: ; preds = %checkErr.exit143 - %45 = load %struct._cl_mem** %d_A, align 8, !tbaa !5 - %call70 = call i32 @clReleaseMemObject(%struct._cl_mem* %45) #4 - %46 = load %struct._cl_mem** %d_C, align 8, !tbaa !5 - %call71 = call i32 @clReleaseMemObject(%struct._cl_mem* %46) #4 - %47 = load %struct._cl_mem** %d_B, align 8, !tbaa !5 - %call72 = call i32 @clReleaseMemObject(%struct._cl_mem* %47) #4 - call void @free(i8* %call35) #4 - %48 = load i8** %clMatrixMul, align 8, !tbaa !5 - call void @free(i8* %48) #4 - %call73 = call i32 @clReleaseContext(%struct._cl_context* %call33) #4 - %call74 = call i32 @clReleaseKernel(%struct._cl_kernel* %call51) #4 - %call75 = call i32 @clReleaseProgram(%struct._cl_program* %call49) #4 - %call76 = call i32 @clReleaseCommandQueue(%struct._cl_command_queue* %call38) #4 - ret void -} - -declare i32 @clGetPlatformIDs(i32, %struct._cl_platform_id**, i32*) #3 - ; Function Attrs: nounwind declare void @llvm.lifetime.start(i64, i8* nocapture) #4 -declare i32 @clGetPlatformInfo(%struct._cl_platform_id*, i32, i64, i8*, i64*) #3 - ; Function Attrs: nounwind declare void @llvm.lifetime.end(i64, i8* nocapture) #4 -declare %struct._cl_context* @clCreateContextFromType(i64*, i64, void (i8*, i8*, i64, i8*)*, i8*, i32*) #3 +; --------------- VISC Intrinsics --------------- +; Return Type of VISC Compute Matrix Mul +%rtype = type {float*, i32} +%struct.arg = type { float*, i32, float*, i32, float*, i32, i32, i32, i32, %rtype } -declare i32 @clGetContextInfo(%struct._cl_context*, i32, i64, i8*, i64*) #3 +; Function Attrs: nounwind +declare i8* @llvm.visc.launch(i8*, i8*) #0 -declare %struct._cl_command_queue* @clCreateCommandQueue(%struct._cl_context*, %struct._cl_device_id*, i64, i32*) #3 +; Function Attrs: nounwind +declare void @llvm.visc.wait(i8*) #0 -declare %struct._cl_mem* @clCreateBuffer(%struct._cl_context*, i64, i64, i8*, i32*) #3 +; Function Attrs: nounwind +declare i8* @llvm.visc.createNode(i8*) #0 -declare %struct._cl_program* @clCreateProgramWithBinary(%struct._cl_context*, i32, %struct._cl_device_id**, i64*, i8**, i32*, i32*) #3 +; Function Attrs: nounwind +declare i8* @llvm.visc.createNode1D(i8*, i32) #0 -declare i32 @clBuildProgram(%struct._cl_program*, i32, %struct._cl_device_id**, i8*, void (%struct._cl_program*, i8*)*, i8*) #3 +; Function Attrs: nounwind +declare i8* @llvm.visc.createNode2D(i8*, i32, i32) #0 + +; Function Attrs: nounwind +declare i8* @llvm.visc.createNode3D(i8*, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0 -declare %struct._cl_kernel* @clCreateKernel(%struct._cl_program*, i8*, i32*) #3 +; Function Attrs: nounwind +declare i8* @llvm.visc.getNode() #0 -declare i32 @clSetKernelArg(%struct._cl_kernel*, i32, i64, i8*) #3 +; Function Attrs: nounwind +declare i8* @llvm.visc.getParentNode(i8*) #0 -declare i32 @clEnqueueNDRangeKernel(%struct._cl_command_queue*, %struct._cl_kernel*, i32, i64*, i64*, i64*, i32, %struct._cl_event**, %struct._cl_event**) #3 +; Function Attrs: nounwind +declare i32 @llvm.visc.getNumDims(i8*) #0 -declare i32 @clEnqueueReadBuffer(%struct._cl_command_queue*, %struct._cl_mem*, i32, i64, i64, i8*, i32, %struct._cl_event**, %struct._cl_event**) #3 +; Function Attrs: nounwind +declare i32 @llvm.visc.getNumNodeInstances.x(i8*) #0 -declare i32 @clReleaseMemObject(%struct._cl_mem*) #3 +; Function Attrs: nounwind +declare i32 @llvm.visc.getNumNodeInstances.y(i8*) #0 -declare i32 @clReleaseContext(%struct._cl_context*) #3 +; Function Attrs: nounwind +declare void @llvm.visc.bind.input(i8*, i32, i32) -declare i32 @clReleaseKernel(%struct._cl_kernel*) #3 +; Function Attrs: nounwind +declare void @llvm.visc.bind.output(i8*, i32, i32) +; ----------------- VISC intrinsics end ------------------ -declare i32 @clReleaseProgram(%struct._cl_program*) #3 -declare i32 @clReleaseCommandQueue(%struct._cl_command_queue*) #3 ; Function Attrs: nounwind uwtable define %rtype @matrixMul(float* nocapture %A, i32 %bytes_A, float* nocapture %B, i32 %bytes_B, float* %C, i32 %bytes_C, i32 %k, i32 %n, i32 %m) #0 { entry: - %call = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0) #2 - %call1 = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 1) #2 + ; ------------------------- VISC changes ------------------ + ; Replace get_global_id calls with calls to getNode followed but getNumNodeInstances.x + ; Replaced statement -- %call = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0) #2 + %this_node = call i8* @llvm.visc.getNode() + %call = tail call i32 @llvm.visc.getNumNodeInstances.x(i8* %this_node) + + ; Replace get_global_id calls with calls to getNode followed but getNumNodeInstances.x + ; Replaced statement -- %call1 = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 1) #2 + %call1 = tail call i32 @llvm.visc.getNumNodeInstances.y(i8* %this_node) + ; ---------------------- VISC changes End ------------------ + %cmp22 = icmp sgt i32 %k, 0 br i1 %cmp22, label %for.body.lr.ph, label %for.end @@ -581,9 +259,9 @@ for.end: ; preds = %for.body, %entry %idxprom10 = sext i32 %add9 to i64 %arrayidx11 = getelementptr inbounds float* %C, i64 %idxprom10 store float %res.0.lcssa, float* %arrayidx11, align 4, !tbaa !0 - %.fca.0.insert = insertvalue { float*, i32 } undef, float* %C, 0 - %.fca.1.insert = insertvalue { float*, i32 } %.fca.0.insert, i32 %bytes_C, 1 - ret { float*, i32 } %.fca.1.insert + %.fca.0.insert = insertvalue %rtype undef, float* %C, 0 + %.fca.1.insert = insertvalue %rtype %.fca.0.insert, i32 %bytes_C, 1 + ret %rtype %.fca.1.insert } define %rtype @MatrixMulRoot(float* %h_A, i32 %bytes_A, float* %h_B, i32 %bytes_B, float* %h_C, i32 %bytes_C, i32 %WA, i32 %WB, i32 %HA) { @@ -604,6 +282,9 @@ define %rtype @MatrixMulRoot(float* %h_A, i32 %bytes_A, float* %h_B, i32 %bytes_ ret %rtype zeroinitializer } +; Function Attrs: nounwind +declare noalias i8* @malloc(i64) #1 + ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture %argv) #0 { entry: @@ -679,7 +360,7 @@ randomInit.exit41: ; preds = %for.body.i40 call void @llvm.visc.wait(i8* %graphID) ; Get the result - %out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 4 + %out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 9 %out = load %rtype* %out.addr %out.h_C = extractvalue %rtype %out, 0 ;%2 = extractvalue %rtype %out, 0