Skip to content
Snippets Groups Projects
Commit aa480186 authored by Prakalp Srivastava's avatar Prakalp Srivastava
Browse files

First cut of example visc_gemm_opencl.ll

parent a12e4c7b
No related branches found
No related tags found
No related merge requests found
...@@ -17,16 +17,6 @@ target triple = "x86_64-unknown-linux-gnu" ...@@ -17,16 +17,6 @@ target triple = "x86_64-unknown-linux-gnu"
%struct._cl_kernel = type opaque %struct._cl_kernel = type opaque
%struct._cl_event = type opaque %struct._cl_event = type opaque
; Return Type of VISC Compute Matrix Mul
%rtype = type {float*, i32}
%struct.arg = type { float*, i32, float*, i32, float*, i32, i32, i32, i32, %rtype }
; Function Attrs: nounwind
declare i8* @llvm.visc.launch(i8*, i8*) #0
; Function Attrs: nounwind
declare void @llvm.visc.wait(i8*) #0
@.str = private unnamed_addr constant [3 x i8] c"rb\00", align 1 @.str = private unnamed_addr constant [3 x i8] c"rb\00", align 1
@.str1 = private unnamed_addr constant [45 x i8] c"Mismatch at %d,%d --- C = %f and goldC = %f\0A\00", align 1 @.str1 = private unnamed_addr constant [45 x i8] c"Mismatch at %d,%d --- C = %f and goldC = %f\0A\00", align 1
@.str2 = private unnamed_addr constant [35 x i8] c"Failure to get number of platforms\00", align 1 @.str2 = private unnamed_addr constant [35 x i8] c"Failure to get number of platforms\00", align 1
...@@ -80,64 +70,6 @@ for.end: ; preds = %for.body, %entry ...@@ -80,64 +70,6 @@ for.end: ; preds = %for.body, %entry
; Function Attrs: nounwind ; Function Attrs: nounwind
declare i32 @rand() #1 declare i32 @rand() #1
; Function Attrs: nounwind uwtable
define noalias i8* @LoadProgSource(i8* nocapture %cFilename, i64* %szFinalLength) #0 {
entry:
%call = tail call %struct._IO_FILE* @fopen(i8* %cFilename, i8* getelementptr inbounds ([3 x i8]* @.str, i64 0, i64 0)) #4
%cmp = icmp eq %struct._IO_FILE* %call, null
br i1 %cmp, label %return, label %if.end
if.end: ; preds = %entry
%call1 = tail call i32 @fseek(%struct._IO_FILE* %call, i64 0, i32 2) #4
%call2 = tail call i64 @ftell(%struct._IO_FILE* %call) #4
%call3 = tail call i32 @fseek(%struct._IO_FILE* %call, i64 0, i32 0) #4
%add = add i64 %call2, 1
%call4 = tail call noalias i8* @malloc(i64 %add) #4
%call5 = tail call i64 @fread(i8* %call4, i64 %call2, i64 1, %struct._IO_FILE* %call) #4
%cmp6 = icmp eq i64 %call5, 1
%call8 = tail call i32 @fclose(%struct._IO_FILE* %call) #4
br i1 %cmp6, label %if.end9, label %if.then7
if.then7: ; preds = %if.end
tail call void @free(i8* %call4) #4
br label %return
if.end9: ; preds = %if.end
%cmp11 = icmp eq i64* %szFinalLength, null
br i1 %cmp11, label %if.end13, label %if.then12
if.then12: ; preds = %if.end9
store i64 %call2, i64* %szFinalLength, align 8, !tbaa !3
br label %if.end13
if.end13: ; preds = %if.end9, %if.then12
%arrayidx = getelementptr inbounds i8* %call4, i64 %call2
store i8 0, i8* %arrayidx, align 1, !tbaa !1
br label %return
return: ; preds = %entry, %if.end13, %if.then7
%retval.0 = phi i8* [ null, %if.then7 ], [ %call4, %if.end13 ], [ null, %entry ]
ret i8* %retval.0
}
; Function Attrs: nounwind
declare noalias %struct._IO_FILE* @fopen(i8* nocapture, i8* nocapture) #1
; Function Attrs: nounwind
declare i32 @fseek(%struct._IO_FILE* nocapture, i64, i32) #1
; Function Attrs: nounwind
declare i64 @ftell(%struct._IO_FILE* nocapture) #1
; Function Attrs: nounwind
declare noalias i8* @malloc(i64) #1
; Function Attrs: nounwind
declare i64 @fread(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #1
; Function Attrs: nounwind
declare i32 @fclose(%struct._IO_FILE* nocapture) #1
; Function Attrs: nounwind ; Function Attrs: nounwind
declare void @free(i8* nocapture) #1 declare void @free(i8* nocapture) #1
...@@ -223,330 +155,76 @@ return: ; preds = %for.inc50, %if.then ...@@ -223,330 +155,76 @@ return: ; preds = %for.inc50, %if.then
; Function Attrs: nounwind ; Function Attrs: nounwind
declare i32 @printf(i8* nocapture, ...) #1 declare i32 @printf(i8* nocapture, ...) #1
; Function Attrs: nounwind uwtable
define void @computeMatrixMul(float* %h_A, i32 %bytes_A, float* %h_B, i32 %bytes_B, float* %h_C, i32 %bytes_C) #0 {
entry:
%dataBytes = alloca i64, align 8
%errcode = alloca i32, align 4
%d_A = alloca %struct._cl_mem*, align 8
%d_B = alloca %struct._cl_mem*, align 8
%d_C = alloca %struct._cl_mem*, align 8
%numPlatforms = alloca i32, align 4
%buffer = alloca [10240 x i8], align 16
%properties = alloca [3 x i64], align 16
%binaryLength = alloca i64, align 8
%clMatrixMul = alloca i8*, align 8
%binaryStatus = alloca i32, align 4
%localWorkSize = alloca [2 x i64], align 16
%globalWorkSize = alloca [2 x i64], align 16
%wA = alloca i32, align 4
%wC = alloca i32, align 4
%call = call i32 @clGetPlatformIDs(i32 0, %struct._cl_platform_id** null, i32* %numPlatforms) #4
store i32 %call, i32* %errcode, align 4, !tbaa !4
%cmp.i = icmp eq i32 %call, 0
br i1 %cmp.i, label %checkErr.exit, label %if.then.i
if.then.i: ; preds = %entry
%0 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5
%call.i = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([35 x i8]* @.str2, i64 0, i64 0)) #4
call void @exit(i32 1) #7
unreachable
checkErr.exit: ; preds = %entry
%1 = load i32* %numPlatforms, align 4, !tbaa !4
%2 = zext i32 %1 to i64
%vla = alloca %struct._cl_platform_id*, i64 %2, align 16
%call1 = call i32 @clGetPlatformIDs(i32 %1, %struct._cl_platform_id** %vla, i32* null) #4
store i32 %call1, i32* %errcode, align 4, !tbaa !4
%cmp.i105 = icmp eq i32 %call1, 0
br i1 %cmp.i105, label %for.cond.preheader, label %if.then.i107
for.cond.preheader: ; preds = %checkErr.exit
%3 = load i32* %numPlatforms, align 4, !tbaa !4
%cmp148 = icmp eq i32 %3, 0
br i1 %cmp148, label %for.end, label %for.body.lr.ph
for.body.lr.ph: ; preds = %for.cond.preheader
%4 = getelementptr inbounds [10240 x i8]* %buffer, i64 0, i64 0
br label %for.body
if.then.i107: ; preds = %checkErr.exit
%5 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5
%call.i106 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %5, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([28 x i8]* @.str3, i64 0, i64 0)) #4
call void @exit(i32 1) #7
unreachable
for.body: ; preds = %for.body.lr.ph, %for.body
%i.0149 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
call void @llvm.lifetime.start(i64 10240, i8* %4) #4
%call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str4, i64 0, i64 0), i32 %i.0149) #4
%idxprom = zext i32 %i.0149 to i64
%arrayidx = getelementptr inbounds %struct._cl_platform_id** %vla, i64 %idxprom
%6 = load %struct._cl_platform_id** %arrayidx, align 8, !tbaa !5
%call3 = call i32 @clGetPlatformInfo(%struct._cl_platform_id* %6, i32 2304, i64 10240, i8* %4, i64* null) #4
%call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str5, i64 0, i64 0), i8* %4) #4
%7 = load %struct._cl_platform_id** %arrayidx, align 8, !tbaa !5
%call9 = call i32 @clGetPlatformInfo(%struct._cl_platform_id* %7, i32 2305, i64 10240, i8* %4, i64* null) #4
%call11 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str6, i64 0, i64 0), i8* %4) #4
%8 = load %struct._cl_platform_id** %arrayidx, align 8, !tbaa !5
%call15 = call i32 @clGetPlatformInfo(%struct._cl_platform_id* %8, i32 2306, i64 10240, i8* %4, i64* null) #4
%call17 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str7, i64 0, i64 0), i8* %4) #4
%9 = load %struct._cl_platform_id** %arrayidx, align 8, !tbaa !5
%call21 = call i32 @clGetPlatformInfo(%struct._cl_platform_id* %9, i32 2307, i64 10240, i8* %4, i64* null) #4
%call23 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str8, i64 0, i64 0), i8* %4) #4
%10 = load %struct._cl_platform_id** %arrayidx, align 8, !tbaa !5
%call27 = call i32 @clGetPlatformInfo(%struct._cl_platform_id* %10, i32 2308, i64 10240, i8* %4, i64* null) #4
%call29 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([19 x i8]* @.str9, i64 0, i64 0), i8* %4) #4
call void @llvm.lifetime.end(i64 10240, i8* %4) #4
%inc = add i32 %i.0149, 1
%11 = load i32* %numPlatforms, align 4, !tbaa !4
%cmp = icmp ult i32 %inc, %11
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %for.cond.preheader
%arrayinit.begin = getelementptr inbounds [3 x i64]* %properties, i64 0, i64 0
store i64 4228, i64* %arrayinit.begin, align 16, !tbaa !3
%arrayinit.element = getelementptr inbounds [3 x i64]* %properties, i64 0, i64 1
%12 = load %struct._cl_platform_id** %vla, align 16, !tbaa !5
%13 = ptrtoint %struct._cl_platform_id* %12 to i64
%sext = shl i64 %13, 32
%conv = ashr exact i64 %sext, 32
store i64 %conv, i64* %arrayinit.element, align 8, !tbaa !3
%arrayinit.element31 = getelementptr inbounds [3 x i64]* %properties, i64 0, i64 2
store i64 0, i64* %arrayinit.element31, align 16, !tbaa !3
%call33 = call %struct._cl_context* @clCreateContextFromType(i64* %arrayinit.begin, i64 4, void (i8*, i8*, i64, i8*)* null, i8* null, i32* %errcode) #4
%14 = load i32* %errcode, align 4, !tbaa !4
%cmp.i109 = icmp eq i32 %14, 0
br i1 %cmp.i109, label %checkErr.exit112, label %if.then.i111
if.then.i111: ; preds = %for.end
%15 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5
%call.i110 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %15, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([30 x i8]* @.str10, i64 0, i64 0)) #4
call void @exit(i32 1) #7
unreachable
checkErr.exit112: ; preds = %for.end
%call34 = call i32 @clGetContextInfo(%struct._cl_context* %call33, i32 4225, i64 0, i8* null, i64* %dataBytes) #4
store i32 %call34, i32* %errcode, align 4, !tbaa !4
%16 = load i64* %dataBytes, align 8, !tbaa !3
%call35 = call noalias i8* @malloc(i64 %16) #4
%17 = bitcast i8* %call35 to %struct._cl_device_id**
%call36 = call i32 @clGetContextInfo(%struct._cl_context* %call33, i32 4225, i64 %16, i8* %call35, i64* null) #4
%18 = load i32* %errcode, align 4, !tbaa !4
%or = or i32 %18, %call36
store i32 %or, i32* %errcode, align 4, !tbaa !4
%cmp.i113 = icmp eq i32 %or, 0
br i1 %cmp.i113, label %checkErr.exit116, label %if.then.i115
if.then.i115: ; preds = %checkErr.exit112
%19 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5
%call.i114 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %19, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([28 x i8]* @.str11, i64 0, i64 0)) #4
call void @exit(i32 1) #7
unreachable
checkErr.exit116: ; preds = %checkErr.exit112
%20 = load %struct._cl_device_id** %17, align 8, !tbaa !5
%call38 = call %struct._cl_command_queue* @clCreateCommandQueue(%struct._cl_context* %call33, %struct._cl_device_id* %20, i64 0, i32* %errcode) #4
%21 = load i32* %errcode, align 4, !tbaa !4
%cmp.i117 = icmp eq i32 %21, 0
br i1 %cmp.i117, label %checkErr.exit120, label %if.then.i119
if.then.i119: ; preds = %checkErr.exit116
%22 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5
%call.i118 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %22, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([32 x i8]* @.str12, i64 0, i64 0)) #4
call void @exit(i32 1) #7
unreachable
checkErr.exit120: ; preds = %checkErr.exit116
%conv39 = zext i32 %bytes_C to i64
%call40 = call %struct._cl_mem* @clCreateBuffer(%struct._cl_context* %call33, i64 1, i64 %conv39, i8* null, i32* %errcode) #4
store %struct._cl_mem* %call40, %struct._cl_mem** %d_C, align 8, !tbaa !5
%conv41 = zext i32 %bytes_A to i64
%23 = bitcast float* %h_A to i8*
%call42 = call %struct._cl_mem* @clCreateBuffer(%struct._cl_context* %call33, i64 33, i64 %conv41, i8* %23, i32* %errcode) #4
store %struct._cl_mem* %call42, %struct._cl_mem** %d_A, align 8, !tbaa !5
%conv43 = zext i32 %bytes_B to i64
%24 = bitcast float* %h_B to i8*
%call44 = call %struct._cl_mem* @clCreateBuffer(%struct._cl_context* %call33, i64 33, i64 %conv43, i8* %24, i32* %errcode) #4
store %struct._cl_mem* %call44, %struct._cl_mem** %d_B, align 8, !tbaa !5
%call45 = call i8* @LoadProgSource(i8* getelementptr inbounds ([18 x i8]* @.str13, i64 0, i64 0), i64* %binaryLength)
store i8* %call45, i8** %clMatrixMul, align 8, !tbaa !5
%cmp46 = icmp eq i8* %call45, null
br i1 %cmp46, label %if.then.i122, label %checkErr.exit123
if.then.i122: ; preds = %checkErr.exit120
%25 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5
%call.i121 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %25, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([31 x i8]* @.str14, i64 0, i64 0)) #4
call void @exit(i32 1) #7
unreachable
checkErr.exit123: ; preds = %checkErr.exit120
%call49 = call %struct._cl_program* @clCreateProgramWithBinary(%struct._cl_context* %call33, i32 1, %struct._cl_device_id** %17, i64* %binaryLength, i8** %clMatrixMul, i32* %binaryStatus, i32* %errcode) #4
%26 = load i32* %errcode, align 4, !tbaa !4
%cmp.i124 = icmp eq i32 %26, 0
br i1 %cmp.i124, label %checkErr.exit127, label %if.then.i126
if.then.i126: ; preds = %checkErr.exit123
%27 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5
%call.i125 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %27, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([38 x i8]* @.str15, i64 0, i64 0)) #4
call void @exit(i32 1) #7
unreachable
checkErr.exit127: ; preds = %checkErr.exit123
%call50 = call i32 @clBuildProgram(%struct._cl_program* %call49, i32 0, %struct._cl_device_id** null, i8* null, void (%struct._cl_program*, i8*)* null, i8* null) #4
store i32 %call50, i32* %errcode, align 4, !tbaa !4
%cmp.i128 = icmp eq i32 %call50, 0
br i1 %cmp.i128, label %checkErr.exit131, label %if.then.i130
if.then.i130: ; preds = %checkErr.exit127
%28 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5
%call.i129 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %28, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([25 x i8]* @.str16, i64 0, i64 0)) #4
call void @exit(i32 1) #7
unreachable
checkErr.exit131: ; preds = %checkErr.exit127
%call51 = call %struct._cl_kernel* @clCreateKernel(%struct._cl_program* %call49, i8* getelementptr inbounds ([10 x i8]* @.str17, i64 0, i64 0), i32* %errcode) #4
%29 = load i32* %errcode, align 4, !tbaa !4
%cmp.i132 = icmp eq i32 %29, 0
br i1 %cmp.i132, label %checkErr.exit135, label %if.then.i134
if.then.i134: ; preds = %checkErr.exit131
%30 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5
%call.i133 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %30, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([25 x i8]* @.str18, i64 0, i64 0)) #4
call void @exit(i32 1) #7
unreachable
checkErr.exit135: ; preds = %checkErr.exit131
store i32 1024, i32* %wA, align 4, !tbaa !4
store i32 1024, i32* %wC, align 4, !tbaa !4
%31 = bitcast %struct._cl_mem** %d_C to i8*
%call52 = call i32 @clSetKernelArg(%struct._cl_kernel* %call51, i32 0, i64 8, i8* %31) #4
store i32 %call52, i32* %errcode, align 4, !tbaa !4
%32 = bitcast %struct._cl_mem** %d_A to i8*
%call53 = call i32 @clSetKernelArg(%struct._cl_kernel* %call51, i32 1, i64 8, i8* %32) #4
%33 = load i32* %errcode, align 4, !tbaa !4
%or54 = or i32 %33, %call53
store i32 %or54, i32* %errcode, align 4, !tbaa !4
%34 = bitcast %struct._cl_mem** %d_B to i8*
%call55 = call i32 @clSetKernelArg(%struct._cl_kernel* %call51, i32 2, i64 8, i8* %34) #4
%35 = load i32* %errcode, align 4, !tbaa !4
%or56 = or i32 %35, %call55
store i32 %or56, i32* %errcode, align 4, !tbaa !4
%36 = bitcast i32* %wA to i8*
%call57 = call i32 @clSetKernelArg(%struct._cl_kernel* %call51, i32 3, i64 4, i8* %36) #4
%37 = load i32* %errcode, align 4, !tbaa !4
%or58 = or i32 %37, %call57
store i32 %or58, i32* %errcode, align 4, !tbaa !4
%38 = bitcast i32* %wC to i8*
%call59 = call i32 @clSetKernelArg(%struct._cl_kernel* %call51, i32 4, i64 4, i8* %38) #4
%39 = load i32* %errcode, align 4, !tbaa !4
%or60 = or i32 %39, %call59
store i32 %or60, i32* %errcode, align 4, !tbaa !4
%cmp.i136 = icmp eq i32 %or60, 0
br i1 %cmp.i136, label %checkErr.exit139, label %if.then.i138
if.then.i138: ; preds = %checkErr.exit135
%40 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5
%call.i137 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %40, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([32 x i8]* @.str19, i64 0, i64 0)) #4
call void @exit(i32 1) #7
unreachable
checkErr.exit139: ; preds = %checkErr.exit135
%arrayidx61 = getelementptr inbounds [2 x i64]* %localWorkSize, i64 0, i64 0
store i64 16, i64* %arrayidx61, align 16, !tbaa !3
%arrayidx62 = getelementptr inbounds [2 x i64]* %localWorkSize, i64 0, i64 1
store i64 16, i64* %arrayidx62, align 8, !tbaa !3
%arrayidx63 = getelementptr inbounds [2 x i64]* %globalWorkSize, i64 0, i64 0
store i64 1024, i64* %arrayidx63, align 16, !tbaa !3
%arrayidx64 = getelementptr inbounds [2 x i64]* %globalWorkSize, i64 0, i64 1
store i64 1024, i64* %arrayidx64, align 8, !tbaa !3
%call67 = call i32 @clEnqueueNDRangeKernel(%struct._cl_command_queue* %call38, %struct._cl_kernel* %call51, i32 2, i64* null, i64* %arrayidx63, i64* %arrayidx61, i32 0, %struct._cl_event** null, %struct._cl_event** null) #4
store i32 %call67, i32* %errcode, align 4, !tbaa !4
%cmp.i140 = icmp eq i32 %call67, 0
br i1 %cmp.i140, label %checkErr.exit143, label %if.then.i142
if.then.i142: ; preds = %checkErr.exit139
%41 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5
%call.i141 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %41, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([26 x i8]* @.str20, i64 0, i64 0)) #4
call void @exit(i32 1) #7
unreachable
checkErr.exit143: ; preds = %checkErr.exit139
%42 = load %struct._cl_mem** %d_C, align 8, !tbaa !5
%43 = bitcast float* %h_C to i8*
%call69 = call i32 @clEnqueueReadBuffer(%struct._cl_command_queue* %call38, %struct._cl_mem* %42, i32 1, i64 0, i64 %conv39, i8* %43, i32 0, %struct._cl_event** null, %struct._cl_event** null) #4
store i32 %call69, i32* %errcode, align 4, !tbaa !4
%cmp.i144 = icmp eq i32 %call69, 0
br i1 %cmp.i144, label %checkErr.exit147, label %if.then.i146
if.then.i146: ; preds = %checkErr.exit143
%44 = load %struct._IO_FILE** @stderr, align 8, !tbaa !5
%call.i145 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %44, i8* getelementptr inbounds ([11 x i8]* @.str25, i64 0, i64 0), i8* getelementptr inbounds ([23 x i8]* @.str21, i64 0, i64 0)) #4
call void @exit(i32 1) #7
unreachable
checkErr.exit147: ; preds = %checkErr.exit143
%45 = load %struct._cl_mem** %d_A, align 8, !tbaa !5
%call70 = call i32 @clReleaseMemObject(%struct._cl_mem* %45) #4
%46 = load %struct._cl_mem** %d_C, align 8, !tbaa !5
%call71 = call i32 @clReleaseMemObject(%struct._cl_mem* %46) #4
%47 = load %struct._cl_mem** %d_B, align 8, !tbaa !5
%call72 = call i32 @clReleaseMemObject(%struct._cl_mem* %47) #4
call void @free(i8* %call35) #4
%48 = load i8** %clMatrixMul, align 8, !tbaa !5
call void @free(i8* %48) #4
%call73 = call i32 @clReleaseContext(%struct._cl_context* %call33) #4
%call74 = call i32 @clReleaseKernel(%struct._cl_kernel* %call51) #4
%call75 = call i32 @clReleaseProgram(%struct._cl_program* %call49) #4
%call76 = call i32 @clReleaseCommandQueue(%struct._cl_command_queue* %call38) #4
ret void
}
declare i32 @clGetPlatformIDs(i32, %struct._cl_platform_id**, i32*) #3
; Function Attrs: nounwind ; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #4 declare void @llvm.lifetime.start(i64, i8* nocapture) #4
declare i32 @clGetPlatformInfo(%struct._cl_platform_id*, i32, i64, i8*, i64*) #3
; Function Attrs: nounwind ; Function Attrs: nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) #4 declare void @llvm.lifetime.end(i64, i8* nocapture) #4
declare %struct._cl_context* @clCreateContextFromType(i64*, i64, void (i8*, i8*, i64, i8*)*, i8*, i32*) #3 ; --------------- VISC Intrinsics ---------------
; Return Type of VISC Compute Matrix Mul
%rtype = type {float*, i32}
%struct.arg = type { float*, i32, float*, i32, float*, i32, i32, i32, i32, %rtype }
declare i32 @clGetContextInfo(%struct._cl_context*, i32, i64, i8*, i64*) #3 ; Function Attrs: nounwind
declare i8* @llvm.visc.launch(i8*, i8*) #0
declare %struct._cl_command_queue* @clCreateCommandQueue(%struct._cl_context*, %struct._cl_device_id*, i64, i32*) #3 ; Function Attrs: nounwind
declare void @llvm.visc.wait(i8*) #0
declare %struct._cl_mem* @clCreateBuffer(%struct._cl_context*, i64, i64, i8*, i32*) #3 ; Function Attrs: nounwind
declare i8* @llvm.visc.createNode(i8*) #0
declare %struct._cl_program* @clCreateProgramWithBinary(%struct._cl_context*, i32, %struct._cl_device_id**, i64*, i8**, i32*, i32*) #3 ; Function Attrs: nounwind
declare i8* @llvm.visc.createNode1D(i8*, i32) #0
declare i32 @clBuildProgram(%struct._cl_program*, i32, %struct._cl_device_id**, i8*, void (%struct._cl_program*, i8*)*, i8*) #3 ; Function Attrs: nounwind
declare i8* @llvm.visc.createNode2D(i8*, i32, i32) #0
; Function Attrs: nounwind
declare i8* @llvm.visc.createNode3D(i8*, i32, i32, i32) #0
; Function Attrs: nounwind
declare i8* @llvm.visc.createEdge(i8*, i8*, i1, i32, i32) #0
declare %struct._cl_kernel* @clCreateKernel(%struct._cl_program*, i8*, i32*) #3 ; Function Attrs: nounwind
declare i8* @llvm.visc.getNode() #0
declare i32 @clSetKernelArg(%struct._cl_kernel*, i32, i64, i8*) #3 ; Function Attrs: nounwind
declare i8* @llvm.visc.getParentNode(i8*) #0
declare i32 @clEnqueueNDRangeKernel(%struct._cl_command_queue*, %struct._cl_kernel*, i32, i64*, i64*, i64*, i32, %struct._cl_event**, %struct._cl_event**) #3 ; Function Attrs: nounwind
declare i32 @llvm.visc.getNumDims(i8*) #0
declare i32 @clEnqueueReadBuffer(%struct._cl_command_queue*, %struct._cl_mem*, i32, i64, i64, i8*, i32, %struct._cl_event**, %struct._cl_event**) #3 ; Function Attrs: nounwind
declare i32 @llvm.visc.getNumNodeInstances.x(i8*) #0
declare i32 @clReleaseMemObject(%struct._cl_mem*) #3 ; Function Attrs: nounwind
declare i32 @llvm.visc.getNumNodeInstances.y(i8*) #0
declare i32 @clReleaseContext(%struct._cl_context*) #3 ; Function Attrs: nounwind
declare void @llvm.visc.bind.input(i8*, i32, i32)
declare i32 @clReleaseKernel(%struct._cl_kernel*) #3 ; Function Attrs: nounwind
declare void @llvm.visc.bind.output(i8*, i32, i32)
; ----------------- VISC intrinsics end ------------------
declare i32 @clReleaseProgram(%struct._cl_program*) #3
declare i32 @clReleaseCommandQueue(%struct._cl_command_queue*) #3
; Function Attrs: nounwind uwtable ; Function Attrs: nounwind uwtable
define %rtype @matrixMul(float* nocapture %A, i32 %bytes_A, float* nocapture %B, i32 %bytes_B, float* %C, i32 %bytes_C, i32 %k, i32 %n, i32 %m) #0 { define %rtype @matrixMul(float* nocapture %A, i32 %bytes_A, float* nocapture %B, i32 %bytes_B, float* %C, i32 %bytes_C, i32 %k, i32 %n, i32 %m) #0 {
entry: entry:
%call = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0) #2 ; ------------------------- VISC changes ------------------
%call1 = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 1) #2 ; Replace get_global_id calls with calls to getNode followed but getNumNodeInstances.x
; Replaced statement -- %call = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0) #2
%this_node = call i8* @llvm.visc.getNode()
%call = tail call i32 @llvm.visc.getNumNodeInstances.x(i8* %this_node)
; Replace get_global_id calls with calls to getNode followed but getNumNodeInstances.x
; Replaced statement -- %call1 = tail call i32 (i32, ...)* bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 1) #2
%call1 = tail call i32 @llvm.visc.getNumNodeInstances.y(i8* %this_node)
; ---------------------- VISC changes End ------------------
%cmp22 = icmp sgt i32 %k, 0 %cmp22 = icmp sgt i32 %k, 0
br i1 %cmp22, label %for.body.lr.ph, label %for.end br i1 %cmp22, label %for.body.lr.ph, label %for.end
...@@ -581,9 +259,9 @@ for.end: ; preds = %for.body, %entry ...@@ -581,9 +259,9 @@ for.end: ; preds = %for.body, %entry
%idxprom10 = sext i32 %add9 to i64 %idxprom10 = sext i32 %add9 to i64
%arrayidx11 = getelementptr inbounds float* %C, i64 %idxprom10 %arrayidx11 = getelementptr inbounds float* %C, i64 %idxprom10
store float %res.0.lcssa, float* %arrayidx11, align 4, !tbaa !0 store float %res.0.lcssa, float* %arrayidx11, align 4, !tbaa !0
%.fca.0.insert = insertvalue { float*, i32 } undef, float* %C, 0 %.fca.0.insert = insertvalue %rtype undef, float* %C, 0
%.fca.1.insert = insertvalue { float*, i32 } %.fca.0.insert, i32 %bytes_C, 1 %.fca.1.insert = insertvalue %rtype %.fca.0.insert, i32 %bytes_C, 1
ret { float*, i32 } %.fca.1.insert ret %rtype %.fca.1.insert
} }
define %rtype @MatrixMulRoot(float* %h_A, i32 %bytes_A, float* %h_B, i32 %bytes_B, float* %h_C, i32 %bytes_C, i32 %WA, i32 %WB, i32 %HA) { define %rtype @MatrixMulRoot(float* %h_A, i32 %bytes_A, float* %h_B, i32 %bytes_B, float* %h_C, i32 %bytes_C, i32 %WA, i32 %WB, i32 %HA) {
...@@ -604,6 +282,9 @@ define %rtype @MatrixMulRoot(float* %h_A, i32 %bytes_A, float* %h_B, i32 %bytes_ ...@@ -604,6 +282,9 @@ define %rtype @MatrixMulRoot(float* %h_A, i32 %bytes_A, float* %h_B, i32 %bytes_
ret %rtype zeroinitializer ret %rtype zeroinitializer
} }
; Function Attrs: nounwind
declare noalias i8* @malloc(i64) #1
; Function Attrs: nounwind uwtable ; Function Attrs: nounwind uwtable
define i32 @main(i32 %argc, i8** nocapture %argv) #0 { define i32 @main(i32 %argc, i8** nocapture %argv) #0 {
entry: entry:
...@@ -679,7 +360,7 @@ randomInit.exit41: ; preds = %for.body.i40 ...@@ -679,7 +360,7 @@ randomInit.exit41: ; preds = %for.body.i40
call void @llvm.visc.wait(i8* %graphID) call void @llvm.visc.wait(i8* %graphID)
; Get the result ; Get the result
%out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 4 %out.addr = getelementptr %struct.arg* %in.addr, i32 0, i32 9
%out = load %rtype* %out.addr %out = load %rtype* %out.addr
%out.h_C = extractvalue %rtype %out, 0 %out.h_C = extractvalue %rtype %out, 0
;%2 = extractvalue %rtype %out, 0 ;%2 = extractvalue %rtype %out, 0
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment