Skip to content
Snippets Groups Projects
Commit d179193d authored by Aaron Councilman's avatar Aaron Councilman
Browse files

Parallel tiled cpu schedule

parent 4b1630b2
No related branches found
No related tags found
1 merge request!205Fork reshape
Pipeline #201835 canceled
This commit is part of merge request !205. Comments created here will be created in the context of that merge request.
// General cleanup pipeline applied to the selection X.
// Runs gvn, phi-elim, and dce; then ip-sroa followed by sroa and another dce;
// then a second gvn / phi-elim / dce round to clean up after the sroa passes.
// NOTE(review): pass names presumably stand for global value numbering,
// phi elimination, dead code elimination, and (interprocedural) scalar
// replacement of aggregates - confirm against the scheduler documentation.
macro optimize!(X) {
gvn(X);
phi-elim(X);
dce(X);
ip-sroa(X);
sroa(X);
dce(X);
gvn(X);
phi-elim(X);
dce(X);
}
// Final pipeline run before code generation: gcm, float-collections, dce,
// and then gcm again.
// NOTE(review): every pass in the body is applied to `*` rather than to the
// macro parameter X, so the argument passed by the caller is currently
// ignored. The only call in this file passes `*`, so the result is the same
// here - confirm whether this is intentional before reusing the macro with a
// narrower selection.
macro codegen!(X) {
gcm(*);
float-collections(*);
dce(*);
gcm(*);
}
// ---- Schedule for `matmul`: parallel outer tiles, cache-tiled CPU body ----

// Clean up everything before any loop transformations are applied.
optimize!(*);
// Repeat forkify + fork-guard-elim on matmul inside a fixpoint block.
// NOTE(review): `panic after 20` presumably aborts the schedule if the block
// has not converged after 20 iterations - confirm.
fixpoint panic after 20 {
forkify(matmul);
fork-guard-elim(matmul);
}
// Mark the whole loop nest as associative: any order of iterations is
// equivalent.
associative(matmul@outer);
// Tile the outer 2 loops to create 16 parallel threads (each responsible for
// computing one block of the output).
// `par` selects what is labeled `outer` but not `inner` in matmul
// (NOTE(review): `\` presumably denotes set difference - confirm).
let par = matmul@outer \ matmul@inner;
// Tile size 4 on each of the 2 outer loops gives the 4 x 4 = 16 blocks.
// NOTE(review): the meaning of the remaining fork-tile arguments
// (0, false, true) is not visible here - check the pass documentation.
fork-tile[4, 0, false, true](par);
fork-coalesce(par);
// Reorder the fork dimensions produced by tiling and coalescing.
// NOTE(review): presumably this groups the tile-index dimensions ahead of
// the within-tile dimensions - TODO confirm the resulting order.
fork-interchange[0, 1](par);
fork-interchange[2, 3](par);
fork-interchange[1, 2](par);
// Split the forks across the whole module; the result is addressed below as
// split.<function>.fj<N>.
let split = fork-split(*);
// Coalesce and then parallelize the fork-joins selected by fj0 minus fj2.
fork-coalesce(split.matmul.fj0 \ split.matmul.fj2);
parallel-fork(split.matmul.fj0 \ split.matmul.fj2);
// Pull the body of the parallel loop out into its own device function
let body = outline(split.matmul.fj2);
// NOTE(review): presumably assigns the outlined function to the CPU
// device - confirm.
cpu(body);
// Tile the loop nest for cache performance; 16x16x16 tile
fork-tile[16, 0, false, true](body);
// Coalesce repeatedly until a fixpoint is reached (no iteration limit is
// given here, unlike the fixpoint block above).
fixpoint { fork-coalesce(body); }
// Reorder the dimensions of the tiled body; same interchange pattern as for
// `par` above, with every index shifted up by one.
fork-interchange[1, 2](body);
fork-interchange[3, 4](body);
fork-interchange[2, 3](body);
optimize!(*);
// Split the forks in the body, run reduce-slf over the whole module, and
// then unforkify the body.
// NOTE(review): unforkify presumably lowers the body's fork-joins back to
// sequential loops - confirm.
fork-split(body);
reduce-slf(*);
unforkify(body);
// Final cleanup, then the pre-codegen pipeline.
optimize!(*);
codegen!(*);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment