GPU backend
Compare changes
- Praneet Rathi authored
+ 240
− 70
@@ -85,6 +85,7 @@ pub fn gpu_codegen<W: Write>(
@@ -219,6 +220,7 @@ impl GPUContext<'_> {
@@ -293,7 +295,7 @@ impl GPUContext<'_> {
@@ -302,10 +304,10 @@ impl GPUContext<'_> {
@@ -314,7 +316,7 @@ impl GPUContext<'_> {
@@ -348,7 +350,7 @@ impl GPUContext<'_> {
@@ -424,7 +426,7 @@ impl GPUContext<'_> {
// Construct block forks by greedily accepting while: a) each fork join is strictly nested, meaning no other neighboring fork joins, b) the join has no user reduce nodes, c) the total number of blocks < max_num_blocks, d) each fork join's bounds are independent of outer fork joins, and e) each fork join's reduction has no dependency or synchronization between {hercules} threads. A smarter policy may be needed, particularly for underutilized kernels where saturating threads per block instead of blocks per kernel may be preferred.
// Construct block forks by greedily accepting while: a) each fork join is strictly nested, meaning no other neighboring fork joins, b) the forks are parallel reduce forks, c) the total number of blocks < max_num_blocks, d) each fork join's bounds are independent of outer fork joins, and e) each fork join's reduction has no dependency or synchronization between {hercules} threads. A smarter policy may be needed, particularly for underutilized kernels where saturating threads per block instead of blocks per kernel may be preferred.
@@ -442,7 +444,7 @@ impl GPUContext<'_> {
@@ -556,7 +558,7 @@ impl GPUContext<'_> {
@@ -578,6 +580,7 @@ impl GPUContext<'_> {
@@ -598,13 +601,10 @@ impl GPUContext<'_> {
@@ -628,7 +628,8 @@ impl GPUContext<'_> {
@@ -636,7 +637,8 @@ impl GPUContext<'_> {
@@ -647,7 +649,8 @@ impl GPUContext<'_> {
@@ -659,7 +662,8 @@ impl GPUContext<'_> {
@@ -672,36 +676,29 @@ impl GPUContext<'_> {
@@ -726,11 +723,17 @@ impl GPUContext<'_> {
@@ -743,30 +746,53 @@ impl GPUContext<'_> {
@@ -848,7 +874,7 @@ impl GPUContext<'_> {
@@ -857,18 +883,41 @@ impl GPUContext<'_> {
@@ -878,23 +927,95 @@ impl GPUContext<'_> {
@@ -905,15 +1026,54 @@ impl GPUContext<'_> {
@@ -1050,7 +1210,17 @@ impl GPUContext<'_> {
@@ -1077,7 +1247,7 @@ impl GPUContext<'_> {