Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • llvm/hercules
1 result
Show changes
Showing
with 585244 additions and 0 deletions
!*.txt
LICENSE TERMS
Copyright (c)2008-2011 University of Virginia
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted without royalty fees or other restrictions, provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of the University of Virginia, the Dept. of Computer Science, nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF VIRGINIA OR THE SOFTWARE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
If you use this software or a modified version of it, please cite the most relevant among the following papers:
- M. A. Goodrum, M. J. Trotter, A. Aksel, S. T. Acton, and K. Skadron. Parallelization of Particle Filter Algorithms. In Proceedings
of the 3rd Workshop on Emerging Applications and Many-core Architecture (EAMA), in conjunction with the IEEE/ACM International
Symposium on Computer Architecture (ISCA), June 2010.
- S. Che, M. Boyer, J. Meng, D. Tarjan, J. W. Sheaffer, Sang-Ha Lee and K. Skadron.
"Rodinia: A Benchmark Suite for Heterogeneous Computing". IEEE International Symposium
on Workload Characterization, Oct 2009.
- J. Meng and K. Skadron. "Performance Modeling and Automatic Ghost Zone Optimization
for Iterative Stencil Loops on GPUs." In Proceedings of the 23rd Annual ACM International
Conference on Supercomputing (ICS), June 2009.
- L.G. Szafaryn, K. Skadron and J. Saucerman. "Experiences Accelerating MATLAB Systems
Biology Applications." in Workshop on Biomedicine in Computing (BiC) at the International
Symposium on Computer Architecture (ISCA), June 2009.
- M. Boyer, D. Tarjan, S. T. Acton, and K. Skadron. "Accelerating Leukocyte Tracking using CUDA:
A Case Study in Leveraging Manycore Coprocessors." In Proceedings of the International Parallel
and Distributed Processing Symposium (IPDPS), May 2009.
- S. Che, M. Boyer, J. Meng, D. Tarjan, J. W. Sheaffer, and K. Skadron. "A Performance
Study of General Purpose Applications on Graphics Processors using CUDA" Journal of
Parallel and Distributed Computing, Elsevier, June 2008.
BFS Examples from Rodinia Benchmark Suite 3.1
The data provided herein are governed by the [LICENSE](./LICENSE).
This diff is collapsed.
This diff is collapsed.
// A graph node: `edge_start` is the index of the node's first entry in the edge array and
// `num_edges` is how many consecutive entries belong to it (see how `bfs` walks them).
type Node = struct { edge_start: u32; num_edges: u32; };
// Breadth-first search over a graph stored as a node array plus a flat edge array.
// `graph_nodes[i]` selects the range of `edges` that holds the neighbours of node i, and
// `source` is the node the search starts from.
// Returns one cost per node: the BFS level at which the node was first reached, or -1 if it
// was never reached.
// NOTE(review): `mask`, `visited` and `updated` are declared without an initializer and are
// read before every element has been written; this relies on them starting out all-false.
// Confirm that Juno guarantees this.
#[entry]
fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n] {
let stop = false;
// The mask selects the set of nodes that we consider in each iteration
// It includes only the nodes that were visited for the first time in the
// prior iteration (and in the first iteration just the source node)
let mask: bool[n];
mask[source as u64] = true;
let visited: bool[n];
visited[source as u64] = true;
// Every node starts at -1 ("not reached"); only the source starts at level 0
let cost: i32[n];
for i in 0..n {
cost[i] = -1;
}
cost[source as u64] = 0;
// Nodes that were updated in the current iteration
let updated: bool[n];
while !stop {
// Assume this is the last pass; the second loop below clears the flag if anything was found
stop = true;
// Phase 1: expand every node on the current frontier
for i in 0..n {
if mask[i] {
mask[i] = false;
let edge_start = graph_nodes[i].edge_start as u64;
let num_edges = graph_nodes[i].num_edges as u64;
for edge in edge_start..edge_start + num_edges {
let id = edges[edge] as u64;
// Only record the neighbour here; it is marked visited in phase 2, so `visited`
// is not written while the frontier is still being expanded
if !visited[id] {
cost[id] = cost[i] + 1;
updated[id] = true;
}
}
}
}
// Phase 2: the nodes discovered in this pass become the next frontier
for i in 0..n {
if updated[i] {
mask[i] = true;
visited[i] = true;
stop = false;
updated[i] = false;
}
}
}
return cost;
}
// Schedule applied to the BFS program: cleanup passes over every function, then `bfs` is
// auto-outlined and the outlined function is assigned to the GPU, followed by further cleanup
// and a float-collections/dce/gcm fixpoint.
// NOTE(review): the pass names are taken at face value here; their exact semantics are defined
// by the scheduler, not by this file.
gvn(*);
phi-elim(*);
dce(*);
// Outline `bfs` and place the outlined function on the GPU
let outline = auto-outline(bfs);
gpu(outline.bfs);
ip-sroa(*);
sroa(*);
dce(*);
gvn(*);
phi-elim(*);
dce(*);
// forkify is currently disabled
//forkify(*);
infer-schedules(*);
gcm(*);
// Repeat until the enclosed passes make no further changes
fixpoint {
float-collections(*);
dce(*);
gcm(*);
}
use std::fs::File;
use std::io::Read;
use std::str::FromStr;
use nom::Parser;
/// One graph node as read from the input file: where its edges start in the flat edge list
/// and how many of them it owns.
///
/// NOTE(review): `#[repr(C)]` presumably keeps the layout compatible with the `Node` type on
/// the Juno side; confirm against the runtime.
#[repr(C)]
#[derive(Clone, Default)]
pub struct Node {
/// Index of this node's first entry in the edge list.
pub edge_start: u32,
/// Number of consecutive edge-list entries that belong to this node.
pub num_edges: u32,
}
/// Loads the graph description stored at path `file` and parses it.
///
/// Returns `(nodes, source, edges)` exactly as produced by `graph_parser`.
///
/// # Panics
///
/// Panics if the file cannot be opened or read, or if its contents are not fully consumed by
/// `graph_parser`.
pub fn parse_graph(file: String) -> (Vec<Node>, u32, Vec<u32>) {
    let mut input = String::new();
    File::open(file)
        .expect("Error opening input file")
        .read_to_string(&mut input)
        .expect("Error reading input file");

    // `all_consuming` turns any unparsed trailing input into a parser error.
    let (_, graph) = nom::combinator::all_consuming(graph_parser)
        .parse(input.as_str())
        .expect("Parser error");
    graph
}
fn graph_parser<'a>(text: &'a str) -> nom::IResult<&'a str, (Vec<Node>, u32, Vec<u32>)> {
// First, we find the number of nodes
let text = nom::character::complete::multispace0(text)?.0;
let (text, num_nodes) = nom::character::complete::digit1(text)?;
let num_nodes = u32::from_str(num_nodes).unwrap();
// Then, for each node there are two numbers: the index of that node's first edge and the
// number of edges that node has
let mut nodes = vec![];
let mut text = text;
for _ in 0..num_nodes {
let ntext = nom::character::complete::multispace0(text)?.0;
let (ntext, edge_start) = nom::character::complete::digit1(ntext)?;
let ntext = nom::character::complete::multispace0(ntext)?.0;
let (ntext, num_edges) = nom::character::complete::digit1(ntext)?;
let edge_start = u32::from_str(edge_start).unwrap();
let num_edges = u32::from_str(num_edges).unwrap();
nodes.push(Node {
edge_start,
num_edges,
});
text = ntext;
}
// Next, we find the source node
let text = nom::character::complete::multispace0(text)?.0;
let (text, source) = nom::character::complete::digit1(text)?;
let source = u32::from_str(source).unwrap();
// Next, the number of edges
let text = nom::character::complete::multispace0(text)?.0;
let (text, num_edges) = nom::character::complete::digit1(text)?;
let num_edges = u32::from_str(num_edges).unwrap();
// Finally, for each edge there are two numbers: the id (i.e. what the edge goes to) and the
// weight which is ignored (weighted BFS can't be parallelized in the same way, it would
// require synchronization)
let mut edges = vec![];
let mut text = text;
for _ in 0..num_edges {
let ntext = nom::character::complete::multispace0(text)?.0;
let (ntext, id) = nom::character::complete::digit1(ntext)?;
let ntext = nom::character::complete::multispace0(ntext)?.0;
let (ntext, _) = nom::character::complete::digit1(ntext)?;
let id = u32::from_str(id).unwrap();
edges.push(id);
text = ntext;
}
// Consume any remaining whitespace
let text = nom::character::complete::multispace0(text)?.0;
Ok((text, (nodes, source, edges)))
}
#![feature(concat_idents)]
mod graph_parser;
mod rust_bfs;
use graph_parser::*;
use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox};
use clap::Parser;
juno_build::juno!("bfs");
// Command-line arguments of the BFS sample, parsed through clap's derive macro.
// Plain `//` comments are used on purpose: `///` doc comments on a clap-derived struct are
// picked up as help text and would change the program's output.
#[derive(Parser)]
#[clap(author, version, about, long_about = None)]
struct BFSInputs {
// Path of the graph file; it is handed to `parse_graph` by `bfs_harness`.
input: String,
}
/// Runs the Juno-compiled `bfs` on the given graph and returns the per-node costs.
///
/// The node and edge counts are passed first, followed by the boxed node array, the source
/// node and the boxed edge array. The call blocks until the asynchronous run has finished.
fn run_bfs(nodes: &[Node], source: u32, edges: &[u32]) -> Vec<i32> {
    let node_count = nodes.len() as u64;
    let edge_count = edges.len() as u64;

    let node_box = HerculesImmBox::from(nodes);
    let edge_box = HerculesImmBox::from(edges);

    let mut runner = runner!(bfs);
    let costs = async_std::task::block_on(async {
        runner
            .run(node_count, edge_count, node_box.to(), source, edge_box.to())
            .await
    });

    // Copy the result out of the runtime-owned buffer into an ordinary vector.
    HerculesMutBox::from(costs).as_slice().to_vec()
}
/// Parses the graph named by `args`, runs both the Juno BFS and the Rust reference BFS on it,
/// and asserts that they produce identical costs.
///
/// # Panics
///
/// Panics if the input cannot be parsed or if the two results differ.
fn bfs_harness(args: BFSInputs) {
    let (nodes, source, edges) = parse_graph(args.input);

    let from_juno = run_bfs(nodes.as_slice(), source, edges.as_slice());
    let from_rust = rust_bfs::bfs(nodes.as_slice(), source, edges.as_slice());

    assert_eq!(from_juno, from_rust);
}
/// Entry point: parses the command line and runs the BFS comparison harness on it.
fn main() {
    bfs_harness(BFSInputs::parse());
}
/// Runs the harness on the `data/graph4096.txt` input.
#[test]
fn bfs_test_4096() {
    let input = String::from("data/graph4096.txt");
    bfs_harness(BFSInputs { input });
}
/// Runs the harness on the `data/graph65536.txt` input.
#[test]
fn bfs_test_65536() {
    let input = String::from("data/graph65536.txt");
    bfs_harness(BFSInputs { input });
}
use crate::graph_parser::Node;
use std::collections::VecDeque;
/// Sequential reference breadth-first search.
///
/// `graph_nodes[i]` describes the range of `edges` that holds the neighbours of node `i`.
/// Returns, for every node, the number of edges on a shortest path from `source`, or `-1`
/// when the node cannot be reached.
///
/// # Panics
///
/// Panics if `source`, an edge range, or an edge destination is out of bounds.
pub fn bfs(graph_nodes: &[Node], source: u32, edges: &[u32]) -> Vec<i32> {
    let node_count = graph_nodes.len();
    let mut seen = vec![false; node_count];
    let mut costs = vec![-1; node_count];
    let mut frontier = VecDeque::new();

    let root = source as usize;
    seen[root] = true;
    costs[root] = 0;
    frontier.push_back(root);

    while let Some(current) = frontier.pop_front() {
        let first = graph_nodes[current].edge_start;
        let past_last = first + graph_nodes[current].num_edges;
        for slot in first..past_last {
            let neighbour = edges[slot as usize] as usize;
            if seen[neighbour] {
                continue;
            }
            // First time this node is reached: it sits one level below `current`.
            seen[neighbour] = true;
            costs[neighbour] = costs[current] + 1;
            frontier.push_back(neighbour);
        }
    }

    costs
}
[package]
name = "juno_cfd"
version = "0.1.0"
authors = ["Aaron Councilman <aaronjc4@illinois.edu>"]
edition = "2021"
[[bin]]
name = "juno_cfd"
path = "src/main.rs"
[features]
cuda = ["juno_build/cuda", "hercules_rt/cuda"]
[build-dependencies]
juno_build = { path = "../../../juno_build" }
[dependencies]
juno_build = { path = "../../../juno_build" }
hercules_rt = { path = "../../../hercules_rt" }
async-std = "*"
clap = { version = "*", features = ["derive"] }
with_builtin_macros = "0.1.0"
nom = "*"
use juno_build::JunoCompiler;
fn main() {
#[cfg(feature = "cuda")]
JunoCompiler::new()
.file_in_src("euler.jn")
.unwrap()
.schedule_in_src("gpu_euler.sch")
.unwrap()
.build()
.unwrap();
#[cfg(not(feature = "cuda"))]
JunoCompiler::new()
.file_in_src("euler.jn")
.unwrap()
.schedule_in_src("cpu_euler.sch")
.unwrap()
.build()
.unwrap();
#[cfg(feature = "cuda")]
JunoCompiler::new()
.file_in_src("pre_euler.jn")
.unwrap()
.schedule_in_src("gpu_pre_euler.sch")
.unwrap()
.build()
.unwrap();
#[cfg(not(feature = "cuda"))]
JunoCompiler::new()
.file_in_src("pre_euler.jn")
.unwrap()
.schedule_in_src("cpu_pre_euler.sch")
.unwrap()
.build()
.unwrap();
}
!*.txt
LICENSE TERMS
Copyright (c)2008-2011 University of Virginia
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted without royalty fees or other restrictions, provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of the University of Virginia, the Dept. of Computer Science, nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF VIRGINIA OR THE SOFTWARE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
If you use this software or a modified version of it, please cite the most relevant among the following papers:
- M. A. Goodrum, M. J. Trotter, A. Aksel, S. T. Acton, and K. Skadron. Parallelization of Particle Filter Algorithms. In Proceedings
of the 3rd Workshop on Emerging Applications and Many-core Architecture (EAMA), in conjunction with the IEEE/ACM International
Symposium on Computer Architecture (ISCA), June 2010.
- S. Che, M. Boyer, J. Meng, D. Tarjan, J. W. Sheaffer, Sang-Ha Lee and K. Skadron.
"Rodinia: A Benchmark Suite for Heterogeneous Computing". IEEE International Symposium
on Workload Characterization, Oct 2009.
- J. Meng and K. Skadron. "Performance Modeling and Automatic Ghost Zone Optimization
for Iterative Stencil Loops on GPUs." In Proceedings of the 23rd Annual ACM International
Conference on Supercomputing (ICS), June 2009.
- L.G. Szafaryn, K. Skadron and J. Saucerman. "Experiences Accelerating MATLAB Systems
Biology Applications." in Workshop on Biomedicine in Computing (BiC) at the International
Symposium on Computer Architecture (ISCA), June 2009.
- M. Boyer, D. Tarjan, S. T. Acton, and K. Skadron. "Accelerating Leukocyte Tracking using CUDA:
A Case Study in Leveraging Manycore Coprocessors." In Proceedings of the International Parallel
and Distributed Processing Symposium (IPDPS), May 2009.
- S. Che, M. Boyer, J. Meng, D. Tarjan, J. W. Sheaffer, and K. Skadron. "A Performance
Study of General Purpose Applications on Graphics Processors using CUDA" Journal of
Parallel and Distributed Computing, Elsevier, June 2008.
CFD Examples from Rodinia Benchmark Suite 3.1
The data provided herein are governed by the [LICENSE](./LICENSE).
This diff is collapsed.
// CPU schedule for `euler`: cleanup passes over every function, then `euler` is auto-outlined,
// the outlined function is assigned to the CPU, its callees are inlined into it and everything
// left uncalled is deleted before the final cleanup.
// NOTE(review): the pass names are taken at face value here; their exact semantics are defined
// by the scheduler, not by this file.
gvn(*);
dce(*);
phi-elim(*);
dce(*);
crc(*);
dce(*);
slf(*);
dce(*);
// Outline `euler` and place the outlined function on the CPU
let auto = auto-outline(euler);
cpu(auto.euler);
// inline is invoked twice; presumably one invocation per level of the call chain (confirm)
inline(auto.euler);
inline(auto.euler);
delete-uncalled(*);
sroa[false](auto.euler);
dce(*);
float-collections(*);
dce(*);
gcm(*);
// CPU schedule for `pre_euler`: the same pass sequence as the CPU `euler` schedule, applied to
// the `pre_euler` entry point.
// NOTE(review): the pass names are taken at face value here; their exact semantics are defined
// by the scheduler, not by this file.
gvn(*);
dce(*);
phi-elim(*);
dce(*);
crc(*);
dce(*);
slf(*);
dce(*);
// Outline `pre_euler` and place the outlined function on the CPU
let auto = auto-outline(pre_euler);
cpu(auto.pre_euler);
// inline is invoked twice; presumably one invocation per level of the call chain (confirm)
inline(auto.pre_euler);
inline(auto.pre_euler);
delete-uncalled(*);
sroa[false](auto.pre_euler);
dce(*);
float-collections(*);
dce(*);
gcm(*);
// Number of neighbour slots stored per element (the first dimension of the neighbour and
// normal arrays below).
const NNB : usize = 4;
// Per-element face normals in structure-of-arrays form: one x/y/z component for each of the
// NNB neighbour slots of each of the `nelr` elements.
type Normals<nelr: usize> = struct {
x: f32[NNB, nelr],
y: f32[NNB, nelr],
z: f32[NNB, nelr],
};
// Momentum of all `nelr` elements, one array per component.
type Momentum<nelr: usize> = struct {
x: f32[nelr],
y: f32[nelr],
z: f32[nelr],
};
// Full state of all `nelr` elements in structure-of-arrays form.
type Variables<nelr: usize> = struct {
density: f32[nelr],
momentum: Momentum::<nelr>,
energy: f32[nelr],
};
// A single three-component vector.
type float3 = struct { x: f32, y: f32, z: f32 };
// State of a single element (the scalar counterpart of `Variables`).
type Variable = struct {
density: f32,
momentum: float3,
energy: f32,
};
// Velocity of an element: each momentum component divided by the density.
fn compute_velocity(density: f32, momentum: float3) -> float3 {
return float3 { x: momentum.x / density,
y: momentum.y / density,
z: momentum.z / density };
}
// Squared magnitude of `velocity` (the sum of its squared components).
fn compute_speed_sqd(velocity: float3) -> f32 {
return velocity.x * velocity.x + velocity.y * velocity.y + velocity.z * velocity.z;
}
// Constant used by the pressure and speed-of-sound formulas below.
// NOTE(review): presumably the ratio of specific heats of the gas; confirm.
const GAMMA : f32 = 1.4;
// Pressure computed as (GAMMA - 1) * (density_energy - 0.5 * density * speed_sqd).
fn compute_pressure(density: f32, density_energy: f32, speed_sqd: f32) -> f32 {
return (GAMMA - 1.0) * (density_energy - 0.5 * density * speed_sqd);
}
// Speed of sound computed as sqrt(GAMMA * pressure / density).
fn compute_speed_of_sound(density: f32, pressure: f32) -> f32 {
return sqrt!(GAMMA * pressure / density);
}
// Computes one step factor per element from the current state and the element areas:
// 0.5 / (sqrt(area) * (speed + speed_of_sound)), where speed is the magnitude of the element's
// velocity. Returns the factors as a new array of length `nelr`.
fn compute_step_factor<nelr: usize>(variables: Variables::<nelr>, areas: f32[nelr]) -> f32[nelr] {
let step_factors : f32[nelr];
for i in 0..nelr {
// Gather element i's state from the structure-of-arrays layout
let density = variables.density[i];
let momentum : float3;
momentum.x = variables.momentum.x[i];
momentum.y = variables.momentum.y[i];
momentum.z = variables.momentum.z[i];
let density_energy = variables.energy[i];
let velocity = compute_velocity(density, momentum);
let speed_sqd = compute_speed_sqd(velocity);
let pressure = compute_pressure(density, density_energy, speed_sqd);
let speed_of_sound = compute_speed_of_sound(density, pressure);
step_factors[i] = 0.5 / (sqrt!(areas[i]) * (sqrt!(speed_sqd) + speed_of_sound));
}
return step_factors;
}
// Computes the flux contribution vectors of a single element state.
// Returns, in this order: the contribution to x-momentum, to y-momentum, to z-momentum and to
// the energy density. The three momentum vectors form a symmetric 3x3 arrangement (entry
// [a][b] is velocity_a * momentum_b, plus `pressure` on the diagonal), which is why the
// off-diagonal entries of the later vectors are copied from the earlier ones.
fn compute_flux_contribution(
density: f32,
momentum: float3,
density_energy: f32,
pressure: f32,
velocity: float3,
) -> (float3, float3, float3, float3) {
let fc_momentum_x = float3 { x: velocity.x * momentum.x + pressure,
y: velocity.x * momentum.y,
z: velocity.x * momentum.z };
let fc_momentum_y = float3 { x: fc_momentum_x.y,
y: velocity.y * momentum.y + pressure,
z: velocity.y * momentum.z };
let fc_momentum_z = float3 { x: fc_momentum_x.z,
y: fc_momentum_y.z,
z: velocity.z * momentum.z + pressure };
// Energy flux: velocity scaled by (energy density + pressure)
let de_p = density_energy + pressure;
let fc_density_energy = float3 { x: velocity.x * de_p,
y: velocity.y * de_p,
z: velocity.z * de_p };
return (fc_momentum_x, fc_momentum_y, fc_momentum_z, fc_density_energy);
}
// Computes the flux of every element from its own state and the state of its NNB neighbours.
//
// variables:                      current state of all elements
// elements_surrounding_elements:  for element i and slot j, the neighbour's index; a value
//                                 >= 0 is a regular neighbour, -1 marks a wing boundary and
//                                 -2 a far field boundary (any other value contributes nothing)
// normals:                        face normal for each (slot, element) pair
// ff_variable, ff_flux_contribution_*: far field state and its flux contributions, used in
//                                 place of a neighbour on far field boundaries
//
// Returns a new `Variables` holding the accumulated flux of each element.
fn compute_flux<nelr: usize>(
  variables: Variables::<nelr>,
  elements_surrounding_elements: i32[NNB, nelr],
  normals: Normals::<nelr>,
  ff_variable: Variable,
  ff_flux_contribution_density_energy: float3,
  ff_flux_contribution_momentum_x: float3,
  ff_flux_contribution_momentum_y: float3,
  ff_flux_contribution_momentum_z: float3,
) -> Variables::<nelr> {
  const smoothing_coefficient : f32 = 0.2;

  let fluxes: Variables::<nelr>;

  for i in 0..nelr {
    // State and derived quantities of element i
    let density_i = variables.density[i];
    let momentum_i = float3 { x: variables.momentum.x[i],
                              y: variables.momentum.y[i],
                              z: variables.momentum.z[i] };
    let density_energy_i = variables.energy[i];

    let velocity_i = compute_velocity(density_i, momentum_i);
    let speed_sqd_i = compute_speed_sqd(velocity_i);
    let speed_i = sqrt!(speed_sqd_i);
    let pressure_i = compute_pressure(density_i, density_energy_i, speed_sqd_i);
    let speed_of_sound_i = compute_speed_of_sound(density_i, pressure_i);

    let (flux_contribution_i_momentum_x, flux_contribution_i_momentum_y,
         flux_contribution_i_momentum_z, flux_contribution_i_density_energy)
      = compute_flux_contribution(density_i, momentum_i, density_energy_i, pressure_i, velocity_i);

    // Accumulators for element i's flux
    let flux_i_density : f32 = 0.0;
    let flux_i_momentum = float3 { x: 0.0, y: 0.0, z: 0.0 };
    let flux_i_density_energy : f32 = 0.0;

    for j in 0..NNB {
      let nb = elements_surrounding_elements[j, i];
      let normal = float3 {
        x: normals.x[j, i],
        y: normals.y[j, i],
        z: normals.z[j, i],
      };
      let normal_len = sqrt!(normal.x*normal.x + normal.y*normal.y + normal.z*normal.z);

      if nb >= 0 { // a legitimate neighbor
        let nb = nb as usize;
        let density_nb = variables.density[nb];
        let momentum_nb = float3 {
          x: variables.momentum.x[nb],
          y: variables.momentum.y[nb],
          z: variables.momentum.z[nb],
        };
        let density_energy_nb = variables.energy[nb];
        let velocity_nb = compute_velocity(density_nb, momentum_nb);
        let speed_sqd_nb = compute_speed_sqd(velocity_nb);
        let pressure_nb = compute_pressure(density_nb, density_energy_nb, speed_sqd_nb);
        let speed_of_sound_nb = compute_speed_of_sound(density_nb, pressure_nb);
        let (flux_contribution_nb_momentum_x, flux_contribution_nb_momentum_y,
             flux_contribution_nb_momentum_z, flux_contribution_nb_density_energy)
          = compute_flux_contribution(density_nb, momentum_nb, density_energy_nb, pressure_nb, velocity_nb);

        // artificial viscosity
        let factor = -normal_len * smoothing_coefficient * 0.5
                     * (speed_i + sqrt!(speed_sqd_nb) + speed_of_sound_i + speed_of_sound_nb);
        flux_i_density += factor * (density_i - density_nb);
        flux_i_density_energy += factor * (density_energy_i - density_energy_nb);
        flux_i_momentum.x += factor * (momentum_i.x - momentum_nb.x);
        flux_i_momentum.y += factor * (momentum_i.y - momentum_nb.y);
        flux_i_momentum.z += factor * (momentum_i.z - momentum_nb.z);

        // accumulate cell-centered fluxes
        let factor = 0.5 * normal.x;
        flux_i_density += factor * (momentum_nb.x + momentum_i.x);
        flux_i_density_energy += factor * (flux_contribution_nb_density_energy.x + flux_contribution_i_density_energy.x);
        flux_i_momentum.x += factor * (flux_contribution_nb_momentum_x.x + flux_contribution_i_momentum_x.x);
        flux_i_momentum.y += factor * (flux_contribution_nb_momentum_y.x + flux_contribution_i_momentum_y.x);
        flux_i_momentum.z += factor * (flux_contribution_nb_momentum_z.x + flux_contribution_i_momentum_z.x);

        let factor = 0.5 * normal.y;
        flux_i_density += factor * (momentum_nb.y + momentum_i.y);
        flux_i_density_energy += factor * (flux_contribution_nb_density_energy.y + flux_contribution_i_density_energy.y);
        flux_i_momentum.x += factor * (flux_contribution_nb_momentum_x.y + flux_contribution_i_momentum_x.y);
        flux_i_momentum.y += factor * (flux_contribution_nb_momentum_y.y + flux_contribution_i_momentum_y.y);
        flux_i_momentum.z += factor * (flux_contribution_nb_momentum_z.y + flux_contribution_i_momentum_z.y);

        let factor = 0.5 * normal.z;
        flux_i_density += factor * (momentum_nb.z + momentum_i.z);
        flux_i_density_energy += factor * (flux_contribution_nb_density_energy.z + flux_contribution_i_density_energy.z);
        flux_i_momentum.x += factor * (flux_contribution_nb_momentum_x.z + flux_contribution_i_momentum_x.z);
        flux_i_momentum.y += factor * (flux_contribution_nb_momentum_y.z + flux_contribution_i_momentum_y.z);
        flux_i_momentum.z += factor * (flux_contribution_nb_momentum_z.z + flux_contribution_i_momentum_z.z);
      } else if nb == -1 { // a wing boundary
        // Only the element's own pressure acts on the face
        flux_i_momentum.x += normal.x * pressure_i;
        flux_i_momentum.y += normal.y * pressure_i;
        flux_i_momentum.z += normal.z * pressure_i;
      } else if nb == -2 { // a far field boundary
        // Same cell-centered accumulation as for a neighbour, with the far field state
        // standing in for the neighbour (and no artificial viscosity term)
        let factor = 0.5 * normal.x;
        flux_i_density += factor * (ff_variable.momentum.x + momentum_i.x);
        flux_i_density_energy += factor * (ff_flux_contribution_density_energy.x + flux_contribution_i_density_energy.x);
        flux_i_momentum.x += factor * (ff_flux_contribution_momentum_x.x + flux_contribution_i_momentum_x.x);
        flux_i_momentum.y += factor * (ff_flux_contribution_momentum_y.x + flux_contribution_i_momentum_y.x);
        flux_i_momentum.z += factor * (ff_flux_contribution_momentum_z.x + flux_contribution_i_momentum_z.x);

        let factor = 0.5 * normal.y;
        flux_i_density += factor * (ff_variable.momentum.y + momentum_i.y);
        flux_i_density_energy += factor * (ff_flux_contribution_density_energy.y + flux_contribution_i_density_energy.y);
        flux_i_momentum.x += factor * (ff_flux_contribution_momentum_x.y + flux_contribution_i_momentum_x.y);
        flux_i_momentum.y += factor * (ff_flux_contribution_momentum_y.y + flux_contribution_i_momentum_y.y);
        flux_i_momentum.z += factor * (ff_flux_contribution_momentum_z.y + flux_contribution_i_momentum_z.y);

        let factor = 0.5 * normal.z;
        // The z term pairs the far field z-momentum with the element's z-momentum
        // (this previously read ff_variable.momentum.y)
        flux_i_density += factor * (ff_variable.momentum.z + momentum_i.z);
        flux_i_density_energy += factor * (ff_flux_contribution_density_energy.z + flux_contribution_i_density_energy.z);
        flux_i_momentum.x += factor * (ff_flux_contribution_momentum_x.z + flux_contribution_i_momentum_x.z);
        flux_i_momentum.y += factor * (ff_flux_contribution_momentum_y.z + flux_contribution_i_momentum_y.z);
        flux_i_momentum.z += factor * (ff_flux_contribution_momentum_z.z + flux_contribution_i_momentum_z.z);
      }
    }

    fluxes.density[i] = flux_i_density;
    fluxes.momentum.x[i] = flux_i_momentum.x;
    fluxes.momentum.y[i] = flux_i_momentum.y;
    fluxes.momentum.z[i] = flux_i_momentum.z;
    fluxes.energy[i] = flux_i_density_energy;
  }

  return fluxes;
}
// Number of sub-steps performed per outer iteration (see the `0..RK` loop in `euler`).
const RK : usize = 3;
// Applies sub-step `j`: every field of `variables` is overwritten with the corresponding
// `old_variables` value plus `factor` times the flux, where `factor` is the element's step
// factor divided by (RK + 1 - j). Returns the updated `variables`.
fn time_step<nelr: usize>(
j: usize,
old_variables: Variables::<nelr>,
variables: Variables::<nelr>,
step_factors: f32[nelr],
fluxes: Variables::<nelr>,
) -> Variables::<nelr> {
for i in 0..nelr {
// Later sub-steps (larger j) divide by a smaller number and therefore take a larger step
let factor = step_factors[i] / (RK + 1 - j) as f32;
variables.density[i] = old_variables.density[i] + factor * fluxes.density[i];
variables.momentum.x[i] = old_variables.momentum.x[i] + factor * fluxes.momentum.x[i];
variables.momentum.y[i] = old_variables.momentum.y[i] + factor * fluxes.momentum.y[i];
variables.momentum.z[i] = old_variables.momentum.z[i] + factor * fluxes.momentum.z[i];
variables.energy[i] = old_variables.energy[i] + factor * fluxes.energy[i];
}
return variables;
}
// Returns a new `Variables` whose fields are element-by-element copies of `variables`.
fn copy_vars<nelr: usize>(variables: Variables::<nelr>) -> Variables::<nelr> {
let result : Variables::<nelr>;
for i in 0..nelr {
result.density[i] = variables.density[i];
result.momentum.x[i] = variables.momentum.x[i];
result.momentum.y[i] = variables.momentum.y[i];
result.momentum.z[i] = variables.momentum.z[i];
result.energy[i] = variables.energy[i];
}
return result;
}
// Entry point of the solver. Advances `variables` by `iterations` outer steps and returns the
// final state. Each outer step snapshots the state, computes the per-element step factors once,
// and then performs RK sub-steps; every sub-step recomputes the fluxes from the current state
// and applies `time_step` relative to the snapshot.
// The remaining parameters (areas, neighbour indices, normals and the far field data) are
// passed through unchanged to `compute_step_factor` and `compute_flux`.
#[entry]
fn euler<nelr: usize>(
iterations: usize,
variables: Variables::<nelr>,
areas: f32[nelr],
elements_surrounding_elements: i32[NNB, nelr],
normals: Normals::<nelr>,
ff_variable: Variable,
ff_flux_contribution_density_energy: float3,
ff_flux_contribution_momentum_x: float3,
ff_flux_contribution_momentum_y: float3,
ff_flux_contribution_momentum_z: float3,
) -> Variables::<nelr> {
for i in 0..iterations {
// Snapshot taken before the sub-steps; every sub-step is applied relative to it
let old_variables = copy_vars::<nelr>(variables);
let step_factors = compute_step_factor::<nelr>(variables, areas);
for j in 0..RK {
let fluxes = compute_flux::<nelr>(variables, elements_surrounding_elements,
normals, ff_variable, ff_flux_contribution_density_energy,
ff_flux_contribution_momentum_x,
ff_flux_contribution_momentum_y,
ff_flux_contribution_momentum_z);
variables = time_step::<nelr>(j, old_variables, variables, step_factors, fluxes);
}
}
return variables;
}
// GPU schedule for `euler`: cleanup passes over every function, then `euler` is auto-outlined,
// the outlined function is assigned to the GPU, its callees are inlined into it and everything
// left uncalled is deleted before the final cleanup.
// NOTE(review): the pass names are taken at face value here; their exact semantics are defined
// by the scheduler, not by this file.
gvn(*);
dce(*);
phi-elim(*);
dce(*);
crc(*);
dce(*);
slf(*);
dce(*);
// Outline `euler` and place the outlined function on the GPU
let auto = auto-outline(euler);
gpu(auto.euler);
// inline is invoked twice; presumably one invocation per level of the call chain (confirm)
inline(auto.euler);
inline(auto.euler);
delete-uncalled(*);
sroa[false](auto.euler);
dce(*);
float-collections(*);
dce(*);
gcm(*);