diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index af867eb8e4752b67cfb7cd503a343cd27ee13d91..7a14950ee0416e427b5aeceaacd4b5f68b014fba 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,9 +1,9 @@ test-cpu: stage: test script: - - cargo test --features=opencv -vv + - cargo test --features=opencv -vv -- --nocapture test-gpu: stage: test script: - - cargo test --features=cuda,opencv -vv + - cargo test --features=cuda,opencv -vv -- --nocapture diff --git a/Cargo.lock b/Cargo.lock index 32dc6a0ee754c243e4d1b7f45892190cf54c1d7c..fdcbaf8426dd64fca782b34240256cf149657303 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,9 +10,9 @@ checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "aho-corasick" -version = "0.7.15" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] @@ -29,6 +29,12 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.18" @@ -102,12 +108,6 @@ dependencies = [ "syn 2.0.98", ] -[[package]] -name = "arrayvec" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" - [[package]] name = "arrayvec" version = "0.7.6" @@ -255,7 +255,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6678909d8c5d46a42abcf571271e15fdbc0a225e3646cf23762cd415046c78bf" dependencies = [ "anyhow", - "arrayvec 0.7.6", + "arrayvec", "log", "nom 7.1.3", "num-rational", @@ -268,7 +268,7 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e335041290c43101ca215eed6f43ec437eb5a42125573f600fc3fa42b9bddd62" dependencies = [ - "arrayvec 0.7.6", + "arrayvec", ] [[package]] @@ -310,28 +310,16 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6099cdc01846bc367c4e7dd630dc5966dccf36b652fae7a74e17b640411a91b2" -[[package]] -name = "bitvec" -version = "0.19.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55f93d0ef3363c364d5976646a38f04cf67cfe1d4c8d160cdea02cab2c116b33" -dependencies = [ - "funty 1.1.0", - "radium 0.5.3", - "tap", - "wyz 0.2.0", -] - [[package]] name = "bitvec" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" dependencies = [ - "funty 2.0.0", - "radium 0.7.0", + "funty", + "radium", "tap", - "wyz 0.5.1", + "wyz", ] [[package]] @@ -349,9 +337,9 @@ dependencies = [ [[package]] name = "built" -version = "0.7.6" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73848a43c5d63a1251d17adf6c2bf78aa94830e60a335a95eeea45d6ba9e1e4d" +checksum = "56ed6191a7e78c36abdb16ab65341eefd73d64d303fffccdbb00d51e4205967b" [[package]] name = "bumpalo" @@ -394,11 +382,17 @@ dependencies = [ "with_builtin_macros", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" -version = "1.2.13" +version = "1.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7777341816418c02e033934a09f20dc0ccaf65a5201ef8a450ae0105a573fda" +checksum = "0c3d1b2e905a3a7b00a6141adb0e4c0bb941d11caf55349d863942a1cc44e3c9" dependencies = [ "jobserver", "libc", @@ -446,6 +440,33 @@ dependencies = [ "vob", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "clang" version = "2.0.0" @@ -469,9 +490,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.29" +version = "4.5.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acebd8ad879283633b343856142139f2da2317c96b05b4dd6181c61e2480184" +checksum = "92b7b18d71fad5313a1e320fa9897994228ce274b60faa4d694fe0ea89cd9e6d" dependencies = [ "clap_builder", "clap_derive", @@ -479,9 +500,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.29" +version = "4.5.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ba32cbda51c7e1dfd49acc1457ba1a7dec5b64fe360e828acb13ca8dc9c2f9" +checksum = "a35db2071778a7344791a4fb4f95308b5673d219dee3ae348b86642574ecc90c" dependencies = [ "anstream", "anstyle", @@ -495,7 +516,7 @@ version = "4.5.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf4ced95c6f4a675af3da73304b9ac4ed991640c36374e4b46795c49e17cf1ed" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "syn 2.0.98", @@ -543,6 +564,42 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + [[package]] name = "critical-section" version = "1.2.0" @@ -677,9 +734,9 @@ dependencies = [ [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" @@ -794,12 +851,6 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" -[[package]] -name = "funty" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" - [[package]] name = "funty" version = "2.0.0" @@ -944,12 +995,6 @@ dependencies = [ "stable_deref_trait", ] -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - [[package]] name = "heck" version = "0.5.0" @@ -960,7 +1005,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" name = "hercules_cg" version = "0.1.0" dependencies = [ - "bitvec 1.0.1", + "bitvec", "hercules_ir", "ordered-float", "rand 0.9.0", @@ -971,7 +1016,7 @@ dependencies = [ name = "hercules_interpreter" version = "0.1.0" dependencies = [ - "bitvec 1.0.1", + "bitvec", "clap", "derive_more", "hercules_ir", @@ -988,9 +1033,9 @@ dependencies = [ name = "hercules_ir" version = "0.1.0" dependencies = [ - "bitvec 1.0.1", + "bitvec", "either", - "nom 6.2.2", + "nom 8.0.0", "ordered-float", "rand 0.9.0", "serde", @@ -1001,7 +1046,7 @@ name = "hercules_opt" version = "0.1.0" dependencies = [ "bimap", - "bitvec 1.0.1", + "bitvec", "egg", "either", "hercules_cg", @@ -1025,7 +1070,7 @@ version = "0.1.0" name = "hercules_tests" version = "0.1.0" dependencies = [ - "bitvec 1.0.1", + "bitvec", "clap", "hercules_interpreter", "hercules_ir", @@ -1102,12 +1147,32 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "is-terminal" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e19b23d53f35ce9f56aebc7d1bb4e6ac1e9c0db7ac85c8d1760c04379edced37" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.12.1" @@ -1175,7 +1240,7 @@ dependencies = [ "clap", "hercules_rt", "juno_build", - "nom 6.2.2", + "nom 8.0.0", "rand 0.9.0", "with_builtin_macros", ] @@ -1188,7 +1253,7 @@ dependencies = [ "clap", "hercules_rt", "juno_build", - "nom 6.2.2", + "nom 8.0.0", "with_builtin_macros", ] @@ -1217,6 +1282,7 @@ version = "0.1.0" dependencies = [ "async-std", "clap", + "criterion", "hercules_rt", "image", "juno_build", @@ -1231,7 +1297,7 @@ dependencies = [ "clap", "hercules_rt", "juno_build", - "nom 6.2.2", + "nom 8.0.0", "with_builtin_macros", ] @@ -1272,6 +1338,7 @@ version = "0.1.0" dependencies = [ "async-std", "clap", + "criterion", "hercules_rt", "juno_build", "opencv", @@ -1424,7 +1491,7 @@ dependencies = [ "clap", "hercules_rt", "juno_build", - "nom 6.2.2", + "nom 8.0.0", "with_builtin_macros", ] @@ -1456,19 +1523,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8" -[[package]] -name = "lexical-core" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe" -dependencies = [ - "arrayvec 0.5.2", - "bitflags 1.3.2", - "cfg-if", - "ryu", - "static_assertions", -] - [[package]] name = "libc" version = "0.2.169" @@ -1553,7 +1607,7 @@ dependencies = [ "num-traits", "quote", "regex", - "regex-syntax 0.8.5", + "regex-syntax", "serde", "vergen", ] @@ -1618,9 +1672,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.3.4" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "minimal-lexical" @@ -1658,25 +1712,21 @@ checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" [[package]] name = "nom" -version = "6.2.2" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6a7a9657c84d5814c6196b68bb4429df09c18b1573806259fba397ea4ad0d44" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" dependencies = [ - "bitvec 0.19.6", - "funty 1.1.0", - "lexical-core", "memchr", - "version_check", + "minimal-lexical", ] [[package]] name = "nom" -version = "7.1.3" +version = "8.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" dependencies = [ "memchr", - "minimal-lexical", ] [[package]] @@ -1756,6 +1806,12 @@ version = "1.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +[[package]] +name = "oorandom" +version = "11.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" + [[package]] name = "opencv" version = "0.94.2" @@ -1793,9 +1849,9 @@ dependencies = [ [[package]] name = "ordered-float" -version = "4.6.0" +version = "5.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" +checksum = "e2c1f9f56e534ac6a9b8a4600bdf0f530fb393b5f393e7b4d03489c3cf0c3f01" dependencies = [ "num-traits", "rand 0.8.5", @@ -1901,6 +1957,34 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "png" version = "0.17.16" @@ -2058,12 +2142,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "radium" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8" - [[package]] name = "radium" version = "0.7.0" @@ -2089,8 +2167,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" dependencies = [ "rand_chacha 0.9.0", - "rand_core 0.9.0", - "zerocopy 0.8.17", + "rand_core 0.9.1", + "zerocopy 0.8.18", ] [[package]] @@ -2110,7 +2188,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.0", + "rand_core 0.9.1", ] [[package]] @@ -2125,12 +2203,12 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b08f3c9802962f7e1b25113931d94f43ed9725bebc59db9d0c3e9a23b67e15ff" +checksum = "a88e0da7a2c97baa202165137c158d0a2e824ac465d13d81046727b34cb247d3" dependencies = [ "getrandom 0.3.1", - "zerocopy 0.8.17", + "zerocopy 0.8.18", ] [[package]] @@ -2141,7 +2219,7 @@ checksum = "cd87ce80a7665b1cce111f8a16c1f3929f6547ce91ade6addf4ec86a8dda5ce9" dependencies = [ "arbitrary", "arg_enum_proc_macro", - "arrayvec 0.7.6", + "arrayvec", "av1-grain", "bitstream-io", "built", @@ -2223,20 +2301,26 @@ dependencies = [ [[package]] name = "regex" -version = "1.4.6" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a26af418b574bd56588335b3a3659a65725d4e636eb1016c2f9e3b38c7cc759" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.6.29", + "regex-automata", + "regex-syntax", ] [[package]] -name = "regex-syntax" -version = "0.6.29" +name = "regex-automata" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] [[package]] name = "regex-syntax" @@ -2290,6 +2374,15 @@ version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "saturating" version = "0.1.0" @@ -2328,6 +2421,18 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "serde_json" +version = "1.0.138" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + [[package]] name = "serde_spanned" version = "0.6.8" @@ -2384,9 +2489,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.2" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" [[package]] name = "sparsevec" @@ -2468,12 +2573,12 @@ dependencies = [ [[package]] name = "system-deps" -version = "6.1.1" +version = "6.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30c2de8a4d8f4b823d634affc9cd2a74ec98c53a756f317e529a48046cbf71f3" +checksum = "a3e535eb8dded36d55ec13eddacd30dec501792ff23a0b1682c38601b8cf2349" dependencies = [ "cfg-expr", - "heck 0.4.1", + "heck", "pkg-config", "toml", "version-compare", @@ -2499,9 +2604,9 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.16.0" +version = "3.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38c246215d7d24f48ae091a2902398798e05d978b24315d6efbc00ede9a8bb91" +checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230" dependencies = [ "cfg-if", "fastrand", @@ -2575,11 +2680,21 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "toml" -version = "0.7.6" +version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17e963a819c331dcacd7ab957d80bc2b9a9c1e71c804826d2f283dd65306542" +checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148" dependencies = [ "serde", "serde_spanned", @@ -2598,9 +2713,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.19.12" +version = "0.22.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c500344a19072298cd05a7224b3c0c629348b78692bf48466c5238656e315a78" +checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" dependencies = [ "indexmap", "serde", @@ -2685,9 +2800,9 @@ dependencies = [ [[package]] name = "version-compare" -version = "0.1.1" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "579a42fc0b8e0c63b76519a339be31bed574929511fa53c1a3acae26eb258f29" +checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b" [[package]] name = "version_check" @@ -2706,6 +2821,16 @@ dependencies = [ "serde", ] +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -2824,6 +2949,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -3032,9 +3166,9 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" [[package]] name = "winnow" -version = "0.4.7" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca0ace3845f0d96209f0375e6d367e3eb87eb65d27d445bdc9f1843a26f39448" +checksum = "59690dea168f2198d1a3b0cac23b8063efcd11012f10ae4698f284808c8ef603" dependencies = [ "memchr", ] @@ -3068,12 +3202,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "wyz" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85e60b0d1b5f99db2556934e21937020776a5d31520bf169e851ac44e6420214" - [[package]] name = "wyz" version = "0.5.1" @@ -3095,11 +3223,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.17" +version = "0.8.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa91407dacce3a68c56de03abe2760159582b846c6a4acd2f456618087f12713" +checksum = "79386d31a42a4996e3336b0919ddb90f81112af416270cff95b5f5af22b839c2" dependencies = [ - "zerocopy-derive 0.8.17", + "zerocopy-derive 0.8.18", ] [[package]] @@ -3115,9 +3243,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.8.17" +version = "0.8.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06718a168365cad3d5ff0bb133aad346959a2074bd4a85c121255a11304a8626" +checksum = "76331675d372f91bf8d17e13afbd5fe639200b73d01f0fc748bb059f9cca2db7" dependencies = [ "proc-macro2", "quote", diff --git a/hercules_cg/src/rt.rs b/hercules_cg/src/rt.rs index 5edddd86df2901c29109148e2b107c0b923dbd0e..bd1520745c72f9300ba750cf48e2c86c4bee978e 100644 --- a/hercules_cg/src/rt.rs +++ b/hercules_cg/src/rt.rs @@ -560,23 +560,33 @@ impl<'a> RTContext<'a> { // same interface as AsyncRust functions. let block = &mut blocks.get_mut(&bb).unwrap().data; let is_async = func.schedules[id.idx()].contains(&Schedule::AsyncCall); + if is_async { + for arg in args { + if let Some(arc) = self.clone_arc(*arg, false) { + write!(block, "{}", arc)?; + } + } + } let device = self.devices[callee_id.idx()]; let prefix = match (device, is_async) { - (Device::AsyncRust, false) => "", - (_, false) => "", - (Device::AsyncRust, true) => "Some(::async_std::task::spawn(", - (_, true) => "Some(::async_std::task::spawn(async move {", + (Device::AsyncRust, false) | (_, false) => { + format!("{} = ", self.get_value(id, bb, true)) + } + (_, true) => format!( + "{}::async_std::task::spawn(async move {{ async_call_sender_{}.send(", + self.clone_arc(id, true).unwrap(), + id.idx() + ), }; let postfix = match (device, is_async) { (Device::AsyncRust, false) => ".await", (_, false) => "", - (Device::AsyncRust, true) => "))", - (_, true) => "}))", + (Device::AsyncRust, true) => ".await).await})", + (_, true) => ").await})", }; write!( block, - "{} = {}{}(", - self.get_value(id, bb, true), + "{}{}(", prefix, self.module.functions[callee_id.idx()].name )?; @@ -1069,11 +1079,15 @@ impl<'a> RTContext<'a> { } // If the node is a call with an AsyncCall schedule, it should be - // spawned as a task and awaited later. + // lowered to a channel. let is_async_call = func.nodes[idx].is_call() && func.schedules[idx].contains(&Schedule::AsyncCall); if is_async_call { - write!(w, "let mut async_call_{} = None;", idx)?; + write!( + w, + "let mut async_call_channel_{} = ::async_std::channel::bounded(1);let async_call_sender_{} = ::std::sync::Arc::new(async_call_channel_{}.0);let async_call_receiver_{} = ::std::sync::Arc::new(async_call_channel_{}.1);", + idx, idx, idx, idx, idx + )?; } else { write!( w, @@ -1356,16 +1370,30 @@ impl<'a> RTContext<'a> { } else if func.nodes[id.idx()].is_call() && func.schedules[id.idx()].contains(&Schedule::AsyncCall) { - format!( - "async_call_{}{}", - id.idx(), - if lhs { "" } else { ".unwrap().await" } - ) + assert!(!lhs); + format!("async_call_receiver_{}.recv().await.unwrap()", id.idx(),) } else { format!("node_{}", id.idx()) } } + fn clone_arc(&self, id: NodeID, lhs: bool) -> Option<String> { + let func = self.get_func(); + if func.nodes[id.idx()].is_call() && func.schedules[id.idx()].contains(&Schedule::AsyncCall) + { + let kind = if lhs { "sender" } else { "receiver" }; + Some(format!( + "let async_call_{}_{} = async_call_{}_{}.clone();", + kind, + id.idx(), + kind, + id.idx() + )) + } else { + None + } + } + fn get_type(&self, id: TypeID) -> &'static str { convert_type(&self.module.types[id.idx()]) } diff --git a/hercules_ir/Cargo.toml b/hercules_ir/Cargo.toml index 0d6a25fb3884f0512b6defaba1c3255951e701a7..26950d4b7700d19326e6ea61aa2488b4c5d5df59 100644 --- a/hercules_ir/Cargo.toml +++ b/hercules_ir/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [dependencies] rand = "*" -nom = "6.2.2" +nom = "*" ordered-float = { version = "*", features = ["serde"] } bitvec = "*" serde = { version = "*", features = ["derive"] } diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index b41b1f6fca55e05262135e9b159029f103f41078..a019f4d37d2ebfa645d8c73729e93e49290c13d3 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -3,6 +3,8 @@ use std::collections::{HashMap, HashSet}; use std::ops::Deref; use std::str::FromStr; +use nom::Parser; + use crate::*; /* @@ -128,9 +130,8 @@ fn parse_module<'a>(ir_text: &'a str, context: Context<'a>) -> nom::IResult<&'a // If there is any text left after successfully parsing some functions, // treat that as an error. let (rest, functions) = - nom::combinator::all_consuming(nom::multi::many0(|x| parse_function(x, &context)))( - ir_text, - )?; + nom::combinator::all_consuming(nom::multi::many0(|x| parse_function(x, &context))) + .parse(ir_text)?; let mut context = context.into_inner(); // Functions, as returned by parsing, is in parse order, which may differ @@ -215,7 +216,7 @@ fn parse_function<'a>( Ok((ir_text, num_dynamic_constants)) }; let (ir_text, num_dynamic_constants) = - nom::combinator::opt(parse_num_dynamic_constants)(ir_text)?; + nom::combinator::opt(parse_num_dynamic_constants).parse(ir_text)?; // If unspecified, assumed function has no dynamic constant arguments. let num_dynamic_constants = num_dynamic_constants.unwrap_or(0); @@ -223,7 +224,7 @@ fn parse_function<'a>( let ir_text = nom::character::complete::char('(')(ir_text)?.0; let (ir_text, params) = nom::multi::separated_list0( nom::character::complete::char(','), - nom::sequence::tuple(( + ( nom::character::complete::multispace0, parse_identifier, nom::character::complete::multispace0, @@ -231,8 +232,9 @@ fn parse_function<'a>( nom::character::complete::multispace0, |x| parse_type_id(x, context), nom::character::complete::multispace0, - )), - )(ir_text)?; + ), + ) + .parse(ir_text)?; // The start node is not explicitly specified in the textual IR, so create // it manually. @@ -252,8 +254,8 @@ fn parse_function<'a>( nom::character::complete::multispace0, )), |text| parse_type_id(text, context), - )(ir_text)?; - let (ir_text, nodes) = nom::multi::many1(|x| parse_node(x, context))(ir_text)?; + ).parse(ir_text)?; + let (ir_text, nodes) = nom::multi::many1(|x| parse_node(x, context)).parse(ir_text)?; // `nodes`, as returned by parsing, is in parse order, which may differ from // the order dictated by NodeIDs in the node name intern map. @@ -391,11 +393,11 @@ fn parse_region<'a>( // explicitly using nom's separated list functionality. This example here // is a bit of an abuse of what parse_tupleN functions are meant for. let (ir_text, (preds,)) = parse_tuple1(nom::multi::separated_list1( - nom::sequence::tuple(( + ( nom::character::complete::multispace0, nom::character::complete::char(','), nom::character::complete::multispace0, - )), + ), parse_identifier, ))(ir_text)?; @@ -419,11 +421,11 @@ fn parse_fork<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes let (ir_text, (control, factors)) = parse_tuple2( parse_identifier, nom::multi::separated_list1( - nom::sequence::tuple(( + ( nom::character::complete::multispace0, nom::character::complete::char(','), nom::character::complete::multispace0, - )), + ), |x| parse_dynamic_constant_id(x, context), ), )(ir_text)?; @@ -448,11 +450,11 @@ fn parse_phi<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResu let (ir_text, (control, data)) = parse_tuple2( parse_identifier, nom::multi::separated_list1( - nom::sequence::tuple(( + ( nom::character::complete::multispace0, nom::character::complete::char(','), nom::character::complete::multispace0, - )), + ), parse_identifier, ), )(ir_text)?; @@ -599,29 +601,32 @@ fn parse_call<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes let ir_text = nom::character::complete::char('<')(ir_text)?.0; let ir_text = nom::character::complete::multispace0(ir_text)?.0; let (ir_text, dynamic_constants) = nom::multi::separated_list1( - nom::sequence::tuple(( + ( nom::character::complete::multispace0, nom::character::complete::char(','), nom::character::complete::multispace0, - )), + ), |x| parse_dynamic_constant_id(x, context), - )(ir_text)?; + ) + .parse(ir_text)?; let ir_text = nom::character::complete::multispace0(ir_text)?.0; let ir_text = nom::character::complete::char('>')(ir_text)?.0; Ok((ir_text, dynamic_constants)) }; - let (ir_text, dynamic_constants) = nom::combinator::opt(parse_dynamic_constants)(ir_text)?; + let (ir_text, dynamic_constants) = + nom::combinator::opt(parse_dynamic_constants).parse(ir_text)?; let dynamic_constants = dynamic_constants.unwrap_or(vec![]); let ir_text = nom::character::complete::char('(')(ir_text)?.0; let ir_text = nom::character::complete::multispace0(ir_text)?.0; let (ir_text, mut function_and_args) = nom::multi::separated_list1( - nom::sequence::tuple(( + ( nom::character::complete::multispace0, nom::character::complete::char(','), nom::character::complete::multispace0, - )), + ), parse_identifier, - )(ir_text)?; + ) + .parse(ir_text)?; let function = function_and_args.remove(0); let mut args: Vec<NodeID> = function_and_args .into_iter() @@ -652,13 +657,14 @@ fn parse_intrinsic<'a>( let ir_text = nom::character::complete::char('(')(ir_text)?.0; let ir_text = nom::character::complete::multispace0(ir_text)?.0; let (ir_text, mut intrinsic_and_args) = nom::multi::separated_list1( - nom::sequence::tuple(( + ( nom::character::complete::multispace0, nom::character::complete::char(','), nom::character::complete::multispace0, - )), + ), parse_identifier, - )(ir_text)?; + ) + .parse(ir_text)?; let intrinsic = intrinsic_and_args.remove(0); let args: Vec<NodeID> = intrinsic_and_args .into_iter() @@ -685,7 +691,7 @@ fn parse_index<'a>( ) -> nom::IResult<&'a str, Index> { let (ir_text, idx) = nom::branch::alt(( nom::combinator::map( - nom::sequence::tuple(( + ( nom::character::complete::multispace0, nom::bytes::complete::tag("field"), nom::character::complete::multispace0, @@ -695,11 +701,11 @@ fn parse_index<'a>( nom::character::complete::multispace0, nom::character::complete::char(')'), nom::character::complete::multispace0, - )), + ), |(_, _, _, _, _, x, _, _, _)| Index::Field(x), ), nom::combinator::map( - nom::sequence::tuple(( + ( nom::character::complete::multispace0, nom::bytes::complete::tag("variant"), nom::character::complete::multispace0, @@ -709,28 +715,28 @@ fn parse_index<'a>( nom::character::complete::multispace0, nom::character::complete::char(')'), nom::character::complete::multispace0, - )), + ), |(_, _, _, _, _, x, _, _, _)| Index::Variant(x), ), nom::combinator::map( - nom::sequence::tuple(( + ( nom::character::complete::multispace0, nom::bytes::complete::tag("position"), nom::character::complete::multispace0, nom::character::complete::char('('), nom::character::complete::multispace0, nom::multi::separated_list1( - nom::sequence::tuple(( + ( nom::character::complete::multispace0, nom::character::complete::char(','), nom::character::complete::multispace0, - )), + ), parse_identifier, ), nom::character::complete::multispace0, nom::character::complete::char(')'), nom::character::complete::multispace0, - )), + ), |(_, _, _, _, _, x, _, _, _)| { Index::Position( x.into_iter() @@ -739,7 +745,8 @@ fn parse_index<'a>( ) }, ), - ))(ir_text)?; + )) + .parse(ir_text)?; Ok((ir_text, idx)) } @@ -784,13 +791,14 @@ fn parse_read<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes let ir_text = nom::character::complete::char(',')(ir_text)?.0; let ir_text = nom::character::complete::multispace0(ir_text)?.0; let (ir_text, indices) = nom::multi::separated_list1( - nom::sequence::tuple(( + ( nom::character::complete::multispace0, nom::character::complete::char(','), nom::character::complete::multispace0, - )), + ), |x| parse_index(x, context), - )(ir_text)?; + ) + .parse(ir_text)?; let ir_text = nom::character::complete::multispace0(ir_text)?.0; let ir_text = nom::character::complete::char(')')(ir_text)?.0; let collect = context.borrow_mut().get_node_id(collect); @@ -819,13 +827,14 @@ fn parse_write<'a>( let ir_text = nom::character::complete::char(',')(ir_text)?.0; let ir_text = nom::character::complete::multispace0(ir_text)?.0; let (ir_text, indices) = nom::multi::separated_list1( - nom::sequence::tuple(( + ( nom::character::complete::multispace0, nom::character::complete::char(','), nom::character::complete::multispace0, - )), + ), |x| parse_index(x, context), - )(ir_text)?; + ) + .parse(ir_text)?; let ir_text = nom::character::complete::multispace0(ir_text)?.0; let ir_text = nom::character::complete::char(')')(ir_text)?.0; let collect = context.borrow_mut().get_node_id(collect); @@ -884,48 +893,48 @@ fn parse_type<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes nom::combinator::map(nom::bytes::complete::tag("f64"), |_| Type::Float64), // Product types are parsed as a list of their element types. nom::combinator::map( - nom::sequence::tuple(( + ( nom::bytes::complete::tag("prod"), nom::character::complete::multispace0, nom::character::complete::char('('), nom::character::complete::multispace0, nom::multi::separated_list1( - nom::sequence::tuple(( + ( nom::character::complete::multispace0, nom::character::complete::char(','), nom::character::complete::multispace0, - )), + ), |x| parse_type_id(x, context), ), nom::character::complete::multispace0, nom::character::complete::char(')'), - )), + ), |(_, _, _, _, ids, _, _)| Type::Product(ids.into_boxed_slice()), ), // Sum types are parsed as a list of their variant types. nom::combinator::map( - nom::sequence::tuple(( + ( nom::bytes::complete::tag("sum"), nom::character::complete::multispace0, nom::character::complete::char('('), nom::character::complete::multispace0, nom::multi::separated_list1( - nom::sequence::tuple(( + ( nom::character::complete::multispace0, nom::character::complete::char(','), nom::character::complete::multispace0, - )), + ), |x| parse_type_id(x, context), ), nom::character::complete::multispace0, nom::character::complete::char(')'), - )), + ), |(_, _, _, _, ids, _, _)| Type::Summation(ids.into_boxed_slice()), ), // Array types are just a list of an element type and at least one // dynamic constant representing its extent. nom::combinator::map( - nom::sequence::tuple(( + ( nom::bytes::complete::tag("array"), nom::character::complete::multispace0, nom::character::complete::char('('), @@ -935,21 +944,22 @@ fn parse_type<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes nom::character::complete::char(','), nom::character::complete::multispace0, nom::multi::separated_list1( - nom::sequence::tuple(( + ( nom::character::complete::multispace0, nom::character::complete::char(','), nom::character::complete::multispace0, - )), + ), |x| parse_dynamic_constant_id(x, context), ), nom::character::complete::multispace0, nom::character::complete::char(')'), - )), + ), |(_, _, _, _, ty_id, _, _, _, dc_ids, _, _)| { Type::Array(ty_id, dc_ids.into_boxed_slice()) }, ), - ))(ir_text)?; + )) + .parse(ir_text)?; Ok((ir_text, ty)) } @@ -980,20 +990,20 @@ fn parse_dynamic_constant<'a>( // Parameter dynamic constants of a function are written by preprending // a '#' to the parameter's number. nom::combinator::map( - nom::sequence::tuple((nom::character::complete::char('#'), |x| { + (nom::character::complete::char('#'), |x| { parse_prim::<usize>(x, "1234567890") - })), + }), |(_, x)| DynamicConstant::Parameter(x), ), // Dynamic constant math is written using a prefix function nom::combinator::map( - nom::sequence::tuple(( + ( nom::character::complete::one_of("+-*/%"), parse_tuple2( |x| parse_dynamic_constant_id(x, context), |x| parse_dynamic_constant_id(x, context), ), - )), + ), |(op, (x, y))| match op { '+' => DynamicConstant::Add(vec![x, y]), '-' => DynamicConstant::Sub(x, y), @@ -1003,7 +1013,8 @@ fn parse_dynamic_constant<'a>( _ => panic!("Invalid parse"), }, ), - ))(ir_text)?; + )) + .parse(ir_text)?; Ok((ir_text, dc)) } @@ -1081,7 +1092,8 @@ fn parse_boolean<'a>(ir_text: &'a str) -> nom::IResult<&'a str, Constant> { let (ir_text, val) = nom::branch::alt(( nom::combinator::map(nom::bytes::complete::tag("false"), |_| false), nom::combinator::map(nom::bytes::complete::tag("true"), |_| true), - ))(ir_text)?; + )) + .parse(ir_text)?; Ok((ir_text, Constant::Boolean(val))) } @@ -1218,7 +1230,8 @@ fn parse_identifier<'a>(ir_text: &'a str) -> nom::IResult<&'a str, &'a str> { "1234567890_@ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", ), |s: &str| s.len() > 0, - )(ir_text) + ) + .parse(ir_text) } /* @@ -1226,7 +1239,7 @@ fn parse_identifier<'a>(ir_text: &'a str) -> nom::IResult<&'a str, &'a str> { */ fn parse_tuple1<'a, A, AF>(mut parse_a: AF) -> impl FnMut(&'a str) -> nom::IResult<&'a str, (A,)> where - AF: nom::Parser<&'a str, A, nom::error::Error<&'a str>>, + AF: nom::Parser<&'a str, Output = A, Error = nom::error::Error<&'a str>>, { move |ir_text: &'a str| { let ir_text = nom::character::complete::multispace0(ir_text)?.0; @@ -1244,8 +1257,8 @@ fn parse_tuple2<'a, A, B, AF, BF>( mut parse_b: BF, ) -> impl FnMut(&'a str) -> nom::IResult<&'a str, (A, B)> where - AF: nom::Parser<&'a str, A, nom::error::Error<&'a str>>, - BF: nom::Parser<&'a str, B, nom::error::Error<&'a str>>, + AF: nom::Parser<&'a str, Output = A, Error = nom::error::Error<&'a str>>, + BF: nom::Parser<&'a str, Output = B, Error = nom::error::Error<&'a str>>, { move |ir_text: &'a str| { let ir_text = nom::character::complete::multispace0(ir_text)?.0; @@ -1268,9 +1281,9 @@ fn parse_tuple3<'a, A, B, C, AF, BF, CF>( mut parse_c: CF, ) -> impl FnMut(&'a str) -> nom::IResult<&'a str, (A, B, C)> where - AF: nom::Parser<&'a str, A, nom::error::Error<&'a str>>, - BF: nom::Parser<&'a str, B, nom::error::Error<&'a str>>, - CF: nom::Parser<&'a str, C, nom::error::Error<&'a str>>, + AF: nom::Parser<&'a str, Output = A, Error = nom::error::Error<&'a str>>, + BF: nom::Parser<&'a str, Output = B, Error = nom::error::Error<&'a str>>, + CF: nom::Parser<&'a str, Output = C, Error = nom::error::Error<&'a str>>, { move |ir_text: &'a str| { let ir_text = nom::character::complete::multispace0(ir_text)?.0; diff --git a/hercules_opt/src/gcm.rs b/hercules_opt/src/gcm.rs index 446b31849b20fd3fa3d5361a74c15f0514d1dbad..f240589300520d680c1e126ba93728977f1d17e8 100644 --- a/hercules_opt/src/gcm.rs +++ b/hercules_opt/src/gcm.rs @@ -486,11 +486,10 @@ fn basic_blocks( // Look between the LCA and the schedule early location to place the // node. let schedule_early = schedule_early[id.idx()].unwrap(); + // If the node has no users, then it doesn't really matter where we + // place it - just place it at the early placement. let schedule_late = lca.unwrap_or(schedule_early); - let mut chain = dom - // If the node has no users, then it doesn't really matter where we - // place it - just place it at the early placement. - .chain(schedule_late, schedule_early); + let mut chain = dom.chain(schedule_late, schedule_early); if let Some(mut location) = chain.next() { while let Some(control_node) = chain.next() { diff --git a/hercules_opt/src/loop_bound_canon.rs b/hercules_opt/src/loop_bound_canon.rs index a1ad625785aae6e0628cda3816a70f32aca43ef1..edda6b63cb033cae86b722109c8f6b57f530639b 100644 --- a/hercules_opt/src/loop_bound_canon.rs +++ b/hercules_opt/src/loop_bound_canon.rs @@ -64,8 +64,7 @@ pub fn canonicalize_single_loop_bounds( let ivs = compute_iv_ranges(editor, l, ivs, &loop_condition); if has_canonical_iv(editor, l, &ivs).is_some() { - // println!("has canon iv!"); - return true; + return false; } let loop_bound_iv_phis = get_loop_condition_ivs(editor, l, &ivs, &loop_condition); @@ -76,7 +75,6 @@ pub fn canonicalize_single_loop_bounds( // Assume there is only one loop bound iv. if loop_bound_ivs.len() != 1 { - // println!("has multiple iv!"); return false; } @@ -223,25 +221,15 @@ pub fn canonicalize_single_loop_bounds( // If increment is negative (how in the world do we know that...) // Increment can be DefinetlyPostiive, Unknown, DefinetlyNegative. - // // First, massage loop condition to be <, because that is normal! - // Also includes - // editor.edit(|mut edit| { - - // } - // Collect immediate IV users - let update_expr_users: Vec<_> = editor .get_users(*update_expression) .filter(|node| *node != iv.phi() && *node != condition_node) .collect(); - // println!("update_expr_users: {:?}", update_expr_users); let iv_phi_users: Vec<_> = editor .get_users(iv.phi()) .filter(|node| *node != iv.phi() && *node != *update_expression) .collect(); - // println!(" iv_phi_users: {:?}", iv_phi_users); - let result = editor.edit(|mut edit| { // 4) Second, change loop IV to go from 0..N. // we subtract off init from init and dc_bound_node, @@ -268,17 +256,19 @@ pub fn canonicalize_single_loop_bounds( edit.replace_all_uses_where(bound_id, new_condition, |usee| *usee == binop_node)?; } - // Add back to uses of the IV - for user in update_expr_users { - let new_user = Node::Binary { - left: user, - right: *initializer, - op: BinaryOperator::Add, - }; - let new_user = edit.add_node(new_user); - edit = edit.replace_all_uses(user, new_user)?; - } + // for user in update_expr_users { + // let new_user = Node::Binary { + // left: user, + // right: *initializer, + // op: BinaryOperator::Add, + // }; + // let new_user = edit.add_node(new_user); + // edit = edit.replace_all_uses(user, new_user)?; + // } + + // for + // Add the offset back to users of the IV update expression let new_user = Node::Binary { left: *update_expression, right: *initializer, @@ -292,6 +282,7 @@ pub fn canonicalize_single_loop_bounds( && *usee != condition_node })?; + // Add the offset back to users of the IV directly let new_user = Node::Binary { left: *iv_phi, right: *initializer, diff --git a/hercules_rt/src/lib.rs b/hercules_rt/src/lib.rs index 419a760fa49647c28295625bd7df9db0e87707c1..090a38a02cbbcd46253452f76a0b71681363c833 100644 --- a/hercules_rt/src/lib.rs +++ b/hercules_rt/src/lib.rs @@ -1,6 +1,6 @@ #![feature(once_cell_try)] -use std::alloc::{alloc, dealloc, Layout}; +use std::alloc::{alloc, dealloc, GlobalAlloc, Layout, System}; use std::marker::PhantomData; use std::ptr::{copy_nonoverlapping, write_bytes, NonNull}; use std::slice::{from_raw_parts, from_raw_parts_mut}; @@ -867,3 +867,24 @@ impl<'a, T> HerculesRefInto<'a> for Box<[T]> { HerculesCPURef::from_slice(self) } } + +/* + * We need all allocations to be aligned to LARGEST_ALIGNMENT bytes for + * vectorization. This is the easiest way to do that. + */ +pub struct AlignedAlloc; + +unsafe impl GlobalAlloc for AlignedAlloc { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + let layout = layout.align_to(LARGEST_ALIGNMENT).unwrap(); + System.alloc(layout) + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + let layout = layout.align_to(LARGEST_ALIGNMENT).unwrap(); + System.dealloc(ptr, layout) + } +} + +#[global_allocator] +static A: AlignedAlloc = AlignedAlloc; diff --git a/hercules_samples/dot/src/main.rs b/hercules_samples/dot/src/main.rs index 7f5b453ab426f1ce0ab220682ce6be89bf851305..1f28cee28241827277f8836e963a0d80edeb5abc 100644 --- a/hercules_samples/dot/src/main.rs +++ b/hercules_samples/dot/src/main.rs @@ -2,36 +2,20 @@ #[cfg(feature = "cuda")] use hercules_rt::CUDABox; -use hercules_rt::{runner, HerculesCPURef}; +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo}; juno_build::juno!("dot"); fn main() { async_std::task::block_on(async { - #[cfg(not(feature = "cuda"))] - { - let a: [f32; 8] = [0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0]; - let a = HerculesCPURef::from_slice(&a); - let b: [f32; 8] = [0.0, 5.0, 0.0, 6.0, 0.0, 7.0, 0.0, 8.0]; - let b = HerculesCPURef::from_slice(&b); - let mut r = runner!(dot); - let c = r.run(8, a, b).await; - println!("{}", c); - assert_eq!(c, 70.0); - } - #[cfg(feature = "cuda")] - { - let mut a: [f32; 8] = [0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0]; - let a_box = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut a)); - let a = a_box.get_ref(); - let mut b: [f32; 8] = [0.0, 5.0, 0.0, 6.0, 0.0, 7.0, 0.0, 8.0]; - let b_box = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut b)); - let b = b_box.get_ref(); - let mut r = runner!(dot); - let c = r.run(8, a, b).await; - println!("{}", c); - assert_eq!(c, 70.0); - } + let a: Box<[f32; 8]> = Box::new([0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0]); + let b: Box<[f32; 8]> = Box::new([0.0, 5.0, 0.0, 6.0, 0.0, 7.0, 0.0, 8.0]); + let a = HerculesImmBox::from(a.as_ref() as &[f32]); + let b = HerculesImmBox::from(b.as_ref() as &[f32]); + let mut r = runner!(dot); + let c = r.run(8, a.to(), b.to()).await; + println!("{}", c); + assert_eq!(c, 70.0); }); } diff --git a/hercules_samples/matmul/src/main.rs b/hercules_samples/matmul/src/main.rs index 5c87991569ab59e8d978f17d74f1d18423679669..277276648e905186bfeb54714fb00f7275f17b22 100644 --- a/hercules_samples/matmul/src/main.rs +++ b/hercules_samples/matmul/src/main.rs @@ -1,20 +1,19 @@ #![feature(concat_idents)] +use std::iter::zip; use rand::random; -#[cfg(feature = "cuda")] -use hercules_rt::CUDABox; -use hercules_rt::{runner, HerculesCPURef}; +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox}; juno_build::juno!("matmul"); fn main() { async_std::task::block_on(async { const I: usize = 256; - const J: usize = 8; // hardcoded constant in matmul.hir + const J: usize = 64; const K: usize = 128; - let mut a: Box<[i32]> = (0..I * J).map(|_| random::<i32>() % 100).collect(); - let mut b: Box<[i32]> = (0..J * K).map(|_| random::<i32>() % 100).collect(); + let a: Box<[i32]> = (0..I * J).map(|_| random::<i32>() % 100).collect(); + let b: Box<[i32]> = (0..J * K).map(|_| random::<i32>() % 100).collect(); let mut correct_c: Box<[i32]> = (0..I * K).map(|_| 0).collect(); for i in 0..I { for k in 0..K { @@ -23,26 +22,11 @@ fn main() { } } } - #[cfg(not(feature = "cuda"))] - { - let a = HerculesCPURef::from_slice(&mut a); - let b = HerculesCPURef::from_slice(&mut b); - let mut r = runner!(matmul); - let c = r.run(I as u64, J as u64, K as u64, a, b).await; - assert_eq!(c.as_slice::<i32>(), &*correct_c); - } - #[cfg(feature = "cuda")] - { - let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut a)); - let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut b)); - let mut r = runner!(matmul); - let c = r - .run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()) - .await; - let mut c_cpu: Box<[i32]> = vec![0; correct_c.len()].into_boxed_slice(); - c.to_cpu_ref(&mut c_cpu); - assert_eq!(&*c_cpu, &*correct_c); - } + let a = HerculesImmBox::from(a.as_ref()); + let b = HerculesImmBox::from(b.as_ref()); + let mut r = runner!(matmul); + let mut c: HerculesMutBox<i32> = HerculesMutBox::from(r.run(I as u64, J as u64, K as u64, a.to(), b.to()).await); + assert_eq!(c.as_slice(), correct_c.as_ref()); }); } diff --git a/hercules_samples/matmul/src/matmul.hir b/hercules_samples/matmul/src/matmul.hir index f9d37afcbb9e74038aafd35dfacf91f533bc4b99..b0c31da4b32207bf3308c4b03583bc74c61f3737 100644 --- a/hercules_samples/matmul/src/matmul.hir +++ b/hercules_samples/matmul/src/matmul.hir @@ -1,9 +1,9 @@ -fn matmul<3>(a: array(i32, #0, 8), b: array(i32, 8, #2)) -> array(i32, #0, #2) +fn matmul<3>(a: array(i32, #0, #1), b: array(i32, #1, #2)) -> array(i32, #0, #2) c = constant(array(i32, #0, #2), []) i_j_ctrl = fork(start, #0, #2) i_idx = thread_id(i_j_ctrl, 0) j_idx = thread_id(i_j_ctrl, 1) - k_ctrl = fork(i_j_ctrl, 8) + k_ctrl = fork(i_j_ctrl, #1) k_idx = thread_id(k_ctrl, 0) k_join_ctrl = join(k_ctrl) i_j_join_ctrl = join(k_join_ctrl) diff --git a/juno_samples/cava/Cargo.toml b/juno_samples/cava/Cargo.toml index 63b6b2ac98dcc022a45f3c4084930cbfb24956ff..17e9a1d330ae08c0ae36dd2474ac29aa9ce76d7a 100644 --- a/juno_samples/cava/Cargo.toml +++ b/juno_samples/cava/Cargo.toml @@ -8,6 +8,9 @@ edition = "2021" name = "juno_cava" path = "src/main.rs" +[lib] +path = "src/lib.rs" + [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] @@ -21,3 +24,10 @@ async-std = "*" clap = { version = "*", features = ["derive"] } image = "*" with_builtin_macros = "0.1.0" + +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } + +[[bench]] +name = "cava_bench" +harness = false \ No newline at end of file diff --git a/juno_samples/cava/benches/cava_bench.rs b/juno_samples/cava/benches/cava_bench.rs new file mode 100644 index 0000000000000000000000000000000000000000..b8dd3ce26901e94872a5d1b28f1494c5bdfb8a77 --- /dev/null +++ b/juno_samples/cava/benches/cava_bench.rs @@ -0,0 +1,77 @@ +#![feature(concat_idents)] + +use criterion::{criterion_group, criterion_main, Criterion}; + +use hercules_rt::{runner, HerculesImmBoxTo}; + +use juno_cava::*; + +juno_build::juno!("cava"); + +fn cava_bench(c: &mut Criterion) { + let mut group = c.benchmark_group("cava bench"); + group.sample_size(10); + + let args = CavaInputs { + input: "examples/raw_tulip-small.bin".to_string(), + output: None, + verify: false, + output_verify: None, + cam_model: "cam_models/NikonD7000".to_string(), + crop_rows: Some(144), + crop_cols: Some(192), + }; + let (raw_image, cam_model) = make_raw_image_and_cam_model(&args); + let (rows, cols, num_ctrl_pts, image, tstw, ctrl_pts, weights, coefs, tonemap) = + prepare_hercules_inputs(&raw_image, &cam_model); + let mut r = runner!(cava); + + group.bench_function("cava bench small", |b| { + b.iter(|| { + async_std::task::block_on(r.run( + rows as u64, + cols as u64, + num_ctrl_pts as u64, + image.to(), + tstw.to(), + ctrl_pts.to(), + weights.to(), + coefs.to(), + tonemap.to(), + )); + }) + }); + + let args = CavaInputs { + input: "examples/raw_tulips.bin".to_string(), + output: None, + verify: true, + output_verify: None, + cam_model: "cam_models/NikonD7000".to_string(), + crop_rows: None, + crop_cols: None, + }; + let (raw_image, cam_model) = make_raw_image_and_cam_model(&args); + let (rows, cols, num_ctrl_pts, image, tstw, ctrl_pts, weights, coefs, tonemap) = + prepare_hercules_inputs(&raw_image, &cam_model); + let mut r = runner!(cava); + + group.bench_function("cava bench full", |b| { + b.iter(|| { + async_std::task::block_on(r.run( + rows as u64, + cols as u64, + num_ctrl_pts as u64, + image.to(), + tstw.to(), + ctrl_pts.to(), + weights.to(), + coefs.to(), + tonemap.to(), + )); + }) + }); +} + +criterion_group!(benches, cava_bench); +criterion_main!(benches); diff --git a/juno_samples/cava/src/cava.jn b/juno_samples/cava/src/cava.jn index 8158bf0a9bec25d57e1136bbf040ae0f561dcc3f..dbe799f9f23e63ed40157c9d57f5c4c8d9b4eb23 100644 --- a/juno_samples/cava/src/cava.jn +++ b/juno_samples/cava/src/cava.jn @@ -40,7 +40,7 @@ fn scale<row : usize, col : usize>(input : u8[CHAN, row, col]) -> f32[CHAN, row, fn demosaic<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, row, col] { @res2 let res : f32[CHAN, row, col]; - for r = 1 to row-1 { + @loop for r = 1 to row-1 { for c = 1 to col-1 { if r % 2 == 0 && c % 2 == 0 { let R1 = input[0, r, c-1]; @@ -152,7 +152,7 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>( let v = v1 * v1 + v2 * v2 + v3 * v3; l2_dist[cp] = sqrt!::<f32>(v); } - + @channel_loop for chan = 0 to CHAN { let chan_val : f32 = 0.0; for cp = 0 to num_ctrl_pts { diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch index 1b595b052582740c0a4886041fa8de2d54cfc5b2..3ac2f326115bb4aafb80c6a2d4b3cd024096db8c 100644 --- a/juno_samples/cava/src/cpu.sch +++ b/juno_samples/cava/src/cpu.sch @@ -38,6 +38,17 @@ fixpoint { } simpl!(fuse1); array-slf(fuse1); +loop-bound-canon(fuse1); +fixpoint { + forkify(fuse1); + fork-guard-elim(fuse1); + fork-coalesce(fuse1); +} +predication(fuse1); +simpl!(fuse1); +write-predication(fuse1); +simpl!(fuse1); +parallel-reduce(fuse1@loop); inline(fuse2); no-memset(fuse2@res); @@ -116,16 +127,8 @@ simpl!(fuse5); delete-uncalled(*); simpl!(*); -simpl!(fuse1); -unforkify(fuse1); -fork-split(fuse2); -unforkify(fuse2); -fork-split(fuse3); -unforkify(fuse3); -fork-split(fuse4); -unforkify(fuse4); -fork-split(fuse5); -unforkify(fuse5); +fork-split(fuse1, fuse2, fuse3, fuse4, fuse5); +unforkify(fuse1, fuse2, fuse3, fuse4, fuse5); simpl!(*); diff --git a/juno_samples/cava/src/gpu.sch b/juno_samples/cava/src/gpu.sch index f440dacde5d4dc0f9d42599e19923ccba91d82ab..c8db124ede9b98220866a8c1cdc7b17cdfb8093f 100644 --- a/juno_samples/cava/src/gpu.sch +++ b/juno_samples/cava/src/gpu.sch @@ -1,23 +1,138 @@ -gvn(*); -phi-elim(*); -dce(*); +macro simpl!(X) { + ccp(X); + simplify-cfg(X); + lift-dc-math(X); + gvn(X); + phi-elim(X); + dce(X); + infer-schedules(X); +} + +simpl!(*); + +let fuse1 = outline(cava@fuse1); +inline(fuse1); +gpu(fuse1); + +let fuse2 = outline(cava@fuse2); +inline(fuse2); +gpu(fuse2); + +let fuse3 = outline(cava@fuse3); +inline(fuse3); +gpu(fuse3); -inline(denoise); -gpu(scale, demosaic, denoise, transform, gamut, tone_map, descale); +let fuse4 = outline(cava@fuse4); +inline(fuse4); +gpu(fuse4); + +let fuse5 = outline(cava@fuse5); +inline(fuse5); +gpu(fuse5); ip-sroa(*); sroa(*); -dce(*); -gvn(*); -phi-elim(*); -dce(*); +simpl!(*); -// forkify(*); -infer-schedules(*); +no-memset(fuse1@res1); +no-memset(fuse1@res2); +fixpoint { + forkify(fuse1); + fork-guard-elim(fuse1); + fork-coalesce(fuse1); +} +simpl!(fuse1); +array-slf(fuse1); +loop-bound-canon(fuse1); +fixpoint { + forkify(fuse1); + fork-guard-elim(fuse1); + fork-coalesce(fuse1); +} +predication(fuse1); +simpl!(fuse1); +write-predication(fuse1); +simpl!(fuse1); +parallel-reduce(fuse1@loop); -gcm(*); +inline(fuse2); +no-memset(fuse2@res); +no-memset(fuse2@filter); +no-memset(fuse2@tmp); +fixpoint { + forkify(fuse2); + fork-guard-elim(fuse2); + fork-coalesce(fuse2); +} +simpl!(fuse2); +predication(fuse2); +simpl!(fuse2); + +let median = outline(fuse2@median); +fork-unroll(median@medianOuter); +simpl!(median); +fixpoint { + forkify(median); + fork-guard-elim(median); +} +simpl!(median); fixpoint { - float-collections(*); - dce(*); - gcm(*); + fork-unroll(median); } +ccp(median); +array-to-product(median); +sroa(median); +phi-elim(median); +predication(median); +simpl!(median); + +inline(fuse2); +ip-sroa(*); +sroa(*); +array-slf(fuse2); +write-predication(fuse2); +simpl!(fuse2); + +no-memset(fuse3@res); +fixpoint { + forkify(fuse3); + fork-guard-elim(fuse3); + fork-coalesce(fuse3); +} +simpl!(fuse3); + +no-memset(fuse4@res); +no-memset(fuse4@l2); +fixpoint { + forkify(fuse4); + fork-guard-elim(fuse4); + fork-coalesce(fuse4); +} +simpl!(fuse4); +fork-unroll(fuse4@channel_loop); +simpl!(fuse4); +fixpoint { + fork-fusion(fuse4@channel_loop); +} +simpl!(fuse4); +array-slf(fuse4); +simpl!(fuse4); +//fork-tile[2, 0, false, true](fuse4@channel_loop); +//fork-split(fuse4@channel_loop); +//clean-monoid-reduces(fuse4); + +no-memset(fuse5@res1); +no-memset(fuse5@res2); +fixpoint { + forkify(fuse5); + fork-guard-elim(fuse5); + fork-coalesce(fuse5); +} +simpl!(fuse5); +array-slf(fuse5); +simpl!(fuse5); + +delete-uncalled(*); +simpl!(*); + +gcm(*); diff --git a/juno_samples/cava/src/lib.rs b/juno_samples/cava/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..1810a24670d8fa7325b2ff353ba70756824a0269 --- /dev/null +++ b/juno_samples/cava/src/lib.rs @@ -0,0 +1,198 @@ +#![feature(concat_idents)] + +mod camera_model; +mod cava_rust; +mod image_proc; + +pub use self::camera_model::*; +pub use self::cava_rust::CHAN; +pub use self::image_proc::*; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox}; + +use image::ImageError; + +use clap::Parser; + +juno_build::juno!("cava"); + +pub fn make_raw_image_and_cam_model(args: &CavaInputs) -> (RawImage, CamModel) { + let raw_image = + read_raw(&args.input, args.crop_rows, args.crop_cols).expect("Error loading image"); + let cam_model = load_cam_model(&args.cam_model, CHAN).expect("Error loading camera model"); + println!( + "Running cava with {} rows, {} columns, and {} control points.", + raw_image.rows, raw_image.cols, cam_model.num_ctrl_pts + ); + (raw_image, cam_model) +} + +pub fn prepare_hercules_inputs<'a, 'b>( + raw_image: &'a RawImage, + cam_model: &'b CamModel, +) -> ( + usize, + usize, + usize, + HerculesImmBox<'a, u8>, + HerculesImmBox<'b, f32>, + HerculesImmBox<'b, f32>, + HerculesImmBox<'b, f32>, + HerculesImmBox<'b, f32>, + HerculesImmBox<'b, f32>, +) { + assert_eq!( + raw_image.pixels.len(), + CHAN * raw_image.rows * raw_image.cols + ); + assert_eq!(cam_model.tstw.len(), CHAN * CHAN); + assert_eq!(cam_model.ctrl_pts.len(), cam_model.num_ctrl_pts * CHAN); + assert_eq!(cam_model.weights.len(), cam_model.num_ctrl_pts * CHAN); + assert_eq!(cam_model.coefs.len(), 4 * CHAN); + assert_eq!(cam_model.tonemap.len(), 256 * CHAN); + + let image = HerculesImmBox::from(&raw_image.pixels as &[u8]); + let tstw = HerculesImmBox::from(&cam_model.tstw as &[f32]); + let ctrl_pts = HerculesImmBox::from(&cam_model.ctrl_pts as &[f32]); + let weights = HerculesImmBox::from(&cam_model.weights as &[f32]); + let coefs = HerculesImmBox::from(&cam_model.coefs as &[f32]); + let tonemap = HerculesImmBox::from(&cam_model.tonemap as &[f32]); + + ( + raw_image.rows, + raw_image.cols, + cam_model.num_ctrl_pts, + image, + tstw, + ctrl_pts, + weights, + coefs, + tonemap, + ) +} + +enum Error { + IOError(std::io::Error), + ImageError(image::ImageError), + ParseIntError(std::num::ParseIntError), + ParseFloatError(std::num::ParseFloatError), +} + +impl std::fmt::Debug for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::IOError(err) => write!(f, "IO Error: {:?}", err), + Error::ImageError(err) => write!(f, "Image Error: {:?}", err), + Error::ParseIntError(err) => write!(f, "Parse Error: {:?}", err), + Error::ParseFloatError(err) => write!(f, "Parse Error: {:?}", err), + } + } +} + +impl From<std::io::Error> for Error { + fn from(value: std::io::Error) -> Self { + Error::IOError(value) + } +} + +impl From<ImageError> for Error { + fn from(value: ImageError) -> Self { + Error::ImageError(value) + } +} + +impl From<std::num::ParseIntError> for Error { + fn from(value: std::num::ParseIntError) -> Self { + Error::ParseIntError(value) + } +} + +impl From<std::num::ParseFloatError> for Error { + fn from(value: std::num::ParseFloatError) -> Self { + Error::ParseFloatError(value) + } +} + +#[derive(Parser)] +#[clap(author, version, about, long_about = None)] +pub struct CavaInputs { + pub input: String, + #[clap(short, long, value_name = "PATH")] + pub output: Option<String>, + #[clap(short, long)] + pub verify: bool, + #[clap(long = "output-verify", value_name = "PATH")] + pub output_verify: Option<String>, + pub cam_model: String, + #[clap(short, long)] + pub crop_rows: Option<usize>, + #[clap(short, long)] + pub crop_cols: Option<usize>, +} + +pub fn cava_harness(args: CavaInputs) { + let (raw_image, cam_model) = make_raw_image_and_cam_model(&args); + let (rows, cols, num_ctrl_pts, image, tstw, ctrl_pts, weights, coefs, tonemap) = + prepare_hercules_inputs(&raw_image, &cam_model); + let mut r = runner!(cava); + + let result = async_std::task::block_on(async { + HerculesMutBox::from( + r.run( + rows as u64, + cols as u64, + num_ctrl_pts as u64, + image.to(), + tstw.to(), + ctrl_pts.to(), + weights.to(), + coefs.to(), + tonemap.to(), + ) + .await, + ) + }) + .as_slice() + .to_vec() + .into_boxed_slice(); + + if let Some(output) = args.output { + extern_image(rows, cols, &*result) + .save(output) + .expect("Error saving image"); + } + + if args.verify { + let cpu_result = cava_rust::cava( + rows, + cols, + num_ctrl_pts, + &raw_image.pixels, + &cam_model.tstw, + &cam_model.ctrl_pts, + &cam_model.weights, + &cam_model.coefs, + &cam_model.tonemap, + ); + + if let Some(output) = args.output_verify { + extern_image(rows, cols, &cpu_result) + .save(output) + .expect("Error saving verification image"); + } + + let max_diff = result + .iter() + .zip(cpu_result.iter()) + .map(|(a, b)| (*a as i16 - *b as i16).abs()) + .max() + .unwrap_or(0); + + assert!( + max_diff <= 3, + "Verification failed: maximum pixel difference of {} exceeds threshold of 3", + max_diff + ); + println!("Verified!"); + } +} diff --git a/juno_samples/cava/src/main.rs b/juno_samples/cava/src/main.rs index 142ed70394aeb38d09ce162a30af8664e604a559..6bd84639ddac38300d7b15b82a902508df9fcfb6 100644 --- a/juno_samples/cava/src/main.rs +++ b/juno_samples/cava/src/main.rs @@ -1,234 +1,6 @@ -#![feature(concat_idents)] - -mod camera_model; -mod cava_rust; -mod image_proc; - -use self::camera_model::*; -use self::cava_rust::CHAN; -use self::image_proc::*; - -use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox}; - -use image::ImageError; - use clap::Parser; -juno_build::juno!("cava"); - -// Individual lifetimes are not needed in this example but should probably be generated for -// flexibility -async fn safe_run<'a, 'b: 'a, 'c: 'a, 'd: 'a, 'e: 'a, 'f: 'a, 'g: 'a>( - runner: &'a mut HerculesRunner_cava, - r: u64, - c: u64, - num_ctrl_pts: u64, - input: &'b HerculesImmBox<'b, u8>, - tstw: &'c HerculesImmBox<'c, f32>, - ctrl_pts: &'d HerculesImmBox<'d, f32>, - weights: &'e HerculesImmBox<'e, f32>, - coefs: &'f HerculesImmBox<'f, f32>, - tonemap: &'g HerculesImmBox<'g, f32>, -) -> HerculesMutBox<'a, u8> { - HerculesMutBox::from( - runner - .run( - r, - c, - num_ctrl_pts, - input.to(), - tstw.to(), - ctrl_pts.to(), - weights.to(), - coefs.to(), - tonemap.to(), - ) - .await, - ) -} - -fn run_cava( - rows: usize, - cols: usize, - num_ctrl_pts: usize, - image: &[u8], - tstw: &[f32], - ctrl_pts: &[f32], - weights: &[f32], - coefs: &[f32], - tonemap: &[f32], -) -> Box<[u8]> { - assert_eq!(image.len(), CHAN * rows * cols); - assert_eq!(tstw.len(), CHAN * CHAN); - assert_eq!(ctrl_pts.len(), num_ctrl_pts * CHAN); - assert_eq!(weights.len(), num_ctrl_pts * CHAN); - assert_eq!(coefs.len(), 4 * CHAN); - assert_eq!(tonemap.len(), 256 * CHAN); - - let image = HerculesImmBox::from(image); - let tstw = HerculesImmBox::from(tstw); - let ctrl_pts = HerculesImmBox::from(ctrl_pts); - let weights = HerculesImmBox::from(weights); - let coefs = HerculesImmBox::from(coefs); - let tonemap = HerculesImmBox::from(tonemap); - - let mut r = runner!(cava); - - async_std::task::block_on(async { - safe_run( - &mut r, - rows as u64, - cols as u64, - num_ctrl_pts as u64, - &image, - &tstw, - &ctrl_pts, - &weights, - &coefs, - &tonemap, - ) - .await - }) - .as_slice() - .to_vec() - .into_boxed_slice() -} - -enum Error { - IOError(std::io::Error), - ImageError(image::ImageError), - ParseIntError(std::num::ParseIntError), - ParseFloatError(std::num::ParseFloatError), -} - -impl std::fmt::Debug for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Error::IOError(err) => write!(f, "IO Error: {:?}", err), - Error::ImageError(err) => write!(f, "Image Error: {:?}", err), - Error::ParseIntError(err) => write!(f, "Parse Error: {:?}", err), - Error::ParseFloatError(err) => write!(f, "Parse Error: {:?}", err), - } - } -} - -impl From<std::io::Error> for Error { - fn from(value: std::io::Error) -> Self { - Error::IOError(value) - } -} - -impl From<ImageError> for Error { - fn from(value: ImageError) -> Self { - Error::ImageError(value) - } -} - -impl From<std::num::ParseIntError> for Error { - fn from(value: std::num::ParseIntError) -> Self { - Error::ParseIntError(value) - } -} - -impl From<std::num::ParseFloatError> for Error { - fn from(value: std::num::ParseFloatError) -> Self { - Error::ParseFloatError(value) - } -} - -#[derive(Parser)] -#[clap(author, version, about, long_about = None)] -struct CavaInputs { - input: String, - #[clap(short, long, value_name = "PATH")] - output: Option<String>, - #[clap(short, long)] - verify: bool, - #[clap(long = "output-verify", value_name = "PATH")] - output_verify: Option<String>, - cam_model: String, - #[clap(short, long)] - crop_rows: Option<usize>, - #[clap(short, long)] - crop_cols: Option<usize>, -} - -fn cava_harness(args: CavaInputs) { - let CavaInputs { - input, - output, - verify, - output_verify, - cam_model, - crop_rows, - crop_cols, - } = args; - let RawImage { rows, cols, pixels } = - read_raw(input, crop_rows, crop_cols).expect("Error loading image"); - let CamModel { - tstw, - num_ctrl_pts, - ctrl_pts, - weights, - coefs, - tonemap, - } = load_cam_model(cam_model, CHAN).expect("Error loading camera model"); - - println!( - "Running cava with {} rows, {} columns, and {} control points.", - rows, cols, num_ctrl_pts - ); - let result = run_cava( - rows, - cols, - num_ctrl_pts, - &pixels, - &tstw, - &ctrl_pts, - &weights, - &coefs, - &tonemap, - ); - - if let Some(output) = output { - extern_image(rows, cols, &*result) - .save(output) - .expect("Error saving image"); - } - - if verify { - let cpu_result = cava_rust::cava( - rows, - cols, - num_ctrl_pts, - &pixels, - &tstw, - &ctrl_pts, - &weights, - &coefs, - &tonemap, - ); - - if let Some(output) = output_verify { - extern_image(rows, cols, &cpu_result) - .save(output) - .expect("Error saving verification image"); - } - - let max_diff = result - .iter() - .zip(cpu_result.iter()) - .map(|(a, b)| (*a as i16 - *b as i16).abs()) - .max() - .unwrap_or(0); - - assert!( - max_diff <= 3, - "Verification failed: maximum pixel difference of {} exceeds threshold of 3", - max_diff - ); - } -} +use juno_cava::{cava_harness, CavaInputs}; fn main() { let args = CavaInputs::parse(); diff --git a/juno_samples/concat/src/main.rs b/juno_samples/concat/src/main.rs index 547dee08b118c475e1905d0fe93e9aaebfdca535..2f704f168bf64b619dde32610dd1a12603fc9dd0 100644 --- a/juno_samples/concat/src/main.rs +++ b/juno_samples/concat/src/main.rs @@ -10,12 +10,12 @@ juno_build::juno!("concat"); fn main() { async_std::task::block_on(async { let mut r = runner!(concat_entry); - let mut a_data = [7, 7, 0]; - let mut b_data = [7, 7, 0, 0, 7, 7]; + let mut a_data = Box::new([7, 7, 0]); + let mut b_data = Box::new([7, 7, 0, 0, 7, 7]); #[cfg(not(feature = "cuda"))] { - let a = HerculesCPURef::from_slice(&mut a_data); - let b = HerculesCPURef::from_slice(&mut b_data); + let a = HerculesCPURef::from_slice(a_data.as_ref()); + let b = HerculesCPURef::from_slice(b_data.as_ref()); let output = r.run(3, 6, a, b).await; assert_eq!(output, 42); @@ -36,10 +36,8 @@ fn main() { } #[cfg(feature = "cuda")] { - let mut a_data = [7, 7, 0]; - let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut a_data)); - let mut b_data = [7, 7, 0, 0, 7, 7]; - let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut b_data)); + let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(a_data.as_ref())); + let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(b_data.as_ref())); let output = r.run(3, 6, a.get_ref(), b.get_ref()).await; assert_eq!(output, 42); } diff --git a/juno_samples/edge_detection/Cargo.toml b/juno_samples/edge_detection/Cargo.toml index a3825eed9b6601ca4d06d8e81d332ed5986b3124..483724d8e4c2c7bcd057990ce5e149923e90cc3b 100644 --- a/juno_samples/edge_detection/Cargo.toml +++ b/juno_samples/edge_detection/Cargo.toml @@ -13,6 +13,9 @@ name = "juno_edge_detection" path = "src/main.rs" required-features = ["opencv"] +[lib] +path = "src/lib.rs" + [build-dependencies] juno_build = { path = "../../juno_build" } @@ -23,3 +26,10 @@ async-std = "*" clap = { version = "*", features = ["derive"] } with_builtin_macros = "0.1.0" opencv = { version = "*", features = ["clang-runtime"], optional = true } + +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } + +[[bench]] +name = "edge_detection_bench" +harness = false diff --git a/juno_samples/edge_detection/benches/edge_detection_bench.rs b/juno_samples/edge_detection/benches/edge_detection_bench.rs new file mode 100644 index 0000000000000000000000000000000000000000..806a886510666f471335227b201daa74c000f4e1 --- /dev/null +++ b/juno_samples/edge_detection/benches/edge_detection_bench.rs @@ -0,0 +1,104 @@ +#![feature(concat_idents)] +use std::slice::from_raw_parts; + +use criterion::{criterion_group, criterion_main, Criterion}; + +use opencv::core::{Mat, Size, CV_32F, CV_8U}; +use opencv::imgproc::{cvt_color_def, ColorConversionCodes}; +use opencv::prelude::{MatTraitConst, VideoCaptureTrait, VideoCaptureTraitConst}; +use opencv::videoio::{VideoCapture, VideoCaptureProperties, VideoWriter, VideoWriterTrait}; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo}; + +use juno_edge_detection::*; + +juno_build::juno!("edge_detection"); + +fn edge_detection_bench(c: &mut Criterion) { + let mut group = c.benchmark_group("edge detection bench"); + group.sample_size(10); + + let input = "examples/formula1_scaled.mp4"; + + let gs: usize = 7; + let gaussian_filter: Vec<f32> = vec![ + 0.000036, 0.000363, 0.001446, 0.002291, 0.001446, 0.000363, 0.000036, 0.000363, 0.003676, + 0.014662, 0.023226, 0.014662, 0.003676, 0.000363, 0.001446, 0.014662, 0.058488, 0.092651, + 0.058488, 0.014662, 0.001446, 0.002291, 0.023226, 0.092651, 0.146768, 0.092651, 0.023226, + 0.002291, 0.001446, 0.014662, 0.058488, 0.092651, 0.058488, 0.014662, 0.001446, 0.000363, + 0.003676, 0.014662, 0.023226, 0.014662, 0.003676, 0.000363, 0.000036, 0.000363, 0.001446, + 0.002291, 0.001446, 0.000363, 0.000036, + ]; + let gaussian_filter_h = HerculesImmBox::from(gaussian_filter.as_slice()); + + let sz: usize = 3; + let structure: Vec<f32> = vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]; + let structure_h = HerculesImmBox::from(structure.as_slice()); + + let sb: usize = 3; + let sx: Vec<f32> = vec![-1.0, 0.0, 1.0, -2.0, 0.0, 2.0, -1.0, 0.0, 1.0]; + let sx_h = HerculesImmBox::from(sx.as_slice()); + + let sy: Vec<f32> = vec![-1.0, -2.0, -1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0]; + let sy_h = HerculesImmBox::from(sy.as_slice()); + + let theta: f32 = 0.1; + + let mut video = VideoCapture::from_file_def(&input).expect("Error loading video"); + assert!(video.is_opened().unwrap()); + + let fps = video + .get(VideoCaptureProperties::CAP_PROP_FPS.into()) + .expect("Error getting fps"); + + let _num_frames = video + .get(VideoCaptureProperties::CAP_PROP_FRAME_COUNT.into()) + .expect("Error getting number of frames") as usize; + let width = video + .get(VideoCaptureProperties::CAP_PROP_FRAME_WIDTH.into()) + .expect("Error getting width") as usize; + let height = video + .get(VideoCaptureProperties::CAP_PROP_FRAME_HEIGHT.into()) + .expect("Error getting height") as usize; + let num_frames = 5; + + let mut r = runner!(edge_detection); + + let frames: Vec<_> = (0..num_frames).map(|_| load_frame(&mut video)).collect(); + + group.bench_function("edge detection bench", |b| { + b.iter(|| { + for i in 0..num_frames { + let frame = &frames[i]; + let ptr = frame.ptr_def().unwrap() as *const f32; + + assert!(frame.rows() as usize == height); + assert!(frame.cols() as usize == width); + + let input = unsafe { from_raw_parts(ptr, height * width) }; + + let input_h = HerculesImmBox::from(input); + + let result = async_std::task::block_on(async { + r.run( + height as u64, + width as u64, + gs as u64, + sz as u64, + sb as u64, + input_h.to(), + gaussian_filter_h.to(), + structure_h.to(), + sx_h.to(), + sy_h.to(), + theta, + ) + .await + }); + } + }) + }); +} + +criterion_group!(benches, edge_detection_bench); +criterion_main!(benches); diff --git a/juno_samples/edge_detection/build.rs b/juno_samples/edge_detection/build.rs index 7071fae7612ab871757fddebdbb576a2e4a6073c..d5d6f7b7de417f48b658c849881139347869ca05 100644 --- a/juno_samples/edge_detection/build.rs +++ b/juno_samples/edge_detection/build.rs @@ -14,6 +14,8 @@ fn main() { JunoCompiler::new() .file_in_src("edge_detection.jn") .unwrap() + .schedule_in_src("cpu.sch") + .unwrap() .build() .unwrap(); } diff --git a/juno_samples/edge_detection/src/cpu.sch b/juno_samples/edge_detection/src/cpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..3c3d09b34f9bd8c4b412d4b19e3898769cf2670a --- /dev/null +++ b/juno_samples/edge_detection/src/cpu.sch @@ -0,0 +1,79 @@ +macro simpl!(X) { + ccp(X); + simplify-cfg(X); + lift-dc-math(X); + gvn(X); + phi-elim(X); + dce(X); + infer-schedules(X); +} + +simpl!(*); + +ip-sroa(*); +sroa(*); +simpl!(*); + +no-memset(gaussian_smoothing@res); +fixpoint { + forkify(gaussian_smoothing); + fork-guard-elim(gaussian_smoothing); + fork-coalesce(gaussian_smoothing); +} +predication(gaussian_smoothing); +simpl!(gaussian_smoothing); +predication(gaussian_smoothing); +simpl!(gaussian_smoothing); + +no-memset(laplacian_estimate@res, laplacian_estimate@shr1, laplacian_estimate@shr2); +fixpoint { + forkify(laplacian_estimate); + fork-guard-elim(laplacian_estimate); + fork-coalesce(laplacian_estimate); +} +simpl!(laplacian_estimate); + +no-memset(zero_crossings@res, zero_crossings@shr1, zero_crossings@shr2); +fixpoint { + forkify(zero_crossings); + fork-guard-elim(zero_crossings); + fork-coalesce(zero_crossings); +} +simpl!(zero_crossings); + +no-memset(gradient@res); +fixpoint { + forkify(gradient); + fork-guard-elim(gradient); + fork-coalesce(gradient); +} +predication(gradient); +simpl!(gradient); +predication(gradient); +simpl!(gradient); + +fixpoint { + forkify(max_gradient); + fork-guard-elim(max_gradient); + fork-coalesce(max_gradient); +} +simpl!(max_gradient); + +no-memset(reject_zero_crossings@res); +fixpoint { + forkify(reject_zero_crossings); + fork-guard-elim(reject_zero_crossings); + fork-coalesce(reject_zero_crossings); +} +predication(reject_zero_crossings); +simpl!(reject_zero_crossings); + +async-call(edge_detection@le, edge_detection@zc); + +fork-split(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings); +unforkify(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings); + +simpl!(*); + +delete-uncalled(*); +gcm(*); diff --git a/juno_samples/edge_detection/src/edge_detection.jn b/juno_samples/edge_detection/src/edge_detection.jn index d49258c5d328d021953aaf035f80fb328a4440af..3bc5bbfbe16b6eb15d0c6387d9f0d8397ce69cc6 100644 --- a/juno_samples/edge_detection/src/edge_detection.jn +++ b/juno_samples/edge_detection/src/edge_detection.jn @@ -2,7 +2,7 @@ fn gaussian_smoothing<n, m, gs : usize>( input: f32[n, m], filter: f32[gs, gs], ) -> f32[n, m] { - let result : f32[n, m]; + @res let result : f32[n, m]; // Define the gaussian radius as half the gaussian size const gr = gs / 2; @@ -39,12 +39,12 @@ fn laplacian_estimate<n, m, sz: usize>( ) -> f32[n, m] { const r = sz / 2; - let result : f32[n, m]; + @res let result : f32[n, m]; for row = 0 to n { for col = 0 to m { // Copy data for dilation filter - let imageArea : f32[sz, sz]; + @shr1 let imageArea : f32[sz, sz]; for i = 0 to sz { for j = 0 to sz { imageArea[i, j] = if row + i < r then MIN_BR @@ -64,7 +64,7 @@ fn laplacian_estimate<n, m, sz: usize>( } // Data copy for erotion filter - let imageArea : f32[sz, sz]; + @shr2 let imageArea : f32[sz, sz]; for i = 0 to sz { for j = 0 to sz { imageArea[i, j] = if row + i < r then MAX_BR @@ -97,12 +97,12 @@ fn zero_crossings<n, m, sz: usize>( ) -> f32[n, m] { const r = sz / 2; - let result : f32[n, m]; + @res let result : f32[n, m]; for row = 0 to n { for col = 0 to m { // Data copy for dilation filter - let imageArea : f32[sz, sz]; + @shr1 let imageArea : f32[sz, sz]; for i = 0 to sz { for j = 0 to sz { imageArea[i, j] = if row + i < r then MIN_BR @@ -124,7 +124,7 @@ fn zero_crossings<n, m, sz: usize>( } // Data copy for erotion filter - let imageArea : f32[sz, sz]; + @shr2 let imageArea : f32[sz, sz]; for i = 0 to sz { for j = 0 to sz { imageArea[i, j] = if row + i < r then MAX_BR @@ -160,7 +160,7 @@ fn gradient<n, m, sb: usize>( ) -> f32[n, m] { const sbr = sb / 2; - let result : f32[n, m]; + @res let result : f32[n, m]; for row = 0 to n { for col = 0 to m { @@ -206,7 +206,7 @@ fn reject_zero_crossings<n, m: usize>( max_gradient: f32, theta: f32, ) -> f32[n, m] { - let result : f32[n, m]; + @res let result : f32[n, m]; for row = 0 to n { for col = 0 to m { @@ -229,10 +229,10 @@ fn edge_detection<n, m, gs, sz, sb: usize>( sy: f32[sb, sb], theta: f32, ) -> f32[n, m] { - let smoothed = gaussian_smoothing::<n, m, gs>(input, gaussian_filter); - let laplacian = laplacian_estimate::<n, m, sz>(smoothed, structure); - let zcs = zero_crossings::<n, m, sz>(laplacian, structure); - let gradient = gradient::<n, m, sb>(smoothed, sx, sy); - let maxgrad = max_gradient::<n, m>(gradient); + let smoothed = gaussian_smoothing::<n, m, gs>(input, gaussian_filter); + @le let laplacian = laplacian_estimate::<n, m, sz>(smoothed, structure); + @zc let zcs = zero_crossings::<n, m, sz>(laplacian, structure); + let gradient = gradient::<n, m, sb>(smoothed, sx, sy); + let maxgrad = max_gradient::<n, m>(gradient); return reject_zero_crossings::<n, m>(zcs, gradient, maxgrad, theta); } diff --git a/juno_samples/edge_detection/src/lib.rs b/juno_samples/edge_detection/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..6c2a15bd394a8fed3828ea79f2f8470856ead846 --- /dev/null +++ b/juno_samples/edge_detection/src/lib.rs @@ -0,0 +1,256 @@ +#![cfg(feature = "opencv")] +#![feature(concat_idents)] + +mod edge_detection_rust; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox}; + +use std::slice::from_raw_parts; + +use clap::Parser; + +use opencv::core::{Mat, Size, CV_32F, CV_8U}; +use opencv::highgui::{imshow, wait_key}; +use opencv::imgproc::{cvt_color_def, ColorConversionCodes}; +use opencv::prelude::{MatTraitConst, VideoCaptureTrait, VideoCaptureTraitConst}; +use opencv::videoio::{VideoCapture, VideoCaptureProperties, VideoWriter, VideoWriterTrait}; + +juno_build::juno!("edge_detection"); + +#[derive(Parser)] +#[clap(author, version, about, long_about = None)] +pub struct EdgeDetectionInputs { + pub input: String, + #[clap(short, long)] + pub display: bool, + #[clap(short, long, value_name = "PATH")] + pub output: Option<String>, + #[clap(short, long)] + pub verify: bool, + #[clap(long = "display-verify")] + pub display_verify: bool, + #[clap(long = "output-verify", value_name = "PATH")] + pub output_verify: Option<String>, + #[clap(short, long, value_name = "COUNT")] + pub frames: Option<usize>, +} + +pub fn load_frame(video: &mut VideoCapture) -> Mat { + let mut frame = Mat::default(); + + let Ok(true) = video.read(&mut frame) else { + panic!("Failed to load frame"); + }; + let result = if frame.channels() == 3 { + let mut converted = Mat::default(); + let () = cvt_color_def( + &frame, + &mut converted, + ColorConversionCodes::COLOR_BGR2GRAY.into(), + ) + .expect("Failure in conversion to grayscale"); + let mut result = Mat::default(); + let () = converted + .convert_to(&mut result, CV_32F, 1.0 / 255.0, 0.0) + .expect("Failure in conversion to f32"); + result + } else if frame.channels() == 1 { + let mut result = Mat::default(); + let () = frame + .convert_to(&mut result, CV_32F, 1.0 / 255.0, 0.0) + .expect("Failure in conversion to f32"); + result + } else { + panic!("Expected either RGB or grayscale image"); + }; + + assert!(result.is_continuous()); + result +} + +pub fn frame_from_slice(frame: &[f32], height: usize, width: usize) -> Mat { + let result = Mat::from_slice(frame) + .expect("Failed to create matrix from result") + .reshape(1, height as i32) + .expect("Failed to reshape result matrix") + .clone_pointee(); + assert!(result.cols() == width as i32); + + // Convert to u8 since the VideoWriter seems to require that + let mut converted = Mat::default(); + let () = result + .convert_to(&mut converted, CV_8U, 255.0, 0.0) + .expect("Failure in conversion to u8"); + + converted +} + +pub fn edge_detection_harness(args: EdgeDetectionInputs) { + let EdgeDetectionInputs { + input, + display, + output, + verify, + display_verify, + output_verify, + frames, + } = args; + + let gs: usize = 7; + let gaussian_filter: Vec<f32> = vec![ + 0.000036, 0.000363, 0.001446, 0.002291, 0.001446, 0.000363, 0.000036, 0.000363, 0.003676, + 0.014662, 0.023226, 0.014662, 0.003676, 0.000363, 0.001446, 0.014662, 0.058488, 0.092651, + 0.058488, 0.014662, 0.001446, 0.002291, 0.023226, 0.092651, 0.146768, 0.092651, 0.023226, + 0.002291, 0.001446, 0.014662, 0.058488, 0.092651, 0.058488, 0.014662, 0.001446, 0.000363, + 0.003676, 0.014662, 0.023226, 0.014662, 0.003676, 0.000363, 0.000036, 0.000363, 0.001446, + 0.002291, 0.001446, 0.000363, 0.000036, + ]; + let gaussian_filter_h = HerculesImmBox::from(gaussian_filter.as_slice()); + + let sz: usize = 3; + let structure: Vec<f32> = vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]; + let structure_h = HerculesImmBox::from(structure.as_slice()); + + let sb: usize = 3; + let sx: Vec<f32> = vec![-1.0, 0.0, 1.0, -2.0, 0.0, 2.0, -1.0, 0.0, 1.0]; + let sx_h = HerculesImmBox::from(sx.as_slice()); + + let sy: Vec<f32> = vec![-1.0, -2.0, -1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0]; + let sy_h = HerculesImmBox::from(sy.as_slice()); + + let theta: f32 = 0.1; + + let mut video = VideoCapture::from_file_def(&input).expect("Error loading video"); + assert!(video.is_opened().unwrap()); + + let fps = video + .get(VideoCaptureProperties::CAP_PROP_FPS.into()) + .expect("Error getting fps"); + + let num_frames = video + .get(VideoCaptureProperties::CAP_PROP_FRAME_COUNT.into()) + .expect("Error getting number of frames") as usize; + let width = video + .get(VideoCaptureProperties::CAP_PROP_FRAME_WIDTH.into()) + .expect("Error getting width") as usize; + let height = video + .get(VideoCaptureProperties::CAP_PROP_FRAME_HEIGHT.into()) + .expect("Error getting height") as usize; + + let num_frames = if let Some(frames) = frames { + usize::min(frames, num_frames) + } else { + num_frames + }; + + let mut r = runner!(edge_detection); + + let mut output = output.map(|filename| { + VideoWriter::new( + &filename, + VideoWriter::fourcc('m', 'p', '4', 'v').unwrap(), + fps, + Size { + width: width as i32, + height: height as i32, + }, + false, + ) + .expect("Error opening output video") + }); + + let mut output_verify = output_verify.map(|filename| { + VideoWriter::new( + &filename, + VideoWriter::fourcc('m', 'p', '4', 'v').unwrap(), + fps, + Size { + width: width as i32, + height: height as i32, + }, + false, + ) + .expect("Error opening output video") + }); + + for i in 0..num_frames { + let frame = load_frame(&mut video); + let ptr = frame.ptr_def().unwrap() as *const f32; + + assert!(frame.rows() as usize == height); + assert!(frame.cols() as usize == width); + + let input = unsafe { from_raw_parts(ptr, height * width) }; + + let input_h = HerculesImmBox::from(input); + + let result = async_std::task::block_on(async { + HerculesMutBox::from( + r.run( + height as u64, + width as u64, + gs as u64, + sz as u64, + sb as u64, + input_h.to(), + gaussian_filter_h.to(), + structure_h.to(), + sx_h.to(), + sy_h.to(), + theta, + ) + .await, + ) + }) + .as_slice() + .to_vec(); + + if display { + let result = frame_from_slice(&result, height, width); + let () = imshow("Juno", &result).expect("Failure in displaying image"); + } + if let Some(ref mut output) = output { + let result = frame_from_slice(&result, height, width); + let () = output.write(&result).expect("Failure in writing frame"); + } + + if verify { + let rust_result = edge_detection_rust::edge_detection( + height, + width, + gs, + sz, + sb, + input, + &gaussian_filter, + &structure, + &sx, + &sy, + theta, + ); + + assert_eq!(result, rust_result); + println!("Frames {} match", i); + + if display_verify { + let rust_result = frame_from_slice(&rust_result, height, width); + let () = imshow("Rust", &rust_result).expect("Failure in displaying image"); + } + if let Some(ref mut output) = output_verify { + let result = frame_from_slice(&rust_result, height, width); + let () = output.write(&result).expect("Failure in writing frame"); + } + } + + if display || (verify && display_verify) { + let _ = wait_key(0); + } + } + + if let Some(mut output) = output { + let () = output.release().expect("Failure releasing output video"); + } + if let Some(mut output) = output_verify { + let () = output.release().expect("Failure releasing output video"); + } +} diff --git a/juno_samples/edge_detection/src/main.rs b/juno_samples/edge_detection/src/main.rs index 60ccb51565bdaa6d0f1837385a9de7ac52dc0128..23c4903ac3c4d1743bdabac1ee7f7608f86eb8db 100644 --- a/juno_samples/edge_detection/src/main.rs +++ b/juno_samples/edge_detection/src/main.rs @@ -1,290 +1,6 @@ -#![feature(concat_idents)] - -mod edge_detection_rust; - -use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox}; - -use std::slice::from_raw_parts; - use clap::Parser; -use opencv::core::{Mat, Size, CV_32F, CV_8U}; -use opencv::highgui::{imshow, wait_key}; -use opencv::imgproc::{cvt_color_def, ColorConversionCodes}; -use opencv::prelude::{MatTraitConst, VideoCaptureTrait, VideoCaptureTraitConst}; -use opencv::videoio::{VideoCapture, VideoCaptureProperties, VideoWriter, VideoWriterTrait}; - -juno_build::juno!("edge_detection"); - -#[derive(Parser)] -#[clap(author, version, about, long_about = None)] -struct EdgeDetectionInputs { - input: String, - #[clap(short, long)] - display: bool, - #[clap(short, long, value_name = "PATH")] - output: Option<String>, - #[clap(short, long)] - verify: bool, - #[clap(long = "display-verify")] - display_verify: bool, - #[clap(long = "output-verify", value_name = "PATH")] - output_verify: Option<String>, - #[clap(short, long, value_name = "COUNT")] - frames: Option<usize>, -} - -fn load_frame(video: &mut VideoCapture) -> Mat { - let mut frame = Mat::default(); - - let Ok(true) = video.read(&mut frame) else { - panic!("Failed to load frame"); - }; - let result = if frame.channels() == 3 { - let mut converted = Mat::default(); - let () = cvt_color_def( - &frame, - &mut converted, - ColorConversionCodes::COLOR_BGR2GRAY.into(), - ) - .expect("Failure in conversion to grayscale"); - let mut result = Mat::default(); - let () = converted - .convert_to(&mut result, CV_32F, 1.0 / 255.0, 0.0) - .expect("Failure in conversion to f32"); - result - } else if frame.channels() == 1 { - let mut result = Mat::default(); - let () = frame - .convert_to(&mut result, CV_32F, 1.0 / 255.0, 0.0) - .expect("Failure in conversion to f32"); - result - } else { - panic!("Expected either RGB or grayscale image"); - }; - - assert!(result.is_continuous()); - result -} - -fn frame_from_slice(frame: &[f32], height: usize, width: usize) -> Mat { - let result = Mat::from_slice(frame) - .expect("Failed to create matrix from result") - .reshape(1, height as i32) - .expect("Failed to reshape result matrix") - .clone_pointee(); - assert!(result.cols() == width as i32); - - // Convert to u8 since the VideoWriter seems to require that - let mut converted = Mat::default(); - let () = result - .convert_to(&mut converted, CV_8U, 255.0, 0.0) - .expect("Failure in conversion to u8"); - - converted -} - -async fn safe_run<'a, 'b, 'c, 'd, 'e, 'f>( - runner: &'a mut HerculesRunner_edge_detection, - n: u64, - m: u64, - gs: u64, - sz: u64, - sb: u64, - input: &'b HerculesImmBox<'b, f32>, - gaussian_filter: &'c HerculesImmBox<'c, f32>, - structure: &'d HerculesImmBox<'d, f32>, - sx: &'e HerculesImmBox<'e, f32>, - sy: &'f HerculesImmBox<'f, f32>, - theta: f32, -) -> HerculesMutBox<'a, f32> { - HerculesMutBox::from( - runner - .run( - n, - m, - gs, - sz, - sb, - input.to(), - gaussian_filter.to(), - structure.to(), - sx.to(), - sy.to(), - theta, - ) - .await, - ) -} - -fn edge_detection_harness(args: EdgeDetectionInputs) { - let EdgeDetectionInputs { - input, - display, - output, - verify, - display_verify, - output_verify, - frames, - } = args; - - let gs: usize = 7; - let gaussian_filter: Vec<f32> = vec![ - 0.000036, 0.000363, 0.001446, 0.002291, 0.001446, 0.000363, 0.000036, 0.000363, 0.003676, - 0.014662, 0.023226, 0.014662, 0.003676, 0.000363, 0.001446, 0.014662, 0.058488, 0.092651, - 0.058488, 0.014662, 0.001446, 0.002291, 0.023226, 0.092651, 0.146768, 0.092651, 0.023226, - 0.002291, 0.001446, 0.014662, 0.058488, 0.092651, 0.058488, 0.014662, 0.001446, 0.000363, - 0.003676, 0.014662, 0.023226, 0.014662, 0.003676, 0.000363, 0.000036, 0.000363, 0.001446, - 0.002291, 0.001446, 0.000363, 0.000036, - ]; - let gaussian_filter_h = HerculesImmBox::from(gaussian_filter.as_slice()); - - let sz: usize = 3; - let structure: Vec<f32> = vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]; - let structure_h = HerculesImmBox::from(structure.as_slice()); - - let sb: usize = 3; - let sx: Vec<f32> = vec![-1.0, 0.0, 1.0, -2.0, 0.0, 2.0, -1.0, 0.0, 1.0]; - let sx_h = HerculesImmBox::from(sx.as_slice()); - - let sy: Vec<f32> = vec![-1.0, -2.0, -1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0]; - let sy_h = HerculesImmBox::from(sy.as_slice()); - - let theta: f32 = 0.1; - - let mut video = VideoCapture::from_file_def(&input).expect("Error loading video"); - assert!(video.is_opened().unwrap()); - - let fps = video - .get(VideoCaptureProperties::CAP_PROP_FPS.into()) - .expect("Error getting fps"); - - let num_frames = video - .get(VideoCaptureProperties::CAP_PROP_FRAME_COUNT.into()) - .expect("Error getting number of frames") as usize; - let width = video - .get(VideoCaptureProperties::CAP_PROP_FRAME_WIDTH.into()) - .expect("Error getting width") as usize; - let height = video - .get(VideoCaptureProperties::CAP_PROP_FRAME_HEIGHT.into()) - .expect("Error getting height") as usize; - - let num_frames = if let Some(frames) = frames { - usize::min(frames, num_frames) - } else { - num_frames - }; - - let mut r = runner!(edge_detection); - - let mut output = output.map(|filename| { - VideoWriter::new( - &filename, - VideoWriter::fourcc('m', 'p', '4', 'v').unwrap(), - fps, - Size { - width: width as i32, - height: height as i32, - }, - false, - ) - .expect("Error opening output video") - }); - - let mut output_verify = output_verify.map(|filename| { - VideoWriter::new( - &filename, - VideoWriter::fourcc('m', 'p', '4', 'v').unwrap(), - fps, - Size { - width: width as i32, - height: height as i32, - }, - false, - ) - .expect("Error opening output video") - }); - - for i in 0..num_frames { - let frame = load_frame(&mut video); - let ptr = frame.ptr_def().unwrap() as *const f32; - - assert!(frame.rows() as usize == height); - assert!(frame.cols() as usize == width); - - let input = unsafe { from_raw_parts(ptr, height * width) }; - - let input_h = HerculesImmBox::from(input); - - let result = async_std::task::block_on(async { - safe_run( - &mut r, - height as u64, - width as u64, - gs as u64, - sz as u64, - sb as u64, - &input_h, - &gaussian_filter_h, - &structure_h, - &sx_h, - &sy_h, - theta, - ) - .await - }) - .as_slice() - .to_vec(); - - if display { - let result = frame_from_slice(&result, height, width); - let () = imshow("Juno", &result).expect("Failure in displaying image"); - } - if let Some(ref mut output) = output { - let result = frame_from_slice(&result, height, width); - let () = output.write(&result).expect("Failure in writing frame"); - } - - if verify { - let rust_result = edge_detection_rust::edge_detection( - height, - width, - gs, - sz, - sb, - input, - &gaussian_filter, - &structure, - &sx, - &sy, - theta, - ); - - assert_eq!(result, rust_result); - println!("Frames {} match", i); - - if display_verify { - let rust_result = frame_from_slice(&rust_result, height, width); - let () = imshow("Rust", &rust_result).expect("Failure in displaying image"); - } - if let Some(ref mut output) = output_verify { - let result = frame_from_slice(&rust_result, height, width); - let () = output.write(&result).expect("Failure in writing frame"); - } - } - - if display || (verify && display_verify) { - let _ = wait_key(0); - } - } - - if let Some(mut output) = output { - let () = output.release().expect("Failure releasing output video"); - } - if let Some(mut output) = output_verify { - let () = output.release().expect("Failure releasing output video"); - } -} +use juno_edge_detection::{edge_detection_harness, EdgeDetectionInputs}; fn main() { let args = EdgeDetectionInputs::parse(); diff --git a/juno_samples/fork_join_tests/src/cpu.sch b/juno_samples/fork_join_tests/src/cpu.sch index 7c416e904ad5d43a5297496b6de40037f5b9b553..c71aec111f42aad90c383f8b42f622037efac60c 100644 --- a/juno_samples/fork_join_tests/src/cpu.sch +++ b/juno_samples/fork_join_tests/src/cpu.sch @@ -25,10 +25,14 @@ gvn(*); phi-elim(*); dce(*); +lift-dc-math(*); +loop-bound-canon(*); + fixpoint panic after 20 { forkify(*); fork-guard-elim(*); - fork-coalesce(*); + fork-coalesce(*); + dce(*); } dce(*); diff --git a/juno_samples/fork_join_tests/src/fork_join_tests.jn b/juno_samples/fork_join_tests/src/fork_join_tests.jn index 8f569cfbe75105054e60c5d9bbb84b8ebbac6041..51115f1576edd1d555395717bdc1dcb4e82a2529 100644 --- a/juno_samples/fork_join_tests/src/fork_join_tests.jn +++ b/juno_samples/fork_join_tests/src/fork_join_tests.jn @@ -12,7 +12,7 @@ fn test1(input : i32) -> i32[4, 4] { #[entry] fn test2(input : i32) -> i32[4, 5] { let arr : i32[4, 5]; - @loop1 for i = 0 to 8 { + @loop1 for i = 1 to 9 { @loop2 for k = 0 to 5 { @loop3 for j = 0 to 4 { arr[j, k] += input; diff --git a/juno_samples/matmul/src/main.rs b/juno_samples/matmul/src/main.rs index c0e228daa04704b90156a592d27b761aeba6591c..29415b511992946b08a1496f3eb92d957615d8aa 100644 --- a/juno_samples/matmul/src/main.rs +++ b/juno_samples/matmul/src/main.rs @@ -3,9 +3,7 @@ use std::iter::zip; use rand::random; -use hercules_rt::{runner, HerculesRefInto}; -#[cfg(feature = "cuda")] -use hercules_rt::{CUDABox, HerculesCPURef}; +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox}; juno_build::juno!("matmul"); @@ -24,24 +22,12 @@ fn main() { } } } - #[cfg(not(feature = "cuda"))] - { - let mut r = runner!(matmul); - let c = r.run(I as u64, J as u64, K as u64, a.to(), b.to()).await; - let c = c.as_slice::<f32>(); - assert_eq!(c, &*correct_c); - } - #[cfg(feature = "cuda")] - { - let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&a)); - let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&b)); - let mut r = runner!(matmul); - let c = r - .run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()) - .await; - let mut c_cpu: Box<[f32]> = vec![0.0; correct_c.len()].into_boxed_slice(); - c.to_cpu_ref(&mut c_cpu); - assert!(zip(c_cpu, correct_c).all(|(calc, correct)| (calc - correct).abs() < 0.00001)); + let a = HerculesImmBox::from(a.as_ref()); + let b = HerculesImmBox::from(b.as_ref()); + let mut r = runner!(matmul); + let mut c = HerculesMutBox::from(r.run(I as u64, J as u64, K as u64, a.to(), b.to()).await); + for (calc, correct) in zip(c.as_slice().into_iter().map(|x: &mut f32| *x), correct_c) { + assert!((calc - correct).abs() < 0.0001, "{} != {}", calc, correct); } }); } diff --git a/juno_scheduler/src/pm.rs b/juno_scheduler/src/pm.rs index 94f900486f64a6021cc9e4ee098fd2dd2e8ecd27..b6aff26ba919c41990955db590667ab205976cc8 100644 --- a/juno_scheduler/src/pm.rs +++ b/juno_scheduler/src/pm.rs @@ -2781,29 +2781,35 @@ fn run_pass( } Pass::LoopBoundCanon => { assert_eq!(args.len(), 0); + loop { + let mut inner_changed = false; + pm.make_fork_join_maps(); + pm.make_loops(); + pm.make_control_subgraphs(); + let fork_join_maps = pm.fork_join_maps.take().unwrap(); + let loops = pm.loops.take().unwrap(); + let control_subgraphs = pm.control_subgraphs.take().unwrap(); - pm.make_fork_join_maps(); - pm.make_loops(); - pm.make_control_subgraphs(); - let fork_join_maps = pm.fork_join_maps.take().unwrap(); - let loops = pm.loops.take().unwrap(); - let control_subgraphs = pm.control_subgraphs.take().unwrap(); - - for (((func, fork_join_map), loops), control_subgraph) in - build_selection(pm, selection, false) - .into_iter() - .zip(fork_join_maps.iter()) - .zip(loops.iter()) - .zip(control_subgraphs.iter()) - { - let Some(mut func) = func else { - continue; - }; - loop_bound_canon_toplevel(&mut func, fork_join_map, control_subgraph, loops); - changed |= func.modified(); + for (((func, fork_join_map), loops), control_subgraph) in + build_selection(pm, selection.clone(), false) + .into_iter() + .zip(fork_join_maps.iter()) + .zip(loops.iter()) + .zip(control_subgraphs.iter()) + { + let Some(mut func) = func else { + continue; + }; + loop_bound_canon_toplevel(&mut func, fork_join_map, control_subgraph, loops); + changed |= func.modified(); + inner_changed |= func.modified(); + } + pm.delete_gravestones(); + pm.clear_analyses(); + if !inner_changed { + break; + } } - pm.delete_gravestones(); - pm.clear_analyses(); } } println!("Ran Pass: {:?}", pass);