diff --git a/DRED_codegen/.cproject b/.cproject similarity index 100% rename from DRED_codegen/.cproject rename to .cproject diff --git a/.idea/workspace.xml b/.idea/workspace.xml index f2f1e7de626a51c4a6b0e9df5ca421b83ce35a48..3153075588b5f6041e13da35ed568fe49126a46a 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -3,51 +3,6 @@ <component name="ChangeListManager"> <list default="true" id="b045d9d1-ede8-4e19-b09a-ddf511e7d34c" name="Default Changelist" comment=""> <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/history_trace.npy" beforeDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/gened.cpp" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/gened.cpp" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.export.ll" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.export.ll" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.0.bc" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.0.bc" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.1.bc" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.1.bc" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.1.ll" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.1.ll" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.2.bc" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.2.bc" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.2.prechk.bc" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.2.prechk.bc" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.bc" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.bc" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.ll" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.ll" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.1.bc" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.1.bc" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.1.ll" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.1.ll" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.1.tmp.bc" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.1.tmp.bc" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.2.bc" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.2.bc" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.2.ll" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.2.ll" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.3.bc" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.3.bc" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.3.ll" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.3.ll" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.bc" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.bc" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.ll" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.ll" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.pp.0.bc" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.pp.0.bc" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.pp.bc" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/a.pp.bc" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/autopilot.flow.log" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/autopilot.flow.log" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/autopilot.rtl.models.txt" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/autopilot.rtl.models.txt" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/bugpoint.sh" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/bugpoint.sh" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.bc" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.bc" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.g.bc" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.g.bc" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-cdt.cpp" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-cdt.cpp" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-line.cpp" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-line.cpp" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.00.o" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.00.o" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pragma.0.cpp" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pragma.0.cpp" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pragma.1.cpp" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pragma.1.cpp" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pragma.2.cpp" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pragma.2.cpp" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/global.setting.tcl" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/global.setting.tcl" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/hls_design_meta.cpp" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/hls_design_meta.cpp" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/hls_design_meta.h" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/hls_design_meta.h" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/hls_design_meta.tcl" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/hls_design_meta.tcl" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/pragma.0.directive" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/pragma.0.directive" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/pragma.1.directive" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/.autopilot/db/pragma.1.directive" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/solution1.aps" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/solution1.aps" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/solution1.log" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/hls_proj/solution1/solution1.log" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/inference.py" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/inference.py" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/script.tcl" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/script.tcl" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/synth.log" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/synth.log" afterDir="false" /> - <change beforePath="$PROJECT_DIR$/DRED_codegen/vivado_hls.log" beforeDir="false" afterPath="$PROJECT_DIR$/DRED_codegen/vivado_hls.log" afterDir="false" /> </list> <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" /> <option name="SHOW_DIALOG" value="false" /> @@ -95,7 +50,7 @@ <session id="1950885685"> <usages-collector id="statistics.lifecycle.project"> <counts> - <entry key="project.closed" value="10" /> + <entry key="project.closed" value="11" /> <entry key="project.open.time.0" value="1" /> <entry key="project.open.time.15" value="1" /> <entry key="project.open.time.17" value="1" /> @@ -143,13 +98,13 @@ <entry key="py" value="4531" /> <entry key="tcl" value="30" /> <entry key="test logs" value="17" /> - <entry key="txt" value="482" /> + <entry key="txt" value="529" /> </counts> </usages-collector> <usages-collector id="statistics.file.types.edit"> <counts> <entry key="C++" value="2" /> - <entry key="PLAIN_TEXT" value="643" /> + <entry key="PLAIN_TEXT" value="690" /> <entry key="Python" value="4667" /> </counts> </usages-collector> @@ -198,8 +153,8 @@ <file pinned="false" current-in-tab="true"> <entry file="file://$PROJECT_DIR$/DRED_codegen/exhaustive_search.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="513"> - <caret line="141" column="19" selection-start-line="141" selection-start-column="19" selection-end-line="141" selection-end-column="19" /> + <state relative-caret-position="548"> + <caret line="160" column="30" lean-forward="true" selection-start-line="160" selection-start-column="30" selection-end-line="160" selection-end-column="30" /> <folding> <element signature="e#1#59#0" expanded="true" /> </folding> @@ -382,7 +337,7 @@ <option name="IS_MODULE_SDK" value="true" /> <option name="ADD_CONTENT_ROOTS" value="true" /> <option name="ADD_SOURCE_ROOTS" value="true" /> - <option name="SCRIPT_NAME" value="$PROJECT_DIR$/DRED_codegen/codegen.py" /> + <option name="SCRIPT_NAME" value="$PROJECT_DIR$/codegen.py" /> <option name="PARAMETERS" value="" /> <option name="SHOW_COMMAND_LINE" value="false" /> <option name="EMULATE_TERMINAL" value="false" /> @@ -403,7 +358,7 @@ <option name="IS_MODULE_SDK" value="true" /> <option name="ADD_CONTENT_ROOTS" value="true" /> <option name="ADD_SOURCE_ROOTS" value="true" /> - <option name="SCRIPT_NAME" value="$PROJECT_DIR$/DRED_codegen/exhaustive_search.py" /> + <option name="SCRIPT_NAME" value="$PROJECT_DIR$/exhaustive_search.py" /> <option name="PARAMETERS" value="" /> <option name="SHOW_COMMAND_LINE" value="false" /> <option name="EMULATE_TERMINAL" value="false" /> @@ -425,7 +380,7 @@ <option name="IS_MODULE_SDK" value="true" /> <option name="ADD_CONTENT_ROOTS" value="true" /> <option name="ADD_SOURCE_ROOTS" value="true" /> - <option name="SCRIPT_NAME" value="$PROJECT_DIR$/DRED_codegen/inference.py" /> + <option name="SCRIPT_NAME" value="$PROJECT_DIR$/inference.py" /> <option name="PARAMETERS" value="" /> <option name="SHOW_COMMAND_LINE" value="false" /> <option name="EMULATE_TERMINAL" value="false" /> @@ -447,7 +402,7 @@ <option name="IS_MODULE_SDK" value="true" /> <option name="ADD_CONTENT_ROOTS" value="true" /> <option name="ADD_SOURCE_ROOTS" value="true" /> - <option name="SCRIPT_NAME" value="$PROJECT_DIR$/DRED_codegen/q_learning.py" /> + <option name="SCRIPT_NAME" value="$PROJECT_DIR$/q_learning.py" /> <option name="PARAMETERS" value="" /> <option name="SHOW_COMMAND_LINE" value="false" /> <option name="EMULATE_TERMINAL" value="false" /> @@ -469,7 +424,7 @@ <option name="IS_MODULE_SDK" value="true" /> <option name="ADD_CONTENT_ROOTS" value="true" /> <option name="ADD_SOURCE_ROOTS" value="true" /> - <option name="SCRIPT_NAME" value="$PROJECT_DIR$/DRED_codegen/q_learning_v2.py" /> + <option name="SCRIPT_NAME" value="$PROJECT_DIR$/q_learning_v2.py" /> <option name="PARAMETERS" value="" /> <option name="SHOW_COMMAND_LINE" value="false" /> <option name="EMULATE_TERMINAL" value="false" /> @@ -490,7 +445,7 @@ <option name="IS_MODULE_SDK" value="true" /> <option name="ADD_CONTENT_ROOTS" value="true" /> <option name="ADD_SOURCE_ROOTS" value="true" /> - <option name="SCRIPT_NAME" value="$PROJECT_DIR$/DRED_codegen/tempate.py" /> + <option name="SCRIPT_NAME" value="$PROJECT_DIR$/tempate.py" /> <option name="PARAMETERS" value="" /> <option name="SHOW_COMMAND_LINE" value="false" /> <option name="EMULATE_TERMINAL" value="false" /> @@ -599,7 +554,14 @@ <option name="project" value="LOCAL" /> <updated>1544975212912</updated> </task> - <option name="localTasksCounter" value="11" /> + <task id="LOCAL-00011" summary="Final commit before submitting the report"> + <created>1545337067351</created> + <option name="number" value="00011" /> + <option name="presentableId" value="LOCAL-00011" /> + <option name="project" value="LOCAL" /> + <updated>1545337067351</updated> + </task> + <option name="localTasksCounter" value="12" /> <servers /> </component> <component name="TodoView"> @@ -613,13 +575,14 @@ </component> <component name="ToolWindowManager"> <frame x="121" y="13" width="3720" height="2148" extended-state="6" /> + <editor active="true" /> <layout> <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.2044077" /> <window_info id="Structure" order="1" side_tool="true" weight="0.25" /> <window_info id="Favorites" order="2" side_tool="true" /> <window_info anchor="bottom" id="Message" order="0" /> <window_info anchor="bottom" id="Find" order="1" weight="0.3296875" /> - <window_info anchor="bottom" id="Run" order="2" visible="true" weight="0.3421875" /> + <window_info anchor="bottom" id="Run" order="2" weight="0.3421875" /> <window_info anchor="bottom" id="Debug" order="3" weight="0.4" /> <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" /> <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" /> @@ -686,7 +649,8 @@ <MESSAGE value="bugs in the newly added if logic" /> <MESSAGE value="fixed history lookup feature" /> <MESSAGE value="finished training for now, ends at ~8380, traces and logs added" /> - <option name="LAST_COMMIT_MESSAGE" value="finished training for now, ends at ~8380, traces and logs added" /> + <MESSAGE value="Final commit before submitting the report" /> + <option name="LAST_COMMIT_MESSAGE" value="Final commit before submitting the report" /> </component> <component name="editorHistoryManager"> <entry file="file://$PROJECT_DIR$/Board Coverage" /> @@ -831,8 +795,8 @@ </entry> <entry file="file://$PROJECT_DIR$/DRED_codegen/exhaustive_search.py"> <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="513"> - <caret line="141" column="19" selection-start-line="141" selection-start-column="19" selection-end-line="141" selection-end-column="19" /> + <state relative-caret-position="548"> + <caret line="160" column="30" lean-forward="true" selection-start-line="160" selection-start-column="30" selection-end-line="160" selection-end-column="30" /> <folding> <element signature="e#1#59#0" expanded="true" /> </folding> diff --git a/DRED_codegen/.project b/.project similarity index 100% rename from DRED_codegen/.project rename to .project diff --git a/DRED_codegen/1k-3k_trace.log b/1k-3k_trace.log similarity index 100% rename from DRED_codegen/1k-3k_trace.log rename to 1k-3k_trace.log diff --git a/DRED_codegen/5k_7k_trace.log b/5k_7k_trace.log similarity index 100% rename from DRED_codegen/5k_7k_trace.log rename to 5k_7k_trace.log diff --git a/DRED_codegen/8k.log b/8k.log similarity index 100% rename from DRED_codegen/8k.log rename to 8k.log diff --git a/DRED_codegen/.apc/autopilot.apfmapping b/DRED_codegen/.apc/autopilot.apfmapping deleted file mode 100644 index 067af1c0b4890d04b3afda69e30a49543d0fcd95..0000000000000000000000000000000000000000 --- a/DRED_codegen/.apc/autopilot.apfmapping +++ /dev/null @@ -1,7 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<autopilotfilemapping:AutoPilotFileMapping xmlns:autopilotfilemapping="www.autoesl.com/autopilotfilemapping"> - <source> - <originFiles name="gened.cpp" path="G:\ECE527\DRED\DRED_codegen\gened.cpp"/> - </source> - <testbench/> -</autopilotfilemapping:AutoPilotFileMapping> diff --git a/DRED_codegen/.idea/modules.xml b/DRED_codegen/.idea/modules.xml deleted file mode 100644 index 7b6e3e2b610783953af28ff9b3f10ea67972b6c3..0000000000000000000000000000000000000000 --- a/DRED_codegen/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ProjectModuleManager"> - <modules> - <module fileurl="file://$PROJECT_DIR$/.idea/DRED_codegen.iml" filepath="$PROJECT_DIR$/.idea/DRED_codegen.iml" /> - </modules> - </component> -</project> \ No newline at end of file diff --git a/DRED_codegen/.idea/vcs.xml b/DRED_codegen/.idea/vcs.xml deleted file mode 100644 index 6c0b8635858dc7ad44b93df54b762707ce49eefc..0000000000000000000000000000000000000000 --- a/DRED_codegen/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="VcsDirectoryMappings"> - <mapping directory="$PROJECT_DIR$/.." vcs="Git" /> - </component> -</project> \ No newline at end of file diff --git a/DRED_codegen/.settings/DRED.Debug.launch b/DRED_codegen/.settings/DRED.Debug.launch deleted file mode 100644 index 2c0e8b7de810c00076775964915b1a52df91dc03..0000000000000000000000000000000000000000 --- a/DRED_codegen/.settings/DRED.Debug.launch +++ /dev/null @@ -1,12 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" standalone="no"?> -<launchConfiguration type="org.eclipse.cdt.launch.applicationLaunchType"> -<stringAttribute key="org.eclipse.cdt.launch.COREFILE_PATH" value=""/> -<stringAttribute key="org.eclipse.cdt.launch.PROGRAM_NAME" value="Debug/a.exe"/> -<stringAttribute key="org.eclipse.cdt.launch.PROJECT_ATTR" value="DRED"/> -<booleanAttribute key="org.eclipse.cdt.launch.PROJECT_BUILD_CONFIG_AUTO_ATTR" value="false"/> -<stringAttribute key="org.eclipse.cdt.launch.PROJECT_BUILD_CONFIG_ID_ATTR" value="cdt.managedbuild.config.gnu.exe.debug.1188291541"/> -<stringAttribute key="org.eclipse.cdt.launch.WORKING_DIRECTORY" value="G:\ECE527\DRED"/> -<booleanAttribute key="org.eclipse.cdt.launch.use_terminal" value="true"/> -<stringAttribute key="org.eclipse.debug.core.source_locator_id" value="org.eclipse.cdt.debug.core.sourceLocator"/> -<stringAttribute key="org.eclipse.debug.core.source_locator_memento" value="<?xml version="1.0" encoding="UTF-8" standalone="no"?> <sourceLookupDirector> <sourceContainers duplicates="false"> <container memento="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#13;&#10;&lt;default/&gt;&#13;&#10;" typeId="org.eclipse.debug.core.containerType.default"/> <container memento="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#13;&#10;&lt;directory nest=&quot;true&quot; path=&quot;D:\Vivado_HLS\2017.2\win64\tools\systemc&quot;/&gt;&#13;&#10;" typeId="org.eclipse.debug.core.containerType.directory"/> </sourceContainers> </sourceLookupDirector> "/> -</launchConfiguration> diff --git a/DRED_codegen/.settings/DRED.Release.launch b/DRED_codegen/.settings/DRED.Release.launch deleted file mode 100644 index 451fc12a62ca5c17ea2a9ea9e0c0d3a83cd86327..0000000000000000000000000000000000000000 --- a/DRED_codegen/.settings/DRED.Release.launch +++ /dev/null @@ -1,12 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" standalone="no"?> -<launchConfiguration type="org.eclipse.cdt.launch.applicationLaunchType"> -<stringAttribute key="org.eclipse.cdt.launch.COREFILE_PATH" value=""/> -<stringAttribute key="org.eclipse.cdt.launch.PROGRAM_NAME" value="Release/a.exe"/> -<stringAttribute key="org.eclipse.cdt.launch.PROJECT_ATTR" value="DRED"/> -<booleanAttribute key="org.eclipse.cdt.launch.PROJECT_BUILD_CONFIG_AUTO_ATTR" value="false"/> -<stringAttribute key="org.eclipse.cdt.launch.PROJECT_BUILD_CONFIG_ID_ATTR" value="cdt.managedbuild.config.gnu.exe.release.200798804"/> -<stringAttribute key="org.eclipse.cdt.launch.WORKING_DIRECTORY" value="G:\ECE527\DRED"/> -<booleanAttribute key="org.eclipse.cdt.launch.use_terminal" value="true"/> -<stringAttribute key="org.eclipse.debug.core.source_locator_id" value="org.eclipse.cdt.debug.core.sourceLocator"/> -<stringAttribute key="org.eclipse.debug.core.source_locator_memento" value="<?xml version="1.0" encoding="UTF-8" standalone="no"?> <sourceLookupDirector> <sourceContainers duplicates="false"> <container memento="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#13;&#10;&lt;default/&gt;&#13;&#10;" typeId="org.eclipse.debug.core.containerType.default"/> <container memento="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;no&quot;?&gt;&#13;&#10;&lt;directory nest=&quot;true&quot; path=&quot;D:\Vivado_HLS\2017.2\win64\tools\systemc&quot;/&gt;&#13;&#10;" typeId="org.eclipse.debug.core.containerType.directory"/> </sourceContainers> </sourceLookupDirector> "/> -</launchConfiguration> diff --git a/DRED_codegen/main.py b/DRED_codegen/main.py deleted file mode 100644 index 1a2a34f16b5bf39691129897161fc702dc4aa32d..0000000000000000000000000000000000000000 --- a/DRED_codegen/main.py +++ /dev/null @@ -1,224 +0,0 @@ -#!/usr/bin/env python - -""" -Inspirit DNN Generator -""" - -import math, os, platform, random, re, sys, time, threading, itertools, json, argparse, collections, glob, ast - -from jinja2 import Environment, FileSystemLoader, Template - -def generate_string_from_templatefile(template_file, target_name, render_dict = None): - ''' - Generates a string from a template file. - ''' - cwd = os.getcwd() - template_dir = os.path.join(cwd, "dnngen", "templates", target_name) - config_template_env = Environment(loader = FileSystemLoader(template_dir), trim_blocks=True, keep_trailing_newline=True) - - config_template_env.filters['joinpath'] = lambda list: os.path.join(*list) - grab_template = config_template_env.get_template(template_file) - return grab_template.render(render_dict) - -def generate_script_from_templatefile(template_file, output_script_path, target_name, render_dict = None) : - ''' - Generates a test script from a template file. - ''' - - rendered_script = generate_string_from_templatefile(template_file, target_name, render_dict) - with open(output_script_path, 'w') as generate_script_file: - generate_script_file.write(rendered_script) - - return rendered_script - -def scan_for_templatefile(base_dir, templated_dir_path, target_name, render_dict = None) : - ''' - In general, it will use the name of the tool and step to scan through - the specific folder for templates that need to be rendered. - ''' - files = [] - fileslist = [] - files = glob.glob(os.path.join(base_dir, templated_dir_path, target_name, "*.jinja")) - for each in files: - fileslist.append(os.path.basename(each)) - - print "In " + base_dir + ":" - for every in fileslist: - outputFileName = os.path.splitext(every) - print "Generate " + outputFileName[0] - outputFilePath = os.path.join(base_dir, outputFileName[0]) - generate_script_from_templatefile(every, outputFilePath,target_name, render_dict) - -def validate_conv_conv ( layerA, layerB ) : - if (len(layerA["iDim"]) == 1 or len(layerB["iDim"]) == 1) : - print "Unexpected: convolution layer with one dimension" - - oDim = layerA["iDim"][0] - if (layerA["pad"] == "same"): - oDim += (layerA["window"][0] - 1) - oDim -= layerA["window"][0] - oDim /= layerA["step"][0] - oDim += 1 - - if (oDim != layerB["iDim"][0]): - print "Incompatible layer: " + layerA["name"] + ", " + layerB["name"] - print "oDim = " + str(oDim) + ", layerB iDim = " + str(layerB["iDim"][0]) - - if (layerA["filters"] != layerB["iDim"][2]): - print "Incompatible layer: " + layerA["name"] + ", " + layerB["name"] - print "filters = " + str(layerA["filters"]) + " iDim = " + str(layerB["iDim"][2]) - -def validate_conv_pool ( layerA, layerB ) : - validate_conv_conv( layerA, layerB) #for now, same implementation as conv_conv - -def validate_conv_full ( layerA, layerB ) : - oDim = layerA["iDim"][0] - if (layerA["pad"] == "same"): - oDim += (layerA["window"][0] - 1) - oDim -= layerA["window"][0] - oDim /= layerA["step"][0] - oDim += 1 - - size = oDim * oDim * layerA["filters"] - - if ( size != layerB["iDim"][0]) : - print "Incompatible layer: " + layerA["name"] + ", " + layerB["name"] - print "oDim = " + str(oDim) + ", layerB iDim = " + str(layerB["iDim"][0]) - -def validate_pool_conv ( layerA, layerB) : - oDim = layerA["iDim"][0] - if (layerA["pad"] == "same"): - oDim += (layerA["window"][0] - 1) - oDim -= layerA["window"][0] - oDim /= layerA["step"][0] - oDim += 1 - - if (oDim != layerB["iDim"][0]): - print "Incompatible layer: " + layerA["name"] + ", " + layerB["name"] - print "oDim = " + str(oDim) + ", layerB iDim = " + str(layerB["iDim"][0]) - - if (layerA["iDim"][2] != layerB["iDim"][2]) : - print "Incompatible layer: " + layerA["name"] + ", " + layerB["name"] - print "filters = " + str(layerA["iDim"][2]) + " iDim = " + str(layerB["iDim"][2]) - -def validate_pool_full( layerA, layerB) : - oDim = layerA["iDim"][0] - if (layerA["pad"] == "same"): - oDim += (layerA["window"][0] - 1) - oDim -= layerA["window"][0] - oDim /= layerA["step"][0] - oDim += 1 - - size = oDim * oDim * layerA["iDim"][2] - - if (size != layerB["iDim"][0]) : - print "Incompatible layer: " + layerA["name"] + ", " + layerB["name"] - print "size = " + str(size) + ", layerB iDim = " + str(layerB["iDim"][0]) - -def validate_full_full( layerA, layerB) : - if (layerA["filters"] != layerB["iDim"][0]) : - print "Incompatible layer: " + layerA["name"] + ", " + layerB["name"] - print "size = " + str(size) + ", layerB iDim = " + str(layerB["iDim"][0]) - -def validate_layer( layerA, layerB) : - if (layerA["layer_type"] == "conv"): - if (layerB["layer_type"] == "conv"): - validate_conv_conv( layerA, layerB) - elif (layerB["layer_type"] == "max_pool"): - validate_conv_pool( layerA, layerB) - elif (layerB["layer_type"] == "fc"): - validate_conv_full( layerA, layerB) - else : - print "Unsupported Layer connection" - elif (layerA["layer_type"] == "max_pool"): - if (layerB["layer_type"] == "conv"): - validate_pool_conv( layerA, layerB) - elif (layerB["layer_type"] == "max_pool"): - print "Unexpected -- two pool layers back to back" - elif (layerB["layer_type"] == "fc"): - validate_pool_full( layerA, layerB) - else: - print "Unsupported Layer connection" - elif (layerA["layer_type"] == "fc"): - if (layerB["layer_type"] == "conv"): - print "Unsupported layer validation" - elif (layerB["layer_type"] == "max_pool"): - print "Unsupported layer validation" - elif (layerB["layer_type"] == "fc"): - validate_full_full( layerA, layerB) - else : - print "Unsupported Layer connection" - else : - print "Unsupported Layer connection" - -def main(builtinParameters = {}): - # parse input arguments - parser = argparse.ArgumentParser(description='DNNOpt code generator') - parser.add_argument('spec', help='location of the spec file') - parser.add_argument('--debug', help='if debugging messages and features are enabled', action='store_true') - parser.add_argument('--verify', help='specify type of verification, single layer or network', action='store') - parser.add_argument('--layer_test_method', help='specify method of verification for single layer: random/golden', action='store') - parser.add_argument('--network_test_method', help='specify method of verification for the network: groundtruth/golden', action='store') - args = parser.parse_args() - cwd = os.getcwd() - - # read input spec file - with open(os.path.join(cwd, args.spec), 'r') as jsondata: - # network_spec = ast.literal_eval(jsondata) - network_spec = json.load(jsondata) - - # parsing arguments - network_spec['flags'] = dict() - # if debugging mode is on - # TODO: improve the error messages - if(args.debug): - print("INFO: Debugging mode") - network_spec['flags']['debug'] = True - if(args.verify == 'layer'): - network_spec['flags']['verify'] = 'layer' - if((args.layer_test_method != 'random') and (args.layer_test_method != 'golden')): - sys.exit('ERROR: unrecognized test method') - else: - network_spec['flags']['test_method'] = args.layer_test_method - if(len(network_spec['network']) > 1): - sys.exit('ERROR: more than one layer given for layer verification') - else: - print('INFO: Generating verification code for single layer') - elif(args.verify == 'network'): - print('INFO: Generating verification code for whole network') - if((args.network_test_method != 'groundtruth') and (args.network_test_method != 'golden')): - sys.exit('ERROR: Unrecognized test method') - else: - network_spec['flags']['test_method'] = args.network_test_method - network_spec['flags']['verify'] = 'network' - elif(args.verify is not None): - # error handling - sys.exit("ERROR: Invalid option for validation mode") - else: - print("INFO: In clean mode") - - # if clustering is enabled - try: - cluster_values_file = network_spec['global']['cluster_values_file'] - print("INFO: Clustering enabled, cluster file: " + cluster_values_file) - with open(os.path.join(cwd, cluster_values_file), 'r') as cluster_file : - #network_spec = ast.literal_eval(jsondata) - cluster = json.load(cluster_file) - except KeyError: - print("WARNNING: Cluster weight file name not found, clustering disabled") - - for idx, layer in enumerate(network_spec['network']): - layer_name = layer['name'] - if (layer['layer_type'] == 'conv') or (layer['layer_type'] == 'fc'): - if layer['preprocess']['cluster'] == True: - network_spec['network'][idx]['cluster_values'] = cluster[layer_name] - - #for index in range(len(network_spec["network"])): - # if (index != 0) : - # validate_layer(network_spec["network"][index-1], network_spec["network"][index]) - - scan_for_templatefile(cwd, "dnngen/templates", "syncpp", network_spec) - -if __name__=='__main__': - main() - diff --git a/DRED_codegen/templates/aux_funcs.inc b/DRED_codegen/templates/aux_funcs.inc deleted file mode 100644 index eb081c69216cc7d71b188ac7890294c5f06609ec..0000000000000000000000000000000000000000 --- a/DRED_codegen/templates/aux_funcs.inc +++ /dev/null @@ -1,470 +0,0 @@ -{% import 'global_macros.inc' as gm %} -{% set ident = gm.ident %} -{%- macro fcall_readW(object) -%} -read_weights<int8_t>("{{object.weights.W}}.bin", {{object.wshape[0]}}, {{object.wshape[1]}}, {{object.wshape[2]}}, {{object.wshape[3]}}, (ap_int<512>*)w); -{%- endmacro -%} - -{%- macro fcall_readW_fc(object, prev) -%} -{% if prev.layer_type == 'fc' %} -read_weights_fc<int8_t>("{{object.weights.W}}.bin", 1, 1, {{prev.wshape[1]}}, {{object.wshape[1]}}, (ap_int<512>*)w); -{% elif prev.layer_type == 'max_pool' %} -{%- set stride = prev.weights.stride[1] -%} -{%- set ksize = prev.weights.ksize[1] -%} -{%- set padsize = prev.weights.padsize -%} -{% if prev.mode == 'SAME' %} -{%- set OH = (prev.dshape[0]/stride)|round(0,'ceil')|int -%} -{%- set OW = (prev.dshape[1]/stride)|round(0,'ceil')|int -%} -{% else %} -{%- set OH = ((prev.dshape[0]-ksize+1)/stride)|round(0,'ceil')|int -%} -{%- set OW = ((prev.dshape[1]-ksize+1)/stride)|round(0,'ceil')|int -%} -{% endif %} -read_weights_fc<int8_t>("{{object.weights.W}}.bin", {{OH}}, {{OW}}, {{prev.dshape[2]}}, {{object.wshape[1]}}, (ap_int<512>*)w); -{% endif %} -{%- endmacro -%} - -{%- macro fcall_readb(object) -%} -{%- if object.layer_type == 'conv' %} -read_bias<int8_t>("{{object.weights.b}}.bin", {{object.wshape[3]}}, {{object.weights.b_width}}, {{object.loop_factor}}, {{object.interface.memory_bit_width}}, (ap_int<512>*)w); -{%- elif object.layer_type == 'max_pool' -%} -read_bias<int8_t>("{{object.weights.b}}.bin", {{object.dshape[2]}}, {{object.weights.b_width}}, {{object.loop_factor}}, {{object.interface.memory_bit_width}}, (ap_int<512>*)w); -{%- elif object.layer_type == 'fc' -%} -read_bias<int8_t>("{{object.weights.b}}.bin", {{object.dshape[0]}}, {{object.weights.b_width}}, {{object.op_loop_factor}}, {{object.interface.memory_bit_width}}, (ap_int<512>*)w); -{%- endif -%} -{%- endmacro -%} - -{%- macro fcall_readIp(object) -%} -read_input("./test_data/test_image.bin", {{object.dshape[0]}}, {{object.dshape[1]}}, {{object.dshape[2]}}, a); -{%- endmacro -%} - -{%- macro fimpl_aux() -%} -{# ATTENTION: the following parameters might need to be tweaked - when testing the framework under various bit-width #} -{% set w_bw = 8 %} {# bit-width of the weights#} -{% set b_bw = 8 %} {# bit-width of the biases#} -{% set ip_bw = 8 %}{# bit-width of the input TO THE NETWORK #} -{% set loop_factor = 32 %} {# temporary setting #} -{% set mem_bw = 512 %} {# temporary setting #} -{% set iters_weight = (w_bw*loop_factor*2/mem_bw)|round(0,'ceil')|int %} -template <class T> -void display_4d( - int N, - int C, - int H, - int W, - T arr[] -){ -{% call gm.forloop(1, 'co', 'N') %} -{% call gm.forloop(2, 'ci', 'C') %} -{% call gm.forloop(3, 'fh', 'H') %} -{% call gm.forloop(4, 'fw', 'W') %} -{{gm.ident(5)}}cout << setw(4) << arr[fh*N*C*W+fw*N*C+ci*N+co]; -{%- endcall %} -{{gm.ident(4)}}cout << endl; -{%- endcall %} -{{gm.ident(3)}}cout << endl; -{%- endcall -%} -{%- endcall %} -} - -template <class T> -void display_3d( - int H, - int W, - int C, - T arr[] -){ -{% call gm.forloop(1, 'c', 'C') %} -{% call gm.forloop(2, 'h', 'H') %} -{% call gm.forloop(3, 'w', 'W') %} -{{gm.ident(4)}}cout << setw(4) << (int)arr[c*H*W+h*W+w]; -{%- endcall -%} -{{gm.ident(3)}}cout << endl; -{%- endcall -%} -{{gm.ident(2)}}cout << endl; -{%- endcall -%} -} - -{# Golden model to compare to #} -template <class T> -void conv_2d(T output[], - T kernel[], - T input[], - int FH, //Filter height - int FW, //Filter width - int IH, //Input height - int IW, //Input width - int CI, //Input channels - int OH, //Output Height(after padding) - int OW, //Output Width (after padding) - int CO, //Output Channels - int S) //Stride -{ -T temp, local_sum; -/**************main loop body****************************/ -{% call gm.forloop(1, 'co', 'CO') %} -{% call gm.forloop(2, 'out_h', 'OH') %} -{% call gm.forloop(3, 'out_w', 'OW') %} -{{gm.ident(4)}}local_sum = 0; -{% call gm.forloop(4, 'ci', 'CI') %} -{% call gm.forloop(5, 'fh', 'FH') %} -{% call gm.forloop(6, 'fw', 'FW') %} -{# Note: The order of dimension for input is CHW and HWCN for the kernel#} -{{gm.ident(7)}}temp = input[ci*IH*IW+(out_h*S+fh)*IW+(out_w*S+fw)]; -{{gm.ident(7)}}local_sum += kernel[fh*(FW*CI*CO)+fw*(CI*CO)+ci*CO+co]*temp; -{% endcall %} -{% endcall %} -{% endcall %} -{{gm.ident(4)}}output[co*OH*OW+out_h*OW+out_w] = local_sum; -{% endcall %} -{% endcall %} -{% endcall %} -} - -{# Golden model for Fully Connected Layer, essentially a Mat-Vec Multiplication #} -template <class T> -void fc(T output[], - T weight[], - T input[], - int IW, - int OW) -{ -{% call gm.forloop(1, 'ow', 'OW') %} -{% call gm.forloop(2, 'iw', 'IW') %} -{{gm.ident(3)}}output[ow] += input[iw]*weight[iw*OW+ow]; -{%- endcall -%} -{%- endcall -%} -} - -{# since there's no need to parameterize these functions - leave them as is here #} -template <class T> -void relu_gold( - int IH, - int IW, - int IC, - int padding, - T in[], - T bias[], - T out[] - ){ - int OW = IW + 2 * padding; - int OH = IH + 2 * padding; - - for(int c = 0;c < IC;c ++){ - for(int i = 0;i < padding*OW;i ++){ - out[c*OH*OW+i] = 0; - } - for(int h = 0;h < IH;h ++){ - for(int i = 0;i < padding;i ++){ - out[c*OH*OW+(h+padding)*OW+i] = 0; - } - for(int w = 0;w < IW;w ++){ - T temp = in[c*IH*IW+h*IW+w]; - if(temp > 0){ - out[c*OH*OW+(h+padding)*OW+w+padding] = temp + bias[c]; - } else { - out[c*OH*OW+(h+padding)*OW+w+padding] = 0; - } - } - for(int i = 0;i < padding;i ++){ - out[c*OH*OW+(h+padding)*OW+IW+padding+i] = 0; - } - } - for(int i = 0;i < padding*OW;i ++){ - out[c*OH*OW+(IH+padding)*OW+i] = 0; - } - } -} - -template <class T> -void maxpool_gold( - int IH, - int IW, - int IC, - int padding, - int k_size, - int stride, - bool is_same, - T in[], - T bias[], - T out[] - ){ - int OW, OH, w_start, w_end, end; - - if(is_same){ - OW = ceil((float)IW/(float)stride) + 2 * padding; - OH = ceil((float)IH/(float)stride) + 2 * padding; - w_start = -(k_size-1)/2; - w_end = (k_size-1)/2 + 1; - end = IW; - } else { - OW = ceil((float)(IW - k_size + 1)/(float)stride + 2 * padding); - OH = ceil((float)(IH - k_size + 1)/(float)stride + 2 * padding); - w_start = 0; - w_end = k_size; - end = IW - k_size + 1; - } - - for(int c = 0;c < IC;c ++){ - for(int i = 0;i < padding*OW;i ++){ - out[c*OH*OW+i] = 0; - } - for(int h = 0;h < end;h += stride){ - for(int i = 0;i < padding;i ++){ - out[c*OH*OW+(h/stride+padding)*OW+i] = 0; - } - for(int w = 0;w < end;w += stride){ - T rst = 0; - for(int hh = w_start;hh < w_end;hh ++){ - for(int ww = w_start;ww < w_end;ww ++){ - if (!(((hh+h)<0)||((ww+w)<0)||((hh+h)>=IH)||((ww+w)>=IW))){ - T temp = in[c*IH*IW+(h+hh)*IW+(w+ww)] + bias[c]; - if(temp > rst) rst = temp; - } - } - } - out[c*OH*OW+(h/stride+padding)*OW+w/stride+padding] = rst; - } - for(int i = 0;i < padding;i ++){ - out[c*OH*OW+(h/stride+padding)*OW+OW-padding+i] = 0; - } - } - for(int i = 0;i < padding*OW;i ++){ - out[c*OH*OW+(OH-padding)*OW+i] = 0; - } - } -} - -{# Universal data read function, read in data from *.bin #} -template<class T> -int read_data(const char *path, T* array, int size){ - FILE *fp; - fp = fopen(path,"rb"); - if(fp == NULL) { - printf("Input file not found\n"); abort(); - } - int ret; - ret = fread(array,sizeof(T), size, fp); - fclose(fp); - return ret; -} - -{# Function that packs the data from a regular array - This function is also used by the validation functions #} -template<class T> -void pack_weights(int FH, int FW, int CII, int COO, T in[], ap_int<{{mem_bw}}>out[50176]){ -{{gm.ident(1)}}ap_int<{{w_bw*loop_factor}}> outPack1 = 0; -{{gm.ident(1)}}ap_int<{{w_bw*loop_factor}}> outPack2 = 0; -{% call gm.forloop(1, 'i', '50176') %} -{{gm.ident(1)}}out[i] = 0; -{%- endcall %} -{{gm.ident(1)}}int address = 0; -{% call gm.forloop(1, 'fh', 'FH') %} -{% call gm.forloop(2, 'fw', 'FW') %} -{% call gm.forloop_s(3, 'cii', 'CII', loop_factor) %} -{{gm.ident(3)}}//input is packed by {{loop_factor}} -{% call gm.forloop_s(4, 'coo', 'COO', 2) %} -{% call gm.forloop(5, 'i', loop_factor) %} -{# Since the computatino requires 2 packs of weight for each loop cycle - we need to pack 2 packs in each batch - Notice that we assumes the number of output layers to be even #} -//Pack data in the array -{{gm.ident(6)}}if(cii+i < CII){ -{{gm.ident(7)}}outPack1.{{gm.pr_range_idx(w_bw, 'i')}} = -{{gm.ident(8)}}in[fh*(FW*CII*COO)+fw*(CII*COO)+(cii+i)*COO+coo]; -{{gm.ident(7)}}outPack2.{{gm.pr_range_idx(w_bw, 'i')}} = -{{gm.ident(8)}}in[fh*(FW*CII*COO)+fw*(CII*COO)+(cii+i)*COO+coo+1]; -{{gm.ident(6)}}} else { -{{gm.ident(7)}}outPack1.{{gm.pr_range_idx(w_bw, 'i')}} = 0; //pad 0 -{{gm.ident(7)}}outPack2.{{gm.pr_range_idx(w_bw, 'i')}} = 0; //pad 0 -{{gm.ident(6)}}} -{%- endcall -%} -{{gm.ident(5)}}//write pack to the buffer according to mem bitwidth -{%for idx in range(0,iters_weight)%} -{# if this is not the last pack#} -{% if ((idx+1)*(mem_bw/2) < w_bw*loop_factor) %} -{{gm.ident(5)}}out[address+{{idx}}].{{gm.pr_range_bound((mem_bw/2-1)|int,0)}} = outPack1.{{gm.pr_range_bound(((idx+1)*(mem_bw/2)-1)|int,(idx*mem_bw/2)|int)}}; -{{gm.ident(5)}}out[address+{{idx}}].{{gm.pr_range_bound(mem_bw-1,(mem_bw/2)|int)}} = outPack2.{{gm.pr_range_bound(((idx+1)*(mem_bw/2)-1)|int,(idx*mem_bw/2)|int)}}; -{# if this is the last pack#} -{% else %} -{{gm.ident(5)}}out[address+{{idx}}].{{gm.pr_range_bound(w_bw*loop_factor-1-(idx*mem_bw/2)|int,0)}} = outPack1.{{gm.pr_range_bound(w_bw*loop_factor-1,(idx*mem_bw/2)|int)}}; -{{gm.ident(5)}}out[address+{{idx}}].{{gm.pr_range_bound(w_bw*loop_factor-1-((idx-1)*mem_bw/2)|int,(mem_bw/2)|int)}} = outPack2.{{gm.pr_range_bound(w_bw*loop_factor-1,(idx*mem_bw/2)|int)}}; -{%endif%} -{%endfor%} -{{gm.ident(5)}}address += {{iters_weight}}; -{%- endcall -%} -{%- endcall -%} -{%- endcall -%} -{%- endcall -%} -} - -template<class T> -void read_weights(const char* path, int FH, int FW, int CII, int COO, ap_int<{{mem_bw}}>out[50176]){ -{{gm.ident(1)}}T in[FH*FW*CII*COO]; -{{gm.ident(1)}}read_data<T>(path, (T*)in, FH*FW*CII*COO); -{{gm.ident(1)}}pack_weights<T>(FH, FW, CII, COO, in, out); -} - -{# this function is parameterized with runtime parameters, - so it does not need to be parameterized in template #} -template<class T> -void pack_input(int H, int W, int C, int padSize, int packSize, int bitWidth,T in[], ap_int<512> out[50176]){ - int OH = H + 2*padSize; - int OW = W + 2*padSize; - int numBufLines = ceil((float)(packSize*bitWidth)/(float)512); //512 is the bit-width of one buffer line - int numbersPerLine = 512/bitWidth; - - for (int c=0; c < C; c++) { - int packIdx = c / packSize; - int lineIdx = (c % packSize)/numbersPerLine; - int linePos = (c % packSize)%numbersPerLine; - //pad leading zero rows - for (int i=0; i < padSize*OW*numBufLines; i++) { - out[(c*OH*OW)*numBufLines+i]=0; - } - for (int h=0; h < H; h++) { - // pad zeros in front of each line - for (int i=0; i < padSize*numBufLines; i++) { - out[(c*OH*OW+(padSize+h)*OW)*numBufLines+i]=0; - } - for (int w=0; w < W; w++) { - out[(packIdx*OH*OW+(padSize+h)*OW+padSize+w)*numBufLines+lineIdx].range(bitWidth*linePos+bitWidth-1,bitWidth*linePos) = - in[h*W*C+w*C+c]; - } - //pad zeros at the end of each line - for (int i=0; i < padSize*numBufLines; i++) { - out[(c*OH*OW+(padSize+h)*OW+W+padSize)*numBufLines+i]=0; - } - - } - for (int i=0; i < padSize*OW*numBufLines; i++) { - out[(c*OH*OW+(padSize+H)*OW)*numBufLines+i]=0; - } - - } -} - -template<class T> -void read_input(const char* path, int IH, int IW, int CII, int pad, ap_int<512> out[50176]){ -{{gm.ident(1)}}T in[CII*IH*IW]; -{{gm.ident(1)}}read_data<T>(path, (T*)in, IH*IW*CII); -{{gm.ident(1)}}//display_3d<T>(IH, IW, CII, in); -{{gm.ident(1)}}/* ATTENTION: -{{gm.ident(1)}} * pad size set to 2 for LeNet, remember to change this for other nets -{{gm.ident(1)}} * input loop factor is set to 32, bitWidth of input is 8 -{{gm.ident(1)}} */ -{{gm.ident(1)}}pack_input<T>(IH, IW, CII, pad, 32, 8, in, out); -} - -template<class T> -void pack_weights_fc(int IH, int IW, int CII, int COO, T in[], ap_int<512>out[50176]){ -{{gm.ident(1)}}int address = 0; -{{gm.ident(1)}}ap_int<{{w_bw*loop_factor}}> outPack1 = 0; -{{gm.ident(1)}}ap_int<{{w_bw*loop_factor}}> outPack2 = 0; -{% call gm.forloop(1, 'i', '50176') %} -{{gm.ident(1)}}out[i] = 0; -{%- endcall %} -{% call gm.forloop_s(1, 'cii', 'CII', loop_factor) %} -{% call gm.forloop(2, 'h', 'IH') %} -{% call gm.forloop(3, 'w', 'IW') %} -{{gm.ident(3)}}//input is packed by {{loop_factor}} -{% call gm.forloop_s(4, 'coo', 'COO', 2) %} -{% call gm.forloop(5, 'ci', loop_factor) %} -{# Same as read_weights function for conv above#} -{{gm.ident(6)}}if(cii+ci < CII){ -{{gm.ident(7)}}outPack1.{{gm.pr_range_idx(w_bw, 'ci')}}=in[h*CII*COO*IW+w*CII*COO+(cii+ci)*COO+coo]; -{{gm.ident(7)}}outPack2.{{gm.pr_range_idx(w_bw, 'ci')}}=in[h*CII*COO*IW+w*CII*COO+(cii+ci)*COO+coo+1]; -{{gm.ident(6)}}} else { -{{gm.ident(7)}}outPack1.{{gm.pr_range_idx(w_bw, 'ci')}} = 0; -{{gm.ident(7)}}outPack2.{{gm.pr_range_idx(w_bw, 'ci')}} = 0; -{{gm.ident(6)}}} -{%- endcall -%} -{{gm.ident(5)}}//write pack -{% set iters_weight = (w_bw*loop_factor*2/mem_bw)|round(0,'ceil')|int %} -{%for idx in range(0,iters_weight)%} -{# if this is not the last pack#} -{% if ((idx+1)*(mem_bw/2) < w_bw*loop_factor) %} -{{gm.ident(5)}}out[address+{{idx}}].{{gm.pr_range_bound((mem_bw/2-1)|int,0)}} = outPack1.{{gm.pr_range_bound(((idx+1)*(mem_bw/2)-1)|int,(idx*mem_bw/2)|int)}}; -{{gm.ident(5)}}out[address+{{idx}}].{{gm.pr_range_bound(mem_bw-1,(mem_bw/2)|int)}} = outPack2.{{gm.pr_range_bound(((idx+1)*(mem_bw/2)-1)|int,(idx*mem_bw/2)|int)}}; -{# if this is the last pack#} -{% else %} -{{gm.ident(5)}}out[address+{{idx}}].{{gm.pr_range_bound(w_bw*loop_factor-1-(idx*mem_bw/2)|int,0)}} = outPack1.{{gm.pr_range_bound(w_bw*loop_factor-1,(idx*mem_bw/2)|int)}}; -{{gm.ident(5)}}out[address+{{idx}}].{{gm.pr_range_bound(w_bw*loop_factor-1-((idx-1)*mem_bw/2)|int,(mem_bw/2)|int)}} = outPack2.{{gm.pr_range_bound(w_bw*loop_factor-1,(idx*mem_bw/2)|int)}}; -{%endif%} -{%endfor%} -{{gm.ident(5)}}address += {{iters_weight}}; -{% endcall -%} -{% endcall -%} -{% endcall -%} -{% endcall -%} -} - -template <class T> -void read_weights_fc(const char* path, int IH, int IW, int CII, int COO, ap_int<512>out[50176]){ -{{gm.ident(1)}}T in[IH*IW*CII*COO]; //notice that IW IH CII are for the layer before flatten for convenience -{{gm.ident(1)}}read_data<T>(path, (T*)in, IH*IW*CII*COO); -{{gm.ident(1)}}pack_weights_fc(IH, IW, CII, COO, in, out); -} - -template <class T> -void read_bias(const char* path, int COO, int bw, int loop_factor, int mem_bw, ap_int<512>out[50176]){ - T array[COO]; - ap_int<1024> outPack = 0; //a relatively big buffer - read_data<T>(path, (T*)array, COO); - - for (int i=0;i<50176;i++){ - out[i] = 0; - } - int address = 0; - for(int coo=0;coo<COO;coo+=loop_factor){ - for(int i=0;i<loop_factor;i++){ - if(coo+i < COO){ - outPack.range(bw-1+i*bw,i*bw) = array[coo+i]; - } else { - outPack.range(bw-1+i*bw,i*bw) = 0; //pad 0 - } - } - int iters_bias = ceil((float)(bw*loop_factor)/(float)mem_bw); - for(int i = 0;i < iters_bias;i ++){ - if((i+1)*mem_bw <= bw*loop_factor){ - out[address].range(mem_bw-1,0) = outPack.range((i+1)*mem_bw-1,i*mem_bw); - } else { - out[address].range(bw*loop_factor-i*mem_bw-1,0) = outPack.range(bw*loop_factor-1, i*mem_bw); - } - address ++; - } - } -} - -template<int mem_bw, int bw> -bool compare_almost_equal(int H, int W, int C, int loop_factor, int ref[], xcelo_uint<512> real[]){ - int iters = ceil((float)(bw*loop_factor)/(float)mem_bw); - int num_per_pack = mem_bw/bw; - //CC is number of channels after packing - int CC = ceil((float)C/(float)loop_factor); - bool flag = false; - - for(int cc = 0;cc < CC;cc ++){ - for(int i = 0;i < loop_factor;i ++){ - for(int h = 0;h < H;h ++){ - for(int w = 0;w < W;w ++){ - //NOTE: assume mem_bw is 512 for now, might change this value in the future - ap_int<mem_bw> temp = ((ap_int<mem_bw>*)real)[(cc*H*W+h*W+w)*iters]; - if((cc*loop_factor + i) < C){ - ap_int<mem_bw> temp = ((ap_int<mem_bw>*)real)[(cc*H*W+h*W+w)*iters + i/num_per_pack]; - ap_int<bw> temp_real = temp.range(i*bw+(bw-1), i*bw); - ap_int<bw> temp_ref = ref[(cc*loop_factor+i)*H*W+h*W+w]; - //NOTE: for now fix the error threshold at 2, might decide to make this configurable in the future - if(abs(temp_real - temp_ref) > 2){ - cout << "[" << (cc*loop_factor+i) << "][" << h << "][" << w << "]"; - cout << "Diff: Real--" << temp_real << " Ref--" << temp_ref << endl; - flag = true; - } - } - } - } - } - } - return flag; -} -{% endmacro %} \ No newline at end of file diff --git a/DRED_codegen/templates/avg_pool.inc b/DRED_codegen/templates/avg_pool.inc deleted file mode 100644 index f2a060ecb1f664c1e989140831e913fc1ea3fe03..0000000000000000000000000000000000000000 --- a/DRED_codegen/templates/avg_pool.inc +++ /dev/null @@ -1,35 +0,0 @@ -{%- macro handle_preprocess(object, name) -%} - {%- if name in object.keys() %} - {%- for key, value in object.iteritems() %} - {%- if key == name %} - {%- if value > 0 %} - {%- endif %} - {%- endif %} - {%- endfor %} - {%- endif %} -{%- endmacro %} - -{%- macro fdecl(object) -%} -void {{object.name}}_avg_pool(arguments); -{%- endmacro %} - -{%- macro var_decl(object) -%} - {%- if object.preprocess %} - {%- else %} - {%- endif %} -{%- endmacro %} - -{%- macro fvalid_call(object) %} -{%- endmacro %} - -{%- macro fcall(object) -%} -{{object.name}}_avg_pool(); -{%- endmacro %} - -{%- macro fimpl(object) -%} -void {{object.name}}_avg_pool(arguments) {}; -{%- endmacro %} - - -{%- macro fvalid(object) -%} -{%- endmacro %} \ No newline at end of file diff --git a/DRED_codegen/templates/batch_norm.inc b/DRED_codegen/templates/batch_norm.inc deleted file mode 100644 index abc4bc0ac6d31e0ee7874f00d3235ffaaa55e12a..0000000000000000000000000000000000000000 --- a/DRED_codegen/templates/batch_norm.inc +++ /dev/null @@ -1,161 +0,0 @@ -{% import "global_macros.inc" as gm %} -{% import "aux_funcs.inc" as aux %} - -{%- macro handle_preprocess(object, name) -%} - {%- if name in object.keys() %} - {%- for key, value in object.iteritems() %} - {%- if key == name %} - {%- if value > 0 %} - {%- endif %} - {%- endif %} - {%- endfor %} - {%- endif %} -{%- endmacro %} - -{%- macro fdecl(object) -%} -{%- endmacro %} - -{%- macro var_decl(object) -%} - {%- if object.preprocess %} - {%- else %} - {%- endif %} -{%- endmacro %} - -{% macro fvalid_call(object) %} -{{gm.ident(1)}}{{object.name}}_test_pool(); -{{gm.ident(1)}}{{object.name}}_test_pool_flatten(); -{% endmacro %} - -{# defines paramters passed in when it get called#} -{%- macro fcall(object, debug) -%} -{%- set loop_factor = object.loop_factor -%} -{%- set ip_shift = object.preprocess.ip_shift-%} -{%- set channels = (object.dshape[2]/loop_factor)|round(0,'ceil')|int-%} -{{gm.ident(1)}}{{object.name}}_layer ({{object.dshape[0]}}, {{object.dshape[1]}}, {{channels}}, {{object.weights.mean}}, {{object.weights.dev_inv}}, {{object.weights.gamma}}, {{object.weights.beta}}, -{%- if object.interface.in == 'dram' %} - a, -{%- elif object.interface.in == 'onchip' %} - a_onchip, -{%- endif %} -{%- if object.interface.int == 'dram' %} - c -{%- elif object.interface.int == 'onchip' %} - c_onchip -{%- endif %} -); -{%- endmacro %} - -{% macro readFunc(layer, name, mem, bw, loop_factor)%} -{%- set iters = (bw*loop_factor/mem)|round(0,'ceil')|int -%} -int read{{name}}_{{layer}}({{gm.fixed_int(mem)}} *mem, - {{gm.fixed_int(bw*loop_factor)}} &input, - int address) -{ - int index=0; - //Read requires {{iters}} iterations -{%for idx in range(0,iters)%} -{# if this is not the last pack#} -{%if ((idx+1)*mem < bw*loop_factor) %} - input.{{gm.pr_range_bound((idx+1)*mem-1,idx*mem)}}=mem[address+index].{{gm.pr_range_bound((mem-1),0)}}; - index ++; -{# if this is the last pack#} -{% else %} - input.{{gm.pr_range_bound(bw*loop_factor-1,idx*mem)}}=mem[address+index].{{gm.pr_range_bound(bw*loop_factor-idx*mem-1,0)}}; -{%endif%} -{%endfor%} - return address; -} -{% endmacro %} - -{% macro writeFunc(layer, name, mem, bw, loop_factor)%} -{%- set iters = (bw*loop_factor/mem)|round(0,'ceil')|int -%} -int write{{name}}_{{layer}}({{gm.fixed_int(mem)}} *mem, - {{gm.fixed_int(bw*loop_factor)}} output, - int address) -{ - int index=0; - //Write requires {{iters}} iterations -{%for idx in range(0,iters)%} -{# if this is not the last pack#} -{%if ((idx+1)*mem < bw*loop_factor) %} - mem[address+index].{{gm.pr_range_bound((mem-1),0)}} = output.{{gm.pr_range_bound((idx+1)*mem-1,idx*mem)}}; - index ++; -{# if this is the last pack#} -{% else %} - mem[address+index].{{gm.pr_range_bound(bw*loop_factor-idx*mem-1,0)}}=output.{{gm.pr_range_bound(bw*loop_factor-1,idx*mem)}}; -{%endif%} -{%endfor%} - return address; -} -{% endmacro %} - -{%- macro fimpl(object) -%} -{%- set ip_bw = object.preprocess.ip_bit_width-%} -{%- set ip_shift = object.preprocess.ip_shift-%} -{%- set op_bw = object.preprocess.op_bit_width-%} -{%- set op_shift = object.preprocess.op_shift -%} -{%- set loop_factor = object.loop_factor -%} -{%- set mem_bw = object.interface.memory_bit_width -%} -{# internal bw is the "safe" bit width for the result of multiplication #} -{%- set iters_input = (ip_bw*loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_output = (op_bw*loop_factor/mem_bw)|round(0,'ceil')|int -%} - -{{readFunc(object.name, "Input", mem_bw, ip_bw, loop_factor)}} -{{writeFunc(object.name, "Output", mem_bw, op_bw, loop_factor)}} - -void {{object.name}}_kernel ( -{{gm.ident(1)}}{{gm.f_vdecl_arr(ip_bw, ip_bw, 'input', loop_factor)}}, -{{gm.ident(1)}}{{gm.f_vdecl_arr(ip_bw, ip_bw, 'output', loop_factor)}}, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'mean')}}, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'dev_inv')}}, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'gamma')}}, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'beta')}}) -{ -{{gm.ident(1)}}{{gm.f_vdecl_arr(ip_bw, ip_bw, 'numerator', loop_factor)}}; -{{gm.ident(1)}}{{gm.f_vdecl_arr(ip_bw, ip_bw, 'quotient', loop_factor)}}; - -{{gm.ident(1)}}// Here dev_inv is 1/sqrt(std.deviation + epsilon) -{{gm.ident(1)}}// this value can be taken as std.deviation, we need to calculate this outside this module and then pass it to here -{{gm.ident(1)}}// because it involves sqrt operation and division. -{{gm.ident(1)}}// the multiplication with dev_inv is equivalent to division by sqrt(std.deviation + epsilon) -{% call gm.forloop(1, 'i', loop_factor) %} -{{gm.ident(2)}}numerator[i] = input[i] - mean; -{{gm.ident(2)}}quotient[i] = (numerator[i]*dev_inv); //DATA RANGE ERROR!! -{{gm.ident(2)}}output[i] = gamma*(quotient[i])+beta; -{% endcall %} -} - -void {{object.name}}_layer( -{{gm.ident(1)}}int IH, -{{gm.ident(1)}}int IW, -{{gm.ident(1)}}int IC, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'mean')}}, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'dev_inv')}}, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'gamma')}}, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'beta')}}, -{{gm.ident(1)}}ap_int<{{mem_bw}}>input[50176], -{{gm.ident(1)}}ap_int<{{mem_bw}}>output[50176]) -{ -{{gm.ident(1)}}{{gm.f_vdecl_arr(ip_bw, ip_bw, 'inTemp', loop_factor)}}; -{{gm.ident(1)}}{{gm.f_vdecl_arr(ip_bw, ip_bw, 'outTemp', loop_factor)}}; -{{gm.ident(1)}}{{gm.fi_vdecl(ip_bw*loop_factor, 'inPackTemp')}}; -{{gm.ident(1)}}{{gm.fi_vdecl(mem_bw, 'outPackTemp')}}; -{% call gm.forloop(1, 'i', 'IH*IW*IC') %} //FLATTENED -{{gm.ident(2)}}readInput_{{object.name}}(input,inPackTemp,i*{{iters_input}}); -{% call gm.forloop(2, 'j', loop_factor) %} {# assume loop factor is the same for all BN layers#} -{{gm.ident(3)}}inTemp[i].{{gm.pr_range(ip_bw)}}=inPackTemp.{{gm.pr_range_idx(ip_bw, 'j')}}; -{%- endcall -%} -{{gm.ident(2)}}{{object.name}}_kernel(inTemp, outTemp, mean, dev_inv, gamma, beta);// perform batch normalization -{% call gm.forloop(2, 'j', loop_factor) %} -{{gm.ident(3)}}outPackTemp.{{gm.pr_range_idx(ip_bw, 'j')}} = outTemp[i].{{gm.pr_range(ip_bw)}}; -{%- endcall -%} -{{gm.ident(2)}}writeOutput_{{object.name}}(output,outPackTemp,i*{{iters_output}}); -{%- endcall -%} -} -{%- endmacro -%} - -{%- macro fvalid(object) -%} -void {{object.name}}_test_BN() { -//Not yet implemented -} -{%- endmacro %} \ No newline at end of file diff --git a/DRED_codegen/templates/conv.jinja b/DRED_codegen/templates/conv.jinja deleted file mode 100644 index 2accb3f8a23d4e61bbfdb9f718c90f6edb5d3e84..0000000000000000000000000000000000000000 --- a/DRED_codegen/templates/conv.jinja +++ /dev/null @@ -1,278 +0,0 @@ -{% import 'global_macros.inc' as gm %} -{% import 'aux_funcs.inc' as aux %} - -{%- macro gen_red_tree_vdecl(size, dtype) %} -{%- for level in range(size, 1, -1) %} -{% if (size % level) == 0 %} -{{gm.ident(1)}}{{dtype}} inter{{level}}[2][{{level}}]; -{{gm.ident(1)}}#pragma HLS ARRAY_PARTITION variable=inter{{level}} complete dim=1 -{{gm.ident(1)}}#pragma HLS ARRAY_PARTITION variable=inter{{level}} complete dim=2 -{% endif %} -{%- endfor %} -{%- endmacro %} - -{%- macro gen_red_tree_impl(size, dtype) %} -{%- for level in range(size, 1, -1) %} -{%- if (size % level) == 0 %} -{% if (level != size) %} -{{gm.ident(1)}}for (int i=0; i<{{level}}; i++) { -{{gm.ident(2)}}#pragma HLS unroll -{{gm.ident(2)}}inter{{level}}[0][i]=inter{{level*2}}[0][i*2]+inter{{level*2}}[0][i*2+1]; -{{gm.ident(2)}}inter{{level}}[1][i]=inter{{level*2}}[1][i*2]+inter{{level*2}}[1][i*2+1]; -{{gm.ident(1)}}} -{%- endif %} -{%- endif %} -{%- endfor %} -{%- endmacro %} - -{% macro readFunc(layer, name, mem, bw, loop_factor)%} -{# Number of packs required to read one pack#} -{%- set iters = (bw*loop_factor/mem)|round(0,'ceil')|int -%} -int read{{name}}_{{layer}}({{gm.fixed_int(mem)}} *mem, - {{gm.fixed_int(bw*loop_factor)}} &input, - int address) -{ - int index=0; - //Read requires {{iters}} iterations -{%for idx in range(0,iters)%} -{# if this IS NOT the last pack#} -{%if ((idx+1)*mem < bw*loop_factor) %} - input.{{gm.pr_range_bound((idx+1)*mem-1,idx*mem)}}=mem[address+index].{{gm.pr_range_bound((mem-1),0)}}; - index ++; -{# if this IS the last pack#} -{% else %} - input.{{gm.pr_range_bound(bw*loop_factor-1,idx*mem)}}=mem[address+index].{{gm.pr_range_bound(bw*loop_factor-idx*mem-1,0)}}; -{%endif%} -{%endfor%} - return address; -} -{% endmacro %} - -{# Macro that implement the memory write & pack function #} -{% macro writeFunc(layer, name, mem, bw, loop_factor)%} -{%- set iters = (bw*loop_factor/mem)|round(0,'ceil')|int -%} -int write{{name}}_{{layer}}({{gm.fixed_int(mem)}} *mem, - {{gm.fixed_int(bw*loop_factor)}} output, - int address) -{ - int index=0; - //Write requires {{iters}} iterations -{%for idx in range(0,iters)%} -{# if this IS NOT the last pack#} -{%if ((idx+1)*mem < bw*loop_factor) %} - mem[address+index].{{gm.pr_range_bound((mem-1),0)}} = output.{{gm.pr_range_bound((idx+1)*mem-1,idx*mem)}}; - index ++; -{# if this IS the last pack#} -{% else %} - mem[address+index].{{gm.pr_range_bound(bw*loop_factor-idx*mem-1,0)}}=output.{{gm.pr_range_bound(bw*loop_factor-1,idx*mem)}}; -{%endif%} -{%endfor%} - return address; -} -{% endmacro %} - -{%- set ip_bw = object.preprocess.ip_bit_width-%} -{%- set ip_shift = object.preprocess.ip_shift-%} -{%- set int_bw = object.preprocess.int_bit_width-%} -{%- set int_shift = object.preprocess.int_shift-%} -{%- set w_bw = object.weights.W_width-%} -{%- set w_shift = object.weights.W_shift-%} - -{%- set ip_loop_factor = object.ip_loop_factor -%} -{%- set op_loop_factor = object.op_loop_factor -%} -{%- set op_unroll_factor = object.op_unroll_factor -%} -{%- set converge_unroll_factor = object.converge_unroll_factor-%} -{%- set compengine_unroll_factor = object.compengine_unroll_factor -%} - - -{%- set IH = object.IH -%} -{%- set IW= object.IW -%} -{%- set CII = object.CII -%} -{%- set OH = object.OH -%} -{%- set OW = object.OW -%} -{%- set COO = object.COO -%} -{%- set FH = object.FH -%} -{%- set FW = object.FW -%} -{%- set stride = object.stride -%} - -{%- set mem_bw = object.memory_bit_width -%} -{%- set internal_bw = w_bw+ip_bw -%} - -{%- set iters_weight = (w_bw*ip_loop_factor*2/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_bias = (ip_bw*ip_loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_input = (ip_bw*ip_loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_itrm = (int_bw*op_loop_factor/mem_bw)|round(0,'ceil')|int -%} - -{# Implement the memory read & unpack function #} -{# see the overview doc for more detail #} - -#include "ap_int.h" - -int {{object.name}}_read_weight({{gm.fixed_int(mem_bw)}} *mem, - {{gm.fixed_int(w_bw*ip_loop_factor)}} &weight1, - {{gm.fixed_int(w_bw*ip_loop_factor)}} &weight2, - int address) -{ - int index=0; - //Input loop factor is {{ip_loop_factor}} - //Length of pack is {{w_bw*ip_loop_factor}} - //Read requires {{iters_weight}} iterations -{%for idx in range(0,iters_weight)%} -{# if this is not the last pack#} -{%if ((idx+1)*(mem_bw/2) < w_bw*ip_loop_factor) %} - weight1.{{gm.pr_range_bound(((idx+1)*(mem_bw/2)-1)|int,(idx*mem_bw/2)|int)}}=mem[address+index].{{gm.pr_range_bound((mem_bw/2-1)|int,0)}}; - weight2.{{gm.pr_range_bound(((idx+1)*(mem_bw/2)-1)|int,(idx*mem_bw/2)|int)}}=mem[address+index].{{gm.pr_range_bound(mem_bw-1,(mem_bw/2)|int)}}; - index+=1; -{# if this is the last pack#} -{% else %} - weight1.{{gm.pr_range_bound(w_bw*ip_loop_factor-1,(idx*mem_bw/2)|int)}}=mem[address+index].{{gm.pr_range_bound(w_bw*ip_loop_factor-1-(idx*mem_bw/2)|int,0)}}; - weight2.{{gm.pr_range_bound(w_bw*ip_loop_factor-1,(idx*mem_bw/2)|int)}}=mem[address+index].{{gm.pr_range_bound(w_bw*ip_loop_factor-1-((idx-1)*mem_bw/2)|int,(mem_bw/2)|int)}}; -{%endif%} -{%endfor%} - return address; -} - -{# Declare Read functions for this layer#} -{{readFunc(object.name, "Input", mem_bw, ip_bw, ip_loop_factor)}} -{{readFunc(object.name,"Itrm", mem_bw, int_bw, op_loop_factor)}} -{{writeFunc(object.name,"Itrm", mem_bw, int_bw, op_loop_factor)}} - -void {{object.name}}_pack_dsp( - {{gm.f_vdecl(internal_bw, internal_bw,'&rst1')}}, - {{gm.f_vdecl(internal_bw, internal_bw,'&rst2')}}, - {{gm.fi_vdecl(w_bw, 'a')}}, - {{gm.fi_vdecl(w_bw, 'b')}}, - {{gm.fi_vdecl(ip_bw, 'c')}} -) { - {{gm.fi_vdecl(18+w_bw, 'ai')}} = 0; - {{gm.fi_vdecl(w_bw, 'bi')}}; - {{gm.fi_vdecl(ip_bw, 'ci')}}; - {{gm.fi_vdecl(45, 'rst')}}; - {{gm.fui_vdecl(19, 'cst')}}; - - #pragma HLS RESOURCE variable=rst core=DSP48 - #pragma HLS pipeline II=1 - ai.{{gm.pr_range(w_bw)}}=a.{{gm.pr_range(w_bw)}}; - bi.{{gm.pr_range(w_bw)}}=b.{{gm.pr_range(w_bw)}}; - ci.{{gm.pr_range(ip_bw)}}=c.{{gm.pr_range(ip_bw)}}; - - if ( (ci!=0) && (bi!=0) && (ci.sign() != bi.sign())) { - cst=0x40000; - } else { - cst=0; - } - rst = ( (ai<<18) + bi) * ci + cst; - rst1.{{gm.pr_range(internal_bw)}}=rst.{{gm.pr_range_bound(18+internal_bw-1,18)}}; - rst2.{{gm.pr_range(internal_bw)}}=rst.{{gm.pr_range(internal_bw)}}; -} - -void {{object.name}}_out_converge({{gm.fi_vdecl(op_loop_factor*int_bw, '&outpack')}}, {{gm.f_vdecl_arr(internal_bw,internal_bw,'out_temp',op_loop_factor)}}, int right_shift) { - #pragma HLS pipeline - #pragma HLS ARRAY_PARTITION variable=out_temp complete dim=1 - {{gm.f_vdecl_arr(int_bw, int_bw,'temp_1',op_loop_factor)}}; - {{gm.f_vdecl_arr(internal_bw, internal_bw,'temp_2',op_loop_factor)}}; - #pragma HLS ARRAY_PARTITION variable=temp_1 complete dim=1 - #pragma HLS ARRAY_PARTITION variable=temp_2 complete dim=1 -{% call gm.forloop(1,'i',op_loop_factor) %} -{{gm.ident(2)}}#pragma HLS unroll factor={{converge_unroll_factor}} -{{gm.ident(2)}}temp_1[i].{{gm.pr_range(int_bw)}}=outpack.{{gm.pr_range_idx(int_bw, 'i')}}; -{{gm.ident(2)}}temp_2[i].{{gm.pr_range(internal_bw)}}=out_temp[i].{{gm.pr_range(internal_bw)}}; -{{gm.ident(2)}}temp_1[i]=temp_1[i] + (temp_2[i] >> right_shift); -{%- endcall -%} -{% call gm.forloop(1,'i',op_loop_factor) %} -{{gm.ident(2)}}#pragma HLS unroll factor={{converge_unroll_factor}} -{{gm.ident(2)}}outpack.{{gm.pr_range_idx(int_bw, 'i')}}=temp_1[i].{{gm.pr_range(int_bw)}}; -{%- endcall -%} -} - -void {{object.name}}_compengine_row( - {{gm.f_vdecl(internal_bw,internal_bw,'&out1')}}, - {{gm.f_vdecl(internal_bw,internal_bw,'&out2')}}, - {{gm.fi_vdecl(ip_loop_factor*ip_bw, 'InPack')}}, - {{gm.fi_vdecl(w_bw*ip_loop_factor, 'weightPack1')}}, - {{gm.fi_vdecl(w_bw*ip_loop_factor, 'weightPack2')}} -) { - {{gm.fi_vdecl_arr(w_bw, 'weight1', ip_loop_factor)}}; - #pragma HLS ARRAY_PARTITION variable=weight1 complete dim=1 - {{gm.fi_vdecl_arr(w_bw, 'weight2', ip_loop_factor)}}; - #pragma HLS ARRAY_PARTITION variable=weight2 complete dim=1 - {{gm.fi_vdecl_arr(ip_bw, 'in', ip_loop_factor)}}; - #pragma HLS ARRAY_PARTITION variable=in complete dim=1 - {{gen_red_tree_vdecl(ip_loop_factor, gm.fixed(internal_bw,internal_bw))}} - #pragma HLS pipeline II=2 -{% call gm.forloop(1, 'i', ip_loop_factor) %} -{{gm.ident(2)}}in[i].{{gm.pr_range(ip_bw)}}=InPack.{{gm.pr_range_idx(ip_bw,'i')}}; -{{gm.ident(2)}}weight1[i].{{gm.pr_range(w_bw)}}=weightPack1.{{gm.pr_range_idx(w_bw,'i')}}; -{{gm.ident(2)}}weight2[i].{{gm.pr_range(w_bw)}}=weightPack2.{{gm.pr_range_idx(w_bw,'i')}}; -{%- endcall %} -{%- call gm.forloop(1, 'i', ip_loop_factor) %} -{{gm.ident(2)}}#pragma HLS unroll factor={{compengine_unroll_factor}} -{{gm.ident(2)}}{{object.name}}_pack_dsp(inter{{ip_loop_factor}}[0][i],inter{{ip_loop_factor}}[1][i],weight1[i],weight2[i],in[i]); -{%- endcall %} -{{gen_red_tree_impl(ip_loop_factor, gm.fixed(internal_bw,internal_bw))}} - out1=inter2[0][0]+inter2[0][1]; - out2=inter2[1][0]+inter2[1][1]; -} - - -void {{object.name}}_layer( - /* - int IW, //image width with padding - int IH, //image height with padding - int CII,//image channels - int OW, //output width - int OH, //output height - int COO, //output channels - int FW, //filter width - int FH, //filter height - int stride, - */ - int right_shift, - {{gm.fi_vdecl(mem_bw, 'weight[100352]')}}, - {{gm.fi_vdecl(mem_bw, 'in[100352]')}}, - {{gm.fi_vdecl(mem_bw, 'out[100352]')}} -){ - #pragma HLS RESOURCE variable=out core=XPM_MEMORY uram - #pragma HLS INTERFACE bram port=in - - {{gm.f_vdecl_arr(internal_bw,internal_bw, 'out_temp', op_loop_factor)}}; - #pragma HLS ARRAY_PARTITION variable=out_temp - -{{gm.ident(1)}}//Clear initial values of output buffers -{% call gm.forloop(1, 'i', COO*OW*OH*iters_itrm) %} -{{gm.ident(2)}}#pragma HLS pipeline -{{gm.ident(2)}}out[i] = 0; -{% endcall %} -{{gm.ident(1)}}int weight_idx=0; -{{gm.ident(1)}}//First set of loops: Iterate over filter -{% call gm.forloop(1, 'fh', FH) %} -{% call gm.forloop(2, 'fw', FW) %} -{% call gm.forloop(3, 'cii', CII) %} -{% call gm.forloop(4, 'coo', COO) %} -{{gm.ident(5)}}for (int h=0,ih=0;h<{{OH}};h++,ih+={{stride}}) { -{{gm.ident(6)}}for (int w=0,iw=0;w<{{OW}};w++,iw+={{stride}}) { -{{gm.ident(7)}}#pragma HLS pipeline II=2 -{{gm.ident(7)}}{{gm.fi_vdecl(ip_bw*ip_loop_factor,'intemp0')}}; -{{gm.ident(7)}}int address=(cii*{{IW*IH}}+(ih+fh)*{{IW}}+iw+fw)*{{iters_input}}; -{{gm.ident(7)}}readInput_{{object.name}}(in,intemp0,address); -{% call gm.forloop_s(7, 'i', op_loop_factor, 2) %} -{{gm.ident(8)}}{{gm.fi_vdecl(w_bw*ip_loop_factor,'weight1')}}; -{{gm.ident(8)}}{{gm.fi_vdecl(w_bw*ip_loop_factor,'weight2')}}; -{{gm.ident(8)}}int address = weight_idx+i/2*{{iters_weight}}; -{{gm.ident(8)}}{{object.name}}_read_weight(weight, weight1, weight2, address); -{{gm.ident(8)}}#pragma HLS unroll factor={{op_unroll_factor}} -{{gm.ident(8)}}{{object.name}}_compengine_row(out_temp[i],out_temp[i+1],intemp0,weight1,weight2); -{% endcall %} -{{gm.ident(7)}}{{gm.fi_vdecl(int_bw*op_loop_factor,'outPackTemp')}}; -{{gm.ident(7)}}address=(coo*{{OW*OH}}+h*{{OW}}+w)*{{iters_itrm}}; -{{gm.ident(7)}}readItrm_{{object.name}}(out, outPackTemp, address); -{{gm.ident(7)}}{{object.name}}_out_converge(outPackTemp, out_temp, right_shift); -{{gm.ident(7)}}writeItrm_{{object.name}}(out, outPackTemp, address); -{{gm.ident(6)}}} -{{gm.ident(5)}}} -{{gm.ident(5)}}weight_idx += ({{op_loop_factor}}/2)*{{iters_weight}}; -{% endcall %} -{% endcall %} -{% endcall %} -{% endcall %} -} \ No newline at end of file diff --git a/DRED_codegen/templates/convolution.inc b/DRED_codegen/templates/convolution.inc deleted file mode 100644 index c1ddfb760961a66874b973d675bf7eb74b1479a2..0000000000000000000000000000000000000000 --- a/DRED_codegen/templates/convolution.inc +++ /dev/null @@ -1,761 +0,0 @@ -{% import 'global_macros.inc' as gm %} -{% import 'aux_funcs.inc' as aux %} - -{%- macro gen_red_tree_vdecl(size, dtype) %} -{%- for level in range(size, 1, -1) %} -{% if (size % level) == 0 %} -{{gm.ident(1)}}{{dtype}} inter{{level}}[2][{{level}}]; -{{gm.ident(1)}}#pragma HLS ARRAY_PARTITION variable=inter{{level}} complete dim=1 -{{gm.ident(1)}}#pragma HLS ARRAY_PARTITION variable=inter{{level}} complete dim=2 -{% endif %} -{%- endfor %} -{%- endmacro %} - -{%- macro gen_red_tree_impl(size, dtype) %} -{%- for level in range(size, 1, -1) %} -{%- if (size % level) == 0 %} -{% if (level != size) %} -{{gm.ident(1)}}for (int i=0; i<{{level}}; i++) { -{{gm.ident(2)}}#pragma HLS unroll -{{gm.ident(2)}}inter{{level}}[0][i]=inter{{level*2}}[0][i*2]+inter{{level*2}}[0][i*2+1]; -{{gm.ident(2)}}inter{{level}}[1][i]=inter{{level*2}}[1][i*2]+inter{{level*2}}[1][i*2+1]; -{{gm.ident(1)}}} -{%- endif %} -{%- endif %} -{%- endfor %} -{%- endmacro %} - -{%- macro handle_preprocess(object, name) -%} - {%- if name in object.keys() %} - {%- for key, value in object.iteritems() %} - {%- if key == name %} - {%- if value > 0 %} - {%- endif %} - {%- endif %} - {%- endfor %} - {%- endif %} -{%- endmacro %} - -{%- macro fdecl(object) -%} -{%- endmacro %} - -{%- macro var_decl(object) -%} -{%- endmacro %} - -{% macro fvalid_call(object) %} -{{gm.ident(1)}}{{object.name}}_test_conv(); -{{gm.ident(1)}}{{object.name}}_test_relu(); -{% endmacro -%} - -{# Function Call #} -{%- macro fcall(object, debug) -%} -{%- set ip_shift = object.preprocess.ip_shift-%} -{%- set int_shift = object.preprocess.int_shift-%} -{%- set w_shift = object.weights.W_shift-%} -{%- set op_shift = object.preprocess.op_shift -%} -{%- set b_shift = object.weights.b_shift-%} -{%- set op_loop_factor = object.op_loop_factor -%} -{%- set ip_loop_factor = object.ip_loop_factor -%} -{%- set right_shift = (ip_shift+w_shift-int_shift) -%} -{%- set right_shift_relu = int_shift - op_shift -%} -{%- set left_shift_relu = int_shift-b_shift-%} -{%- if debug -%} -{{gm.ident(1)}}{{aux.fcall_readW(object)}} -{% endif %} -{# decide the size of output depending on type of padding #} -{{gm.ident(1)}}{{object.name}}_layer( -{%- if object.weights["padding"] == 'VALID' %} -{{object.dshape[0]}}, {{object.dshape[1]}}, {{(object.dshape[2]/ip_loop_factor)|round(0,ceil)|int}}, {{object.dshape[0]-(object.wshape[0]-1)}},{{object.dshape[0]-(object.wshape[0]-1)}}, -{%- else %} -{{object.dshape[0]+(object.wshape[0]-1)}}, {{object.dshape[1]+(object.wshape[0]-1)}}, {{(object.dshape[2]/ip_loop_factor)|round(0,'ceil')|int}}, {{object.dshape[0]}}, {{object.dshape[1]}}, -{%- endif %} - {{(object.wshape[3]/op_loop_factor)|round(0,'ceil')|int}}, {{object.wshape[0]}}, {{object.wshape[1]}}, -{%- if object.weights["stride"] %} {#TODO: give more flexibility? #} - {{object.weights["stride"][1]}}, -{%- else %} - 1, -{%- endif %} - {{right_shift}}, w, -{%- if object.interface.in == 'dram' %} - a, -{%- elif object.interface.in == 'onchip' %} - a_onchip, -{%- endif %} -{%- if object.interface.int == 'dram' %} - c -{%- elif object.interface.int == 'onchip' %} - c_onchip -{%- endif -%} -); -{# if activation function is required #} -{% if object.activation == 'ReLU' %} -{%- if debug -%} -{{aux.fcall_readb(object)}} -{%- endif -%} -{{gm.ident(1)}}{{object.name}}_bias_relu( -{%- if object.weights["padding"] == 'VALID' -%} -0, -{%- else %} - {{object.weights.padsize}}, -{%- endif %} - {{object.dshape[0]}}, {{object.dshape[1]}}, {{(object.wshape[3]/op_loop_factor)|round(0,'ceil')|int}}, {{object.dshape[0]+2*object.weights.padsize}}, {{object.dshape[1]+2*object.weights.padsize}}, {{left_shift_relu}}, {{right_shift_relu}}, w, -{%- if object.interface.out == 'dram' %} -a, -{%- elif object.interface.out == 'onchip' %} -a_onchip, -{% endif %} -{%- if object.interface.int == 'dram' %} -c -{%- elif object.interface.int == 'onchip' %} -c_onchip -{%- endif -%} -); -{%- endif -%} -{%- endmacro -%} - -{# Macro that implement the memory read & unpack function #} -{# TODO: share instances of read function between layers #} -{% macro readFunc(layer, name, mem, bw, loop_factor)%} -{# Number of packs required to read one pack#} -{%- set iters = (bw*loop_factor/mem)|round(0,'ceil')|int -%} -int read{{name}}_{{layer}}({{gm.fixed_int(mem)}} *mem, - {{gm.fixed_int(bw*loop_factor)}} &input, - int address) -{ - int index=0; - //Read requires {{iters}} iterations -{%for idx in range(0,iters)%} -{# if this IS NOT the last pack#} -{%if ((idx+1)*mem < bw*loop_factor) %} - input.{{gm.pr_range_bound((idx+1)*mem-1,idx*mem)}}=mem[address+index].{{gm.pr_range_bound((mem-1),0)}}; - index ++; -{# if this IS the last pack#} -{% else %} - input.{{gm.pr_range_bound(bw*loop_factor-1,idx*mem)}}=mem[address+index].{{gm.pr_range_bound(bw*loop_factor-idx*mem-1,0)}}; -{%endif%} -{%endfor%} - return address; -} -{% endmacro %} - -{# Macro that implement the memory write & pack function #} -{# TODO: share instances of read function between layers #} -{% macro writeFunc(layer, name, mem, bw, loop_factor)%} -{%- set iters = (bw*loop_factor/mem)|round(0,'ceil')|int -%} -int write{{name}}_{{layer}}({{gm.fixed_int(mem)}} *mem, - {{gm.fixed_int(bw*loop_factor)}} output, - int address) -{ - int index=0; - //Write requires {{iters}} iterations -{%for idx in range(0,iters)%} -{# if this IS NOT the last pack#} -{%if ((idx+1)*mem < bw*loop_factor) %} - mem[address+index].{{gm.pr_range_bound((mem-1),0)}} = output.{{gm.pr_range_bound((idx+1)*mem-1,idx*mem)}}; - index ++; -{# if this IS the last pack#} -{% else %} - mem[address+index].{{gm.pr_range_bound(bw*loop_factor-idx*mem-1,0)}}=output.{{gm.pr_range_bound(bw*loop_factor-1,idx*mem)}}; -{%endif%} -{%endfor%} - return address; -} -{% endmacro %} - - -{%- macro fimpl(object) -%} -{# declare a set of internal variables in Jinja2 for convenience#} -{# parameters related to quantization of data #} -{%- set ip_bw = object.preprocess.ip_bit_width-%} -{%- set ip_shift = object.preprocess.ip_shift-%} -{%- set int_bw = object.preprocess.int_bit_width-%} -{%- set int_shift = object.preprocess.int_shift-%} -{%- set op_bw = object.preprocess.op_bit_width-%} -{%- set op_shift = object.preprocess.op_shift -%} -{% if object.preprocess.cluster == true %} -{%- set index_width = object.preprocess.index_width -%} -{%- set num_clusters = object.preprocess.num_clusters -%} -{% endif %} -{%- set w_bw = object.weights.W_width-%} -{%- set w_shift = object.weights.W_shift-%} -{%- set b_bw = object.weights.b_width-%} -{%- set b_shift = object.weights.b_shift-%} -{# ip_loop_factor has to be the same as the op_loop_factor of the previous layer #} -{%- set ip_loop_factor = object.ip_loop_factor -%} -{%- set op_loop_factor = object.op_loop_factor -%} -{# mem_bw and onchip_bw can be the same, in that case #} -{%- set mem_bw = object.interface.memory_bit_width -%} -{%- set onchip_bw = object.interface.onchip_bit_width -%} -{# internal bw is the "safe" bit width for the result of multiplication #} -{%- set internal_bw = w_bw+ip_bw -%} -{# iterations of read/write required for datapacks #} -{%- set iters_weight = (w_bw*ip_loop_factor*2/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_bias = (ip_bw*ip_loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_input = (ip_bw*ip_loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_output = (op_bw*op_loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_itrm = (int_bw*op_loop_factor/mem_bw)|round(0,'ceil')|int -%} - -{# Implement the memory read & unpack function #} -{# see the overview doc for more detail #} -{% if object.preprocess.cluster == true%} -int {{object.name}}_read_weight({{gm.fixed_int(mem_bw)}} *mem, - {{gm.fixed_int(index_width*ip_loop_factor)}} &weight1, - {{gm.fixed_int(index_width*ip_loop_factor)}} &weight2, - int address) -{ - int index=0; - //Input loop factor is {{ip_loop_factor}} - //Length of pack is {{index_width*ip_loop_factor}} - //Read requires {{iters_weight}} iterations -{%for idx in range(0,iters_weight)%} -{# if this is not the last pack#} -{%if ((idx+1)*(mem_bw/2) < index_width*ip_loop_factor) %} - weight1.{{gm.pr_range_bound(((idx+1)*(mem_bw/2)-1)|int,(idx*mem_bw/2)|int)}}=mem[address+index].{{gm.pr_range_bound((mem_bw/2-1)|int,0)}}; - weight2.{{gm.pr_range_bound(((idx+1)*(mem_bw/2)-1)|int,(idx*mem_bw/2)|int)}}=mem[address+index].{{gm.pr_range_bound(mem_bw-1,(mem_bw/2)|int)}}; - index+=1; -{# if this is the last pack#} -{% else %} - weight1.{{gm.pr_range_bound(index_width*ip_loop_factor-1,(idx*mem_bw/2)|int)}}=mem[address+index].{{gm.pr_range_bound(index_width*ip_loop_factor-1-(idx*mem_bw/2)|int,0)}}; - weight2.{{gm.pr_range_bound(index_width*ip_loop_factor-1,(idx*mem_bw/2)|int)}}=mem[address+index].{{gm.pr_range_bound(index_width*ip_loop_factor-1-((idx-1)*mem_bw/2)|int,(mem_bw/2)|int)}}; -{%endif%} -{%endfor%} - return address; -} -{% else %} -int {{object.name}}_read_weight({{gm.fixed_int(mem_bw)}} *mem, - {{gm.fixed_int(w_bw*ip_loop_factor)}} &weight1, - {{gm.fixed_int(w_bw*ip_loop_factor)}} &weight2, - int address) -{ - int index=0; - //Input loop factor is {{ip_loop_factor}} - //Length of pack is {{w_bw*ip_loop_factor}} - //Read requires {{iters_weight}} iterations -{%for idx in range(0,iters_weight)%} -{# if this is not the last pack#} -{%if ((idx+1)*(mem_bw/2) < w_bw*ip_loop_factor) %} - weight1.{{gm.pr_range_bound(((idx+1)*(mem_bw/2)-1)|int,(idx*mem_bw/2)|int)}}=mem[address+index].{{gm.pr_range_bound((mem_bw/2-1)|int,0)}}; - weight2.{{gm.pr_range_bound(((idx+1)*(mem_bw/2)-1)|int,(idx*mem_bw/2)|int)}}=mem[address+index].{{gm.pr_range_bound(mem_bw-1,(mem_bw/2)|int)}}; - index+=1; -{# if this is the last pack#} -{% else %} - weight1.{{gm.pr_range_bound(w_bw*ip_loop_factor-1,(idx*mem_bw/2)|int)}}=mem[address+index].{{gm.pr_range_bound(w_bw*ip_loop_factor-1-(idx*mem_bw/2)|int,0)}}; - weight2.{{gm.pr_range_bound(w_bw*ip_loop_factor-1,(idx*mem_bw/2)|int)}}=mem[address+index].{{gm.pr_range_bound(w_bw*ip_loop_factor-1-((idx-1)*mem_bw/2)|int,(mem_bw/2)|int)}}; -{%endif%} -{%endfor%} - return address; -} -{% endif %} - -{# Declare Read functions for this layer#} -{{readFunc(object.name, "Input", mem_bw, ip_bw, ip_loop_factor)}} -{{readFunc(object.name,"Itrm", mem_bw, int_bw, op_loop_factor)}} -{{readFunc(object.name,"Bias", mem_bw, b_bw, op_loop_factor)}} -{{writeFunc(object.name,"Itrm", mem_bw, int_bw, op_loop_factor)}} -{{writeFunc(object.name,"Output", mem_bw, op_bw, op_loop_factor)}} - - -{% if object.preprocess.cluster == true %} -{{gm.fixed(w_bw, w_bw)}} weightTable({{gm.fui_vdecl(index_width, 'index')}}) -{ -{{gm.ident(1)}}const {{gm.f_vdecl_arr(w_bw, w_bw, 'table', num_clusters)}} = -{{gm.ident(1)}}{ -{%- for weight in object.cluster_values -%} -{# this if is just to avoid that ',' for the last iteration #} -{%- if loop.index == num_clusters -%} -{{weight}} -{% else %} -{{weight}}, -{%- endif -%} -{% endfor %} -{{gm.ident(1)}}}; -{{gm.ident(1)}}return table[index]; -} - -void {{object.name}}_decode_weight({{gm.fi_vdecl_arr(w_bw, 'weightArray', ip_loop_factor)}}, {{gm.fi_vdecl(ip_loop_factor*index_width, 'indexPack')}}) -{ -{{gm.ident(1)}}#pragma HLS ARRAYPARTITION variable = weightArray complete -{% call gm.forloop(1, 'i', ip_loop_factor) %} -{{gm.ident(2)}}#pragma HLS unroll -{{gm.ident(2)}}ap_uint<{{index_width}}> indexTmp; -{{gm.ident(2)}}indexTmp.{{gm.pr_range(index_width)}}=indexPack.{{gm.pr_range_idx(index_width, 'i')}}; -{{gm.ident(2)}}weightArray[i]=weightTable(indexTmp); -{%- endcall -%} -} -{% endif %} - -{# determine whether multiplication can be packed#} -{%- if ((ip_bw > 8) or (w_bw > 8) or (internal_bw > 16)) -%} -{# currently we only allow 8bit, might expand in the future#} -{# simple compute function when we cannot pack 2 multiplications #} -void {{object.name}}_pack_dsp ( - {{gm.f_vdecl(internal_bw, internal_bw,'&rst1')}}, - {{gm.f_vdecl(internal_bw, internal_bw,'&rst2')}}, - {{gm.fi_vdecl(w_bw, 'a')}}, - {{gm.fi_vdecl(w_bw, 'b')}}, - {{gm.fi_vdecl(ip_bw, 'c')}} -) { - rst1 = a * c; - rst2 = b * c; -} -{%- else -%} -void {{object.name}}_pack_dsp( - {{gm.f_vdecl(internal_bw, internal_bw,'&rst1')}}, - {{gm.f_vdecl(internal_bw, internal_bw,'&rst2')}}, - {{gm.fi_vdecl(w_bw, 'a')}}, - {{gm.fi_vdecl(w_bw, 'b')}}, - {{gm.fi_vdecl(ip_bw, 'c')}} -) { - {{gm.fi_vdecl(18+w_bw, 'ai')}} = 0; - {{gm.fi_vdecl(w_bw, 'bi')}}; - {{gm.fi_vdecl(ip_bw, 'ci')}}; - {{gm.fi_vdecl(45, 'rst')}}; - {{gm.fui_vdecl(19, 'cst')}}; - - #pragma HLS RESOURCE variable=rst core=DSP48 - #pragma HLS pipeline II=1 - ai.{{gm.pr_range(w_bw)}}=a.{{gm.pr_range(w_bw)}}; - bi.{{gm.pr_range(w_bw)}}=b.{{gm.pr_range(w_bw)}}; - ci.{{gm.pr_range(ip_bw)}}=c.{{gm.pr_range(ip_bw)}}; - - if ( (ci!=0) && (bi!=0) && (ci.sign() != bi.sign())) { - cst=0x40000; - } else { - cst=0; - } - rst = ( (ai<<18) + bi) * ci + cst; - rst1.{{gm.pr_range(internal_bw)}}=rst.{{gm.pr_range_bound(18+internal_bw-1,18)}}; - rst2.{{gm.pr_range(internal_bw)}}=rst.{{gm.pr_range(internal_bw)}}; -} -{% endif %} - -void {{object.name}}_out_converge({{gm.fi_vdecl(op_loop_factor*int_bw, '&outpack')}}, {{gm.f_vdecl_arr(internal_bw,internal_bw,'out_temp',op_loop_factor)}}, int right_shift) { - #pragma HLS pipeline - #pragma HLS ARRAY_PARTITION variable=out_temp complete dim=1 - {{gm.f_vdecl_arr(int_bw, int_bw,'temp_1',op_loop_factor)}}; - {{gm.f_vdecl_arr(internal_bw, internal_bw,'temp_2',op_loop_factor)}}; - #pragma HLS ARRAY_PARTITION variable=temp_1 complete dim=1 - #pragma HLS ARRAY_PARTITION variable=temp_2 complete dim=1 -{% call gm.forloop(1,'i',op_loop_factor) %} -{{gm.ident(2)}}#pragma HLS unroll factor=64 -{{gm.ident(2)}}temp_1[i].{{gm.pr_range(int_bw)}}=outpack.{{gm.pr_range_idx(int_bw, 'i')}}; -{{gm.ident(2)}}temp_2[i].{{gm.pr_range(internal_bw)}}=out_temp[i].{{gm.pr_range(internal_bw)}}; -{{gm.ident(2)}}temp_1[i]=temp_1[i] + (temp_2[i] >> right_shift); -{%- endcall -%} -{% call gm.forloop(1,'i',op_loop_factor) %} -{{gm.ident(2)}}#pragma HLS unroll factor=64 -{{gm.ident(2)}}outpack.{{gm.pr_range_idx(int_bw, 'i')}}=temp_1[i].{{gm.pr_range(int_bw)}}; -{%- endcall -%} -} - -void {{object.name}}_compengine_row( - {{gm.f_vdecl(internal_bw,internal_bw,'&out1')}}, - {{gm.f_vdecl(internal_bw,internal_bw,'&out2')}}, - {{gm.fi_vdecl(ip_loop_factor*ip_bw, 'InPack')}}, - {% if object.preprocess.cluster == true -%} - {{gm.fi_vdecl(index_width*ip_loop_factor, 'weightPack1')}}, - {{gm.fi_vdecl(index_width*ip_loop_factor, 'weightPack2')}} - {%- else -%} - {{gm.fi_vdecl(w_bw*ip_loop_factor, 'weightPack1')}}, - {{gm.fi_vdecl(w_bw*ip_loop_factor, 'weightPack2')}} - {%- endif -%} -) { - {% if object.preprocess.cluster == true %} - {{gm.fi_vdecl_arr(w_bw, 'weight1', ip_loop_factor)}}; - #pragma HLS ARRAY_PARTITION variable=weight1 complete dim=1 - {{gm.fi_vdecl_arr(w_bw, 'weight2', ip_loop_factor)}}; - #pragma HLS ARRAY_PARTITION variable=weight2 complete dim=1 - {% else %} - {{gm.fi_vdecl_arr(index_width, 'weight1', ip_loop_factor)}}; - #pragma HLS ARRAY_PARTITION variable=weight1 complete dim=1 - {{gm.fi_vdecl_arr(index_width, 'weight2', ip_loop_factor)}}; - #pragma HLS ARRAY_PARTITION variable=weight2 complete dim=1 - {% endif %} - {{gm.fi_vdecl_arr(ip_bw, 'in', ip_loop_factor)}}; - #pragma HLS ARRAY_PARTITION variable=in complete dim=1 - {{gen_red_tree_vdecl(ip_loop_factor, gm.fixed(internal_bw,internal_bw))}} - #pragma HLS pipeline II=2 -{% call gm.forloop(1, 'i', ip_loop_factor) %} -{{gm.ident(2)}}in[i].{{gm.pr_range(ip_bw)}}=InPack.{{gm.pr_range_idx(ip_bw,'i')}}; -{# if cluster is disabled #} -{% if object.preprocess.cluster == false %} -{{gm.ident(2)}}weight1[i].{{gm.pr_range(w_bw)}}=weightPack1.{{gm.pr_range_idx(w_bw,'i')}}; -{{gm.ident(2)}}weight2[i].{{gm.pr_range(w_bw)}}=weightPack2.{{gm.pr_range_idx(w_bw,'i')}}; -{% endif %} -{%- endcall %} -{# if cluster is enabled #} -{%- if object.preprocess.cluster == true -%} -{{gm.ident(1)}}//Decode clustered input -{{gm.ident(1)}}{{object.name}}_decode_weight(weight1, weightPack1); -{{gm.ident(1)}}{{object.name}}_decode_weight(weight2, weightPack2); -{% endif %} -{%- call gm.forloop(1, 'i', ip_loop_factor) %} -{{gm.ident(2)}}#pragma HLS unroll -{{gm.ident(2)}}{{object.name}}_pack_dsp(inter{{ip_loop_factor}}[0][i],inter{{ip_loop_factor}}[1][i],weight1[i],weight2[i],in[i]); -{%- endcall %} -{{gen_red_tree_impl(ip_loop_factor, gm.fixed(internal_bw,internal_bw))}} - out1=inter2[0][0]+inter2[0][1]; - out2=inter2[1][0]+inter2[1][1]; -} - -{{gm.fixed_int(op_bw*op_loop_factor)}} {{object.name}}_bias_relu( - {{gm.f_vdecl_arr(int_bw, int_bw,'out1',op_loop_factor)}}, - {{gm.fi_vdecl(b_bw*op_loop_factor, 'bias')}}, - int left_shift, - int right_shift -){ - {{gm.f_vdecl_arr(op_bw, op_bw, 'in1', op_loop_factor)}}; - {{gm.fi_vdecl(op_bw*op_loop_factor, 'temp')}}; - #pragma HLS inline off - #pragma HLS array_partition variable=out1 dim=1 -{% call gm.forloop(1, 'j', op_loop_factor) %} -{{gm.ident(2)}}#pragma HLS unroll -{{gm.ident(2)}}{{gm.f_vdecl(b_bw, b_bw ,'btemp')}}; -{{gm.ident(2)}}{{gm.f_vdecl(int_bw, int_bw,'otemp')}}; -{{gm.ident(2)}}btemp.{{gm.pr_range(b_bw)}} = bias.{{gm.pr_range_idx(b_bw,'j')}}; -{{gm.ident(2)}}otemp=(out1[j] + (({{gm.fixed(int_bw, int_bw)}})btemp << left_shift))>>right_shift; -{{gm.ident(2)}}if(otemp>0) -{{gm.ident(3)}}in1[j]=otemp; -{{gm.ident(2)}}else -{{gm.ident(3)}}in1[j]=0.0; -{{gm.ident(2)}}temp.{{gm.pr_range_idx(op_bw,'j')}}=in1[j].{{gm.pr_range(op_bw)}}; -{%- endcall -%} - return temp; -} - -void {{object.name}}_layer( - int IW, //image width with padding - int IH, //image height with padding - int CII,//image channels - int OW, //output width - int OH, //output height - int COO, //output channels - int FW, //filter width - int FH, //filter height - int stride, - int right_shift, - {{gm.fi_vdecl(mem_bw, '*weight')}}, - {% if object.interface.in == 'dram' %} - {{gm.fi_vdecl(mem_bw, '*in')}}, - {% elif object.interface.in == 'onchip' %} - {{gm.fi_vdecl_arr(onchip_bw, 'in', 50176)}}, - {% endif %} - {% if object.interface.int == 'dram' %} - {{gm.fi_vdecl(mem_bw, '*out')}} - {% elif object.interface.int == 'onchip' %} - {{gm.fi_vdecl_arr(onchip_bw, 'out', 50176)}} - {% endif %} -){ - #pragma HLS RESOURCE variable=out core=XPM_MEMORY uram - #pragma HLS INTERFACE bram port=in - - {{gm.f_vdecl_arr(internal_bw,internal_bw, 'out_temp', op_loop_factor)}}; - #pragma HLS ARRAY_PARTITION variable=out_temp - -{{gm.ident(1)}}//Clear initial values of output buffers -{% if (object.interface.int == 'dram'): %} -{% call gm.forloop(1, 'i', 'COO*OW*OH*%d'|format(iters_itrm)) %} -{{gm.ident(2)}}#pragma HLS pipeline -{{gm.ident(2)}}out[i] = 0; -{% endcall %} -{% elif (object.interface.int == 'onchip'): %} -{% call gm.forloop(1, 'i', 'COO*OW*OH*%d'|format(iters_itrm)) %} -{{gm.ident(2)}}#pragma HLS pipeline -{{gm.ident(2)}}out[i] = 0; -{% endcall %} -{% endif %} -{{gm.ident(1)}}int weight_idx=0; -{{gm.ident(1)}}//First set of loops: Iterate over filter -{% call gm.forloop(1, 'fh', 'FH') %} -{% call gm.forloop(2, 'fw', 'FW') %} -{% call gm.forloop(3, 'cii', 'CII') %} -{% call gm.forloop(4, 'coo', 'COO') %} -{{gm.ident(5)}}#pragma HLS dataflow -{{gm.ident(5)}}for (int h=0,ih=0;h<OH;h++,ih+=stride) { -{{gm.ident(6)}}for (int w=0,iw=0;w<OW;w++,iw+=stride) { -{{gm.ident(7)}}#pragma HLS pipeline II=2 -{{gm.ident(7)}}{{gm.fi_vdecl(ip_bw*ip_loop_factor,'intemp0')}}; -{% if object.interface.in == 'dram' %} -{{gm.ident(7)}}int address=(cii*IW*IH+(ih+fh)*IW+iw+fw)*{{iters_input}}; -{{gm.ident(7)}}readInput_{{object.name}}(in,intemp0,address); -{% elif object.interface.in == 'onchip' %} -{{gm.ident(7)}}int address=(cii*IW*IH+(ih+fh)*IW+iw+fw)*{{iters_input}}; -{{gm.ident(7)}}readInput_{{object.name}}(in,intemp0,address); -{% endif %} -{% call gm.forloop_s(7, 'i', op_loop_factor, 2) %} -{% if object.preprocess.cluster == true %} -{{gm.ident(8)}}{{gm.fi_vdecl(index_width*ip_loop_factor,'weight1')}}; -{{gm.ident(8)}}{{gm.fi_vdecl(index_width*ip_loop_factor,'weight2')}}; -{% else %} -{{gm.ident(8)}}{{gm.fi_vdecl(w_bw*ip_loop_factor,'weight1')}}; -{{gm.ident(8)}}{{gm.fi_vdecl(w_bw*ip_loop_factor,'weight2')}}; -{% endif%} -{{gm.ident(8)}}int address = weight_idx+i/2*{{iters_weight}}; -{{gm.ident(8)}}{{object.name}}_read_weight(weight, weight1, weight2, address); -{{gm.ident(8)}}#pragma HLS unroll factor=32 -{{gm.ident(8)}}{{object.name}}_compengine_row(out_temp[i],out_temp[i+1],intemp0,weight1,weight2); -{% endcall %} -{{gm.ident(7)}}{{gm.fi_vdecl(int_bw*op_loop_factor,'outPackTemp')}}; -{% if object.interface.int == 'dram' %} -{{gm.ident(7)}}address=(coo*OW*OH+h*OW+w)*{{iters_itrm}}; -{{gm.ident(7)}}readItrm_{{object.name}}(out, outPackTemp, address); -{{gm.ident(7)}}{{object.name}}_out_converge(outPackTemp, out_temp, right_shift); -{{gm.ident(7)}}writeItrm_{{object.name}}(out, outPackTemp, address); -{% elif object.interface.int == 'onchip' %} -{{gm.ident(7)}}address=(coo*OW*OH+h*OW+w)*{{iters_itrm}}; -{{gm.ident(7)}}readItrm_{{object.name}}(out, outPackTemp, address); -{{gm.ident(7)}}{{object.name}}_out_converge(outPackTemp, out_temp, right_shift); -{{gm.ident(7)}}writeItrm_{{object.name}}(out, outPackTemp, address); -{% endif %} -{{gm.ident(6)}}} -{{gm.ident(5)}}} -{{gm.ident(5)}}weight_idx += ({{op_loop_factor}}/2)*{{iters_weight}}; -{% endcall %} -{% endcall %} -{% endcall %} -{% endcall %} -} - -void {{object.name}}_bias_relu( - int padsize, - int OH, //size of output of conv_layer - int OW, //size of output of conv_layer - int COO, //channels - int IH, //size of output of relu layer - int IW, //size of output of relu layer - int left_shift, - int right_shift, - {{gm.fi_vdecl(mem_bw, '*weight')}}, - {% if object.interface.out == 'dram' %} - {{gm.fi_vdecl(mem_bw, '*in')}}, - {% elif object.interface.out == 'onchip' %} - {{gm.fi_vdecl_arr(onchip_bw, 'in', 50176)}}, - {% endif %} - {% if object.interface.int == 'dram' %} - {{gm.fi_vdecl(mem_bw, '*out')}} - {% elif object.interface.int == 'onchip' %} - {{gm.fi_vdecl_arr(onchip_bw, 'out', 50176)}} - {% endif %} -) { - {{gm.fi_vdecl(op_loop_factor*int_bw, 'temp')}}; - {{gm.fi_vdecl(op_loop_factor*b_bw, 'bias')}}; - int weight_idx = 0; - int address = 0; - {{gm.fi_vdecl(op_loop_factor*op_bw,"outPackTemp")}}; -{% call gm.forloop(1,'c', 'COO') %} -{{gm.ident(2)}}readBias_{{object.name}}(weight, bias, weight_idx*{{iters_bias}}); -{{gm.ident(2)}}weight_idx ++; -{% call gm.forloop(2, 'i', 'padsize*IW') %} -{{gm.ident(3)}}#pragma HLS pipeline -{% if object.interface.out == 'dram' %} -{{gm.ident(3)}}address = (c*IH*IW+i)*{{iters_output}}; -{{gm.ident(3)}}writeOutput_{{object.name}}(in, 0, address); -{% elif object.interface.out == 'onchip' %} -{{gm.ident(3)}}address = (c*IH*IW+i)*{{iters_output}}; -{{gm.ident(3)}}writeOutput_{{object.name}}(in, 0, address); -{% endif %} -{% endcall %} -{% call gm.forloop(2, 'h', 'OH') %} -{{gm.ident(3)}}// pad zeros in front of each line -{% call gm.forloop(3, 'i', 'padsize') %} -{{gm.ident(4)}}#pragma HLS pipeline -{% if object.interface.out == 'dram' %} -{{gm.ident(4)}}address = (c*IH*IW+(padsize+h)*IW+i)*{{iters_output}}; -{{gm.ident(4)}}writeOutput_{{object.name}}(in, 0, address); -{% elif object.interface.out == 'onchip' %} -{{gm.ident(4)}}address = (c*IH*IW+(padsize+h)*IW+i)*{{iters_output}}; -{{gm.ident(4)}}writeOutput_{{object.name}}(in, 0, address); -{% endif %} -{% endcall %} -{{gm.ident(3)}}// perform relu & bias -{% call gm.forloop(3, 'w', 'OW') %} -{{gm.ident(4)}}#pragma HLS pipeline -{{gm.ident(4)}}{{gm.f_vdecl_arr(int_bw, int_bw, 'out_temp', op_loop_factor)}}; -{{gm.ident(4)}}#pragma HLS array_partition variable=out_temp -{% if object.interface.int == 'dram' %} -{{gm.ident(4)}}address = (c*OW*OH+h*OW+w)*{{iters_itrm}}; -{{gm.ident(4)}}readItrm_{{object.name}}(out, temp, address); -{% elif object.interface.int == 'onchip' %} -{{gm.ident(4)}}address = (c*OW*OH+h*OW+w)*{{iters_itrm}}; -{{gm.ident(4)}}readItrm_{{object.name}}(out, temp, address); -{% endif %} -{% call gm.forloop(4,'j',op_loop_factor) %} -{{gm.ident(5)}}#pragma HLS unroll -{{gm.ident(5)}}out_temp[j].{{gm.pr_range(int_bw)}}=temp.{{gm.pr_range_idx(int_bw,'j')}}; -{% endcall %} -{% if object.interface.out == 'dram' %} -{{gm.ident(4)}}address = (c*IH*IW+(padsize+h)*IW+w+padsize)*{{iters_output}}; -{{gm.ident(4)}}outPackTemp = {{object.name}}_bias_relu(out_temp,bias,left_shift,right_shift); -{{gm.ident(4)}}writeOutput_{{object.name}}(in, outPackTemp, address); -{% elif object.interface.out == 'onchip' %} -{{gm.ident(4)}}address = (c*IH*IW+(padsize+h)*IW+w+padsize)*{{iters_output}}; -{{gm.ident(4)}}outPackTemp = {{object.name}}_bias_relu(out_temp,bias,left_shift,right_shift); -{{gm.ident(4)}}writeOutput_{{object.name}}(in, outPackTemp, address); -{% endif %} -{% endcall %} -{{gm.ident(3)}}// pad zeros at end of each line -{% call gm.forloop(3, 'i', 'padsize') %} -{{gm.ident(4)}}#pragma HLS pipeline -{% if object.interface.out == 'dram' %} -{{gm.ident(4)}}address = (c*IH*IW+(padsize+h)*IW+OW+padsize+i)*{{iters_output}}; -{{gm.ident(4)}}writeOutput_{{object.name}}(in, 0, address); -{% elif object.interface.out == 'onchip' %} -{{gm.ident(4)}}address = (c*IH*IW+(padsize+h)*IW+OW+padsize+i)*{{iters_output}}; -{{gm.ident(4)}}writeOutput_{{object.name}}(in, 0, address); -{% endif %} -{% endcall %} -{% endcall %} -{{gm.ident(2)}}// pad trailing zero rows -{% call gm.forloop(2, 'i', 'padsize*IW') %} -{{gm.ident(3)}}#pragma HLS pipeline -{% if object.interface.out == 'dram' %} -{{gm.ident(3)}}address = (c*IH*IW+(padsize+OH)*IW+i); -{{gm.ident(3)}}writeOutput_{{object.name}}(in, 0, address); -{% elif object.interface.out == 'onchip' %} -{{gm.ident(3)}}address = (c*IH*IW+(padsize+OH)*IW+i); -{{gm.ident(3)}}writeOutput_{{object.name}}(in, 0, address); -{% endif %} -{% endcall %} -{% endcall %} -} -{%- endmacro %} - -{%- macro fvalid(object) -%} -{%- set H = object.dshape[0] -%} -{%- set W = object.dshape[1] -%} -{%- set FH = object.wshape[0] -%} -{%- set FW = object.wshape[1] -%} -{%- set IC = object.wshape[2] -%} -{%- set OC = object.wshape[3] -%} -{%- set padsize = ((FH-1)/2)|int -%} -{%- set IH = H+2*padsize -%} -{%- set IW = W+2*padsize -%} -{%- set ip_bw = object.preprocess.ip_bit_width-%} -{%- set int_bw = object.preprocess.int_bit_width-%} -{%- set op_bw = object.preprocess.op_bit_width-%} -{%- set w_bw = object.weights.W_width-%} -{%- set b_bw = object.weights.b_width-%} -{%- set ip_loop_factor = object.ip_loop_factor -%} -{%- set op_loop_factor = object.op_loop_factor -%} -{%- set mem_bw = object.interface.memory_bit_width -%} -{%- set onchip_bw = object.interface.onchip_bit_width -%} -{%- set iters_weight = (w_bw*ip_loop_factor*2/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_bias = (ip_bw*ip_loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_input = (ip_bw*ip_loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_output = (op_bw*op_loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_itrm = (int_bw*op_loop_factor/mem_bw)|round(0,'ceil')|int -%} -void {{object.name}}_test_conv() { - srand(17); - //test framework - int input_buf[{{IH}}*{{IW}}*{{IC}}]; - int weight_buf[{{FH}}*{{FW}}*{{IC}}*{{OC}}]; - int output_buf[{{H}}*{{W}}*{{OC}}] = {0}; -{% call gm.forloop(1, 'i', IC) %} -{% call gm.forloop(2, 'j', IH) %} -{% call gm.forloop(3, 'k', IW) %} -{{gm.ident(4)}}if(!(k < {{padsize}} || k >= {{W+padsize}} || j < {{padsize}} || j >= {{H+padsize}})){ -{{gm.ident(5)}}input_buf[i*{{IH}}*{{IW}}+j*{{IW}}+k] = rand()%2; -{{gm.ident(4)}}} else { -{{gm.ident(5)}}input_buf[i*{{IH}}*{{IW}}+j*{{IW}}+k] = 0; -{{gm.ident(4)}}} -{% endcall %} -{% endcall %} -{% endcall %} - -{% call gm.forloop(1, 'i', (IC/ip_loop_factor)|round(0,'ceil')|int) %} -{% call gm.forloop(2, 'j', IH) %} -{% call gm.forloop(3, 'k', IW) %} -{{gm.ident(4)}}{{gm.fi_vdecl(512, 'temp_a')}} = 0; -{% call gm.forloop(4, 'ii', ip_loop_factor) %} -{{gm.ident(5)}}if(ii*{{ip_loop_factor}}+i < {{IC}}){ -{{gm.ident(6)}}temp_a.{{gm.pr_range_idx(w_bw,'ii')}} = input_buf[(i*{{ip_loop_factor}}+ii)*{{IH}}*{{IW}}+j*{{IW}}+k]; -{{gm.ident(5)}}} else { -{{gm.ident(6)}}temp_a.{{gm.pr_range_idx(w_bw,'ii')}} = 0; -{{gm.ident(5)}}} -{% endcall %} -{{gm.ident(4)}}a[i*{{IH}}*{{IW}}+j*{{IW}}+k]=temp_a; -{% endcall %} -{% endcall %} -{% endcall %} - -{% call gm.forloop(1, 'i', FH*FW*IC*OC) %} -{{gm.ident(2)}}weight_buf[i] = rand()%2; -{% endcall %} -pack_weights<int>({{FH}}, {{FW}}, {{IC}}, {{OC}}, weight_buf, w); - -{% call gm.forloop(1, 'i', 50176) %} -{{gm.ident(2)}}c[i] = 0; -{% endcall %} - -{{gm.ident(1)}}conv_2d(output_buf, weight_buf, input_buf, {{FH}}, {{FW}}, {{IH}}, {{IW}}, {{IC}}, {{H}}, {{W}}, {{OC}}, 1); -{{gm.ident(1)}}{{object.name}}_layer( {{IW}}, {{IH}}, {{(IC/ip_loop_factor)|round(0,'ceil')|int}} , {{W}}, {{H}}, {{(OC/op_loop_factor)|round(0,'ceil')|int}}, {{FW}}, {{FH}}, 1, 0, w, a, c); -{% call gm.forloop(1, 'i', 1) %} -{% call gm.forloop(2, 'ii', op_loop_factor) %} -{% call gm.forloop(3, 'j', H) %} -{% call gm.forloop(4, 'k', W) %} -{{gm.ident(5)}}{{gm.fi_vdecl(512, 'temp_c')}} = c[(i*{{H}}*{{W}}+j*{{W}}+k)*{{iters_itrm}}]; -{{gm.ident(5)}}{{gm.fi_vdecl(16, 'out_layer')}} = temp_c.{{gm.pr_range_idx(16,'ii')}}; -{{gm.ident(5)}}int out_golden = output_buf[(i*{{op_loop_factor}}+ii)*{{H}}*{{W}}+j*{{W}}+k]; -{{gm.ident(5)}}if (out_layer != out_golden) { -{{gm.ident(5)}}cout << "[" << (i*{{op_loop_factor}}+ii) << "][" << j << "][" << k << "]"; -{{gm.ident(5)}}cout << "Diff: Layer - " << out_layer << "Golden - " << out_golden << endl; -{{gm.ident(5)}}} -{%- endcall -%} -{{gm.ident(3)}}//cout << endl; -{%- endcall -%} -{{gm.ident(2)}}//cout << endl; -{%- endcall -%} -{%- endcall -%} -} - - -void {{object.name}}_test_relu() { - //test framework - int a_buf[D1][D2][D3]; - const int padsize = {{((object.wshape[0]-1)//2)}}; //for 3x3 kernel -{% call gm.forloop(1, 'i', 'D1') %} -{% call gm.forloop(2, 'j', 'D2') %} -{% call gm.forloop(3, 'k', 'D3') %} -{{gm.ident(4)}}a_buf[i][j][k] = (i+j-k); -{% endcall %} -{% endcall %} -{% endcall %} - -{% call gm.forloop(1, 'i', 'D1') %} -{% call gm.forloop(2, 'j', 'D2') %} -{% call gm.forloop(3, 'k', 'D3/64') %} -{{gm.ident(4)}}{{gm.fi_vdecl(1024, 'temp_c')}} = 0; -{% call gm.forloop(4, 'ii', 64) %} -{{gm.ident(5)}}temp_c.{{gm.pr_range_idx(16,'ii')}} = a_buf[i][j][k*64+ii]; -{% endcall %} -{{gm.ident(4)}}c[k*(D1*D2)+(j)*D1+i]=temp_c; -{% endcall %} -{% endcall %} -{% endcall %} - {{gm.fi_vdecl(512, 'tmp')}}; -{% call gm.forloop(1, 'i', 64) %} -{{gm.ident(2)}}tmp.{{gm.pr_range_idx(8, 'i')}} = 1; -{% endcall %} -{% call gm.forloop(1, 'i', 'F*F*D4') %} -{{gm.ident(2)}}w[i] = tmp; -{% endcall %} -{% call gm.forloop(1, 'i', 50176) %} -{{gm.ident(2)}}a[i] = 0; -{% endcall %} - //{{object.name}}_bias_relu(padsize, D1, D2, D4/64, D1+2*padsize, D2+2*padsize, w, a, c); - -{% call gm.forloop(1, 'i', 'D1+padsize*2') %} -{% call gm.forloop(2, 'j', 'D2+padsize*2') %} -{% call gm.forloop(3, 'k', 'D4/64') %} -{{gm.ident(4)}}{{gm.fi_vdecl(512, 'temp_a')}} = a[k*(D1+2*padsize)*(D2+2*padsize)+j*(D1+2*padsize)+i]; -{{gm.ident(4)}}cout << "[" << i << "][" << j << "][" << k << "]"; -{% call gm.forloop(4, 'ii', 64) %} -{{gm.ident(5)}}{{gm.fi_vdecl(8, 't')}} = temp_a.{{gm.pr_range_idx(8,'ii')}}; -{{gm.ident(5)}}cout << t << ","; -{% endcall %} -{{gm.ident(4)}}cout << endl; -{% endcall %} -{% endcall %} -{% endcall %} -} -{%- endmacro %} \ No newline at end of file diff --git a/DRED_codegen/templates/fc.inc b/DRED_codegen/templates/fc.inc deleted file mode 100644 index b294b2f678025f87e04b5fa0e3870a026af6b9bb..0000000000000000000000000000000000000000 --- a/DRED_codegen/templates/fc.inc +++ /dev/null @@ -1,781 +0,0 @@ -{# fc.inc #} -{# Jinja2 template for HLS synthesizable C++ code #} -{# Inspirit IoT, Inc. 2018 #} -{# Contributers: #} -{# Mang Yu #} -{# Kyle Rupnow #} -{# Xinheng Liu #} - -{% import "global_macros.inc" as gm %} -{% set ident = gm.ident %} -{% import 'aux_funcs.inc' as aux %} - -{# Description: generate intermediate buffer for adder tree #} -{# ------ #} -{# Params: #} -{# size -- number of items for the input to adder tree #} -{# dtype -- datatype of the input #} -{%- macro gen_red_tree_vdecl(size, dtype) %} -{%- for level in range(size, 1, -1) %} -{% if (size % level) == 0 %} -{{gm.ident(1)}}{{dtype}} inter{{level}}[2][{{level}}]; -{{gm.ident(1)}}#pragma HLS ARRAY_PARTITION variable=inter{{level}} complete dim=1 -{{gm.ident(1)}}#pragma HLS ARRAY_PARTITION variable=inter{{level}} complete dim=2 -{% endif %} -{%- endfor %} -{%- endmacro %} - -{# Description: generate cascaded adding for adder tree #} -{# ------ #} -{# Params: #} -{# size -- number of items for the input to adder tree #} -{# dtype -- datatype of the input #} -{%- macro gen_red_tree_impl(size, dtype) %} -{%- for level in range(size, 1, -1) %} -{%- if (size % level) == 0 %} -{% if (level != size) %} -{{gm.ident(1)}}for (int i=0; i<{{level}}; i++) { -{{gm.ident(2)}}#pragma HLS unroll -{{gm.ident(2)}}inter{{level}}[0][i]=inter{{level*2}}[0][i*2]+inter{{level*2}}[0][i*2+1]; -{{gm.ident(2)}}inter{{level}}[1][i]=inter{{level*2}}[1][i*2]+inter{{level*2}}[1][i*2+1]; -{{gm.ident(1)}}} -{%- endif %} -{%- endif %} -{%- endfor %} -{%- endmacro %} - -{# Description: Handle preprocess -- NOT USED #} -{# ------ #} -{# Params: #} -{%- macro handle_preprocess(object, name) -%} - {%- if name in object.keys() %} - {%- for key, value in object.iteritems() %} - {%- if key == name %} - {%- if value > 0 %} - {%- endif %} - {%- endif %} - {%- endfor %} - {%- endif %} -{%- endmacro %} - -{# Description: declare the layer functions--NOT USED #} -{# ------ #} -{# Params: #} -{%- macro fdecl(object) -%} -{%- endmacro %} - -{# Description: declare global variabels -- NOT USED #} -{# ------ #} -{# Params: #} -{%- macro var_decl(object) -%} -{%- endmacro %} - -{# Description: call validation function #} -{# ------ #} -{# Params: #} -{# object -- layer object #} -{% macro fvalid_call(object) %} -{{gm.ident(1)}}{{object.name}}_test_fc(); -{% if object.activation == 'ReLU' %} -{{gm.ident(1)}}{{object.name}}_test_relu(); -{% endif %} -{% endmacro %} - -{# Description: call layer and activation functions #} -{# ------ #} -{# Params: #} -{# object -- layer object #} -{# flags -- flags for debugging, verifcation mode, etc. #} -{%- macro fcall(object, prev, flags) -%} -{%- set ip_shift = object.preprocess.ip_shift-%} -{%- set int_shift = object.preprocess.int_shift-%} -{%- set w_shift = object.weights.W_shift-%} -{%- set op_shift = object.preprocess.op_shift -%} -{%- set b_shift = object.weights.b_shift-%} -{%- set op_loop_factor = object.op_loop_factor -%} -{%- set ip_loop_factor = object.ip_loop_factor -%} -{%- set right_shift = (ip_shift+w_shift-int_shift) -%} -{%- set right_shift_relu = int_shift - op_shift -%} -{%- set left_shift_relu = int_shift - b_shift-%} -{# read weights from file and put into memory if in verification mode #} -{%- if flags.verify -%} -//previous layer is {{prev.name}} -{{gm.ident(1)}}{{aux.fcall_readW_fc(object, prev)}} -{% endif %} -{{gm.ident(1)}}{{object.name}}_layer({{(object.dshape[0]/ip_loop_factor)|round(0,'ceil')|int}}, {{(object.wshape[1]/op_loop_factor)|round(0,'ceil')|int}}, {{right_shift}}, w, -{# placeholder, in case we need to be able to switch between offchip memory and onchip memory #} -{%- if object.interface.in == 'dram' %} - a, -{%- elif object.interface.in == 'onchip' %} - a_onchip, -{%- endif %} -{%- if object.interface.int == 'dram' %} - c -{%- elif object.interface.int == 'onchip' %} - c_onchip -{%- endif %} -); -{# if activation function is required instantiate accordingly #} -{% if object.activation == 'ReLU' %} -{# read bias as needed #} -{%- if flags.verify -%} -{{gm.ident(1)}}{{aux.fcall_readb(object)}} -{% endif %} -{{gm.ident(1)}}{{object.name}}_bias_relu({{(object.wshape[1]/op_loop_factor)|round(0,'ceil')|int}}, {{left_shift_relu}}, {{right_shift_relu}}, w, -{%- if object.interface.in == 'dram' %} - c, -{%- elif object.interface.in == 'onchip' %} - c_onchip, -{%- endif %} -{%- if object.interface.int == 'dram' %} - a -{%- elif object.interface.int == 'onchip' %} - a_onchip -{%- endif %} -); -{% endif %} -{%- endmacro %} - -{# Description: implement the memory read & unpack function #} -{# ------ #} -{# Params: #} -{# layer -- name of the layer #} -{# name -- name of the buffer to read: output/itrm,etc. #} -{# mem_bw -- bit width of a single memory line #} -{# bw -- bit width of the data we want to read #} -{# loop_factor -- loop_factor, also the pack size #} -{# TODO: share instances of read function between layers #} -{% macro readFunc(layer, name, mem_bw, bw, loop_factor)%} -void {{layer}}_read_{{name}}( - {{gm.Xfui(mem_bw)}} *mem, - {{gm.Xfixed_arr(bw-1, 0, 'inputToLayer', loop_factor)}}, - int address) { -{% if mem_bw >= (loop_factor*bw) %} -{% call gm.forloop(1, 'i', loop_factor) %} -{%- if bw == 8-%} -{{gm.ident(2)}}inputToLayer[i]={{gm.Xfixed(bw-1,0)}}::createRaw(mem[address].getChar(i)); -{%- elif bw == 16 -%} -{{gm.ident(2)}}inputToLayer[i]={{gm.Xfixed(bw-1,0)}}::createRaw(mem[address].getShort(i)); -{%- endif -%} -{% endcall %} -{%- else -%} -{% set mem_num = (mem_bw/bw)|int %} -{% call gm.forloop_s(1, 'i', loop_factor, mem_num) %} -{% call gm.forloop(2, 'j', mem_num) %} -{%- if bw == 8-%} -{{gm.ident(3)}}inputToLayer[i+j]={{gm.Xfixed(bw-1,0)}}::createRaw(mem[address].getChar(i)); -{%- elif bw == 16 -%} -{{gm.ident(3)}}inputToLayer[i+j]={{gm.Xfixed(bw-1,0)}}::createRaw(mem[address].getShort(i)); -{%- endif -%} -{%- endcall -%} -{%- endcall -%} -{%- endif -%} -} -{% endmacro %} - -{# Description: implement the memory write & pack function #} -{# ------ #} -{# Params: #} -{# layer -- name of the layer #} -{# name -- name of the buffer to write: output/itrm,etc.#} -{# mem_bw -- bit width of a single memory line #} -{# bw -- bit width of the data we want to read #} -{# loop_factor -- loop_factor, also the pack size #} -{# TODO: share instances of read function between layers #} -{% macro writeFunc(layer, name, mem_bw, bw, loop_factor)%} -void {{layer}}_write_{{name}}( - {{gm.Xfui(mem_bw)}} *mem, - {{gm.Xfixed_arr(bw-1, 0, 'outputFromLayer', loop_factor)}}, - int address) { -{% if mem_bw >= (loop_factor*bw) %} -{% call gm.forloop(1, 'i', loop_factor) %} -{% if bw == 16 %} -{{gm.ident(2)}}mem[address].setValue(i, outputFromLayer[i].getRaw()); -{% elif bw == 8 %} -{{gm.ident(2)}}mem[address].setValue(i, (char)outputFromLayer[i].getRaw()); -{% endif %} -{% endcall %} -{%- else -%} -{% set mem_num = (mem_bw/bw)|int %} -{% call gm.forloop_s(1, 'i', loop_factor, mem_num) %} -{% call gm.forloop(2, 'j', mem_num) %} -{% if bw == 16 %} -{{gm.ident(3)}}mem[address].setValue(j, outputFromLayer[i+j].getRaw()); -{% elif bw == 8 %} -{{gm.ident(3)}}mem[address].setValue(j, (char)outputFromLayer[i+j].getRaw()); -{% endif %} -{%- endcall -%} -{%- endcall -%} -{%- endif -%} -} -{% endmacro %} - -{# Description: implement the layer functions #} -{# #} -{# ------ #} -{# Params: #} -{# object: layer object that contains all the info #} -{%- macro fimpl(object) -%} -{%- set ip_bw = object.preprocess.ip_bit_width-%} -{%- set ip_shift = object.preprocess.ip_shift-%} -{%- set int_bw = object.preprocess.int_bit_width-%} -{%- set int_shift = object.preprocess.int_shift-%} -{%- set op_bw = object.preprocess.op_bit_width-%} -{%- set op_shift = object.preprocess.op_shift -%} -{# parameters for clustering to define if clustering is enabled #} -{% if object.preprocess.cluster == true %} -{%- set index_width = object.preprocess.index_width -%} -{%- set num_clusters = object.preprocess.num_clusters -%} -{% endif %} -{%- set w_bw = object.weights.W_width-%} -{%- set w_shift = object.weights.W_shift-%} -{%- set b_bw = object.weights.b_width-%} -{%- set b_shift = object.weights.b_shift-%} -{%- set ip_loop_factor = object.ip_loop_factor -%} -{%- set op_loop_factor = object.op_loop_factor -%} -{%- set mem_bw = object.interface.memory_bit_width -%} -{# internal bw is the "safe" bit width for the result of multiplication #} -{%- set internal_bw = w_bw+ip_bw -%} -{%- set iters_weight = (w_bw*ip_loop_factor*2/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_bias = (ip_bw*ip_loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_input = (ip_bw*ip_loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_output = (op_bw*op_loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_itrm = (int_bw*op_loop_factor/mem_bw)|round(0,'ceil')|int -%} - -{# Implement the memory read & unpack function #} -{# see the overview doc for more detail #} -/* -Name: {{object.name}}_read_weight -Description: read from memory and write to the two weight buffers. - each of the buffers holds one 'pack' of 'loop_factor' - numbers of data. Notice that we read two packs in at - the same time because the compengine will consume - two weight packs in one iteration. When clustering is - enabled, the input bit-width will be replaced by the - bit-width of the indices. -Params: - mem -- the memory interface to read from - weight1 -- weight pack 1, to be passed to the compengine - weight2 -- weight pack 2, to be passed to the compengine -*/ -{% if object.preprocess.cluster == true%} -void {{object.name}}_read_weight( - {{gm.Xfui(mem_bw)}} *mem, - {{gm.Xfixed_arr(index_width-1, 0, 'weight1', ip_loop_factor)}}, - {{gm.Xfixed_arr(index_width-1, 0, 'weight2', ip_loop_factor)}}, - int address) { -{%- if mem_bw >= (2*ip_loop_factor*index_width) -%} -{% call gm.forloop(1, 'i', ip_loop_factor) %} -{%- if index_width == 8-%} -{{gm.ident(2)}}weight1[i]={{gm.Xfixed(index_width-1,0)}}::createRaw(mem[address].getChar(i)); -{{gm.ident(2)}}weight2[i]={{gm.Xfixed(index_width-1,0)}}::createRaw(mem[address].getChar(i+{{ip_loop_factor}})); -{%- elif index_width == 16 -%} -{{gm.ident(2)}}weight1[i]={{gm.Xfixed(index_width-1,0)}}::createRaw(mem[address].getChar(i)); -{{gm.ident(2)}}weight2[i]={{gm.Xfixed(index_width-1,0)}}::createRaw(mem[address].getChar(i+{{ip_loop_factor}})); -{%- endif -%} -{% endcall %} -{%- else -%} -{%- set mem_num = (mem_bw/2/index_width)|int -%} -{%- call gm.forloop_s(1, 'i', ip_loop_factor, mem_num) -%} -{%- call gm.forloop(2, 'j', mem_num) -%} -{%- if index_width == 8-%} -{{gm.ident(3)}}weight1[i+j]={{gm.Xfixed(index_width-1,0)}}::createRaw(mem[address].getChar(i)); -{{gm.ident(3)}}weight2[i+j]={{gm.Xfixed(index_width-1,0)}}::createRaw(mem[address].getChar(i+{{mem_num}})); -{%- elif index_width == 16 -%} -{{gm.ident(3)}}weight1[i+j]={{gm.Xfixed(index_width-1,0)}}::createRaw(mem[address].getChar(i)); -{{gm.ident(3)}}weight2[i+j]={{gm.Xfixed(index_width-1,0)}}::createRaw(mem[address].getChar(i+{{mem_num}})); -{%- endif -%} -{%- endcall -%} -{%- endcall -%} -{%- endif -%} -} -{% else %} -void {{object.name}}_read_weight( - {{gm.Xfui(mem_bw)}} *mem, - {{gm.Xfixed_arr(w_bw-1, 0, 'weight1', ip_loop_factor)}}, - {{gm.Xfixed_arr(w_bw-1, 0, 'weight2', ip_loop_factor)}}, - int address) { -{%- if mem_bw >= (2*ip_loop_factor*w_bw) -%} -{% call gm.forloop(1, 'i', ip_loop_factor) %} -{%- if w_bw == 8-%} -{{gm.ident(2)}}weight1[i]={{gm.Xfixed(w_bw-1,0)}}::createRaw(mem[address].getChar(i)); -{{gm.ident(2)}}weight2[i]={{gm.Xfixed(w_bw-1,0)}}::createRaw(mem[address].getChar(i+{{ip_loop_factor}})); -{%- elif w_bw == 16 -%} -{{gm.ident(2)}}weight1[i]={{gm.Xfixed(w_bw-1,0)}}::createRaw(mem[address].getChar(i)); -{{gm.ident(2)}}weight2[i]={{gm.Xfixed(w_bw-1,0)}}::createRaw(mem[address].getChar(i+{{ip_loop_factor}})); -{%- endif -%} -{% endcall %} -{%- else -%} -{%- set mem_num = (mem_bw/2/w_bw)|int -%} -{%- call gm.forloop_s(1, 'i', ip_loop_factor, mem_num) -%} -{%- call gm.forloop(2, 'j', mem_num) -%} -{%- if w_bw == 8-%} -{{gm.ident(3)}}weight1[i+j]={{gm.Xfixed(w_bw-1,0)}}::createRaw(mem[address].getChar(i)); -{{gm.ident(3)}}weight2[i+j]={{gm.Xfixed(w_bw-1,0)}}::createRaw(mem[address].getChar(i+{{mem_num}})); -{%- elif w_bw == 16 -%} -{{gm.ident(3)}}weight1[i+j]={{gm.Xfixed(w_bw-1,0)}}::createRaw(mem[address].getChar(i)); -{{gm.ident(3)}}weight2[i+j]={{gm.Xfixed(w_bw-1,0)}}::createRaw(mem[address].getChar(i+{{mem_num}})); -{%- endif -%} -{%- endcall -%} -{%- endcall -%} -{%- endif -%} -} -{% endif %} - -{{readFunc(object.name, "input", mem_bw, ip_bw, ip_loop_factor)}} -{{readFunc(object.name,"itrm", mem_bw, int_bw, op_loop_factor)}} -{{readFunc(object.name,"bias", mem_bw, b_bw, op_loop_factor)}} -{{writeFunc(object.name,"itrm", mem_bw, int_bw, op_loop_factor)}} -{{writeFunc(object.name,"output", mem_bw, op_bw, op_loop_factor)}} - -{% if object.preprocess.cluster == true %} -/* -Name: {{object.name}}_weight_table -Description: store the lookup table for clustering and performs the lookup -Params: the parameters -Return: the actual weight get after lookup -*/ -{{gm.Xfixed(w_bw-1, 0)}} weightTable(int index) -{ -{{gm.ident(1)}}{{gm.Xfixed_arr(w_bw-1, 0, 'table', num_clusters)}}; -{%- for weight in object.cluster_values -%} -{{gm.ident(1)}}table[{{loop.index-1}}]={{weight}}; -{% endfor %} -{{gm.ident(1)}}return table[index]; -} - -/* -Name: {{object.name}}_decode_weight -Description: Decode a pack of indices and put into an array of weights, it will call the *_weight_table function -Params: - weightArray -- an array of weights that can be dierectly - indexPack -- an array of indicies that needs to be decoded with the lookup table -*/ -void {{object.name}}_decode_weight({{gm.Xfixed_arr(w_bw-1, 0, 'weightArray', ip_loop_factor)}}, {{gm.Xfixed_arr(index_width-1, 0, 'indices', ip_loop_factor)}}) -{ -{% call gm.forloop(1, 'i', ip_loop_factor) %} -{{gm.ident(2)}}int indexTmp; -{{gm.ident(2)}}indexTmp = indices[i].getRaw(); -{{gm.ident(2)}}weightArray[i]=weightTable(indexTmp); -{%- endcall -%} -} -{% endif %} - -/* -Name: {{object.name}}_saturate_add -Description: this function performs add with a saturation, - it checks the result of addition and clamp the - output to MAX/MIN value of the datatype -Params: a -- input value a - b -- input value b -Return: c -- output value c after saturation -*/ -{{gm.Xfixed(int_bw-1,0)}}{{object.name}}_saturate_add( -{{gm.Xfixed_vdecl(int_bw-1, 0, 'a')}}, -{{gm.Xfixed_vdecl(int_bw-1, 0, 'b')}}) -{ -{{ident(1)}}float OVERFLOWMAX = {{2.0**(int_bw - 1) - 1.0}}; -{{ident(1)}}float OVERFLOWMIN = -{{2.0**(int_bw - 1)}}; - -{{ident(1)}}{{gm.Xfixed_vdecl(int_bw-1, 0, 'c')}}; -{{ident(1)}}c = a + b; -{{ident(1)}}if((a > 0)&&(b > 0)&&(c < 0)){ -{{ident(2)}}c = OVERFLOWMAX; -{{ident(1)}}} else if ((a < 0)&&(b < 0)&&(c > 0)) { -{{ident(2)}}c = OVERFLOWMIN; -{{ident(1)}}} -{{ident(1)}}return c; -} - -{# determine whether multiplication can be packed #} -/* -Name: {{object.name}}_pack_dsp -Description: Core arithmetic operation, do two multiplications and store in rst1 & rst2. - NOTE: for now (8/16/2018) this is just a simple version. In the future we - may utilize the function of DSP slices of the FPGA (depending on what FPGA - we want ot use) to pack two multiplications in one operation. -Params: - rst1 -- result 1 - rst2 -- result 2 - a -- multiplicand - from weight - b -- multiplicand - from weight - c -- multiplier - from input -*/ -void {{object.name}}_pack_dsp ( - {{gm.Xfixed_vdecl(int_bw-1, 0, '&rst1')}}, - {{gm.Xfixed_vdecl(int_bw-1, 0, '&rst2')}}, - {{gm.Xfixed_vdecl(w_bw-1, 0, 'a')}}, - {{gm.Xfixed_vdecl(w_bw-1, 0, 'b')}}, - {{gm.Xfixed_vdecl(ip_bw-1, 0, 'c')}} -) { - rst1 = a * c; - rst2 = b * c; -} - -/* -Name: {{object.name}}_compengine -Description: Computational engine that multiply two packs of weights with one packs of input, - compute the sum of the result of multiplications and generate two numbers as resuts. -Params: - out1 -- result 1 - out2 -- result 2 - in -- array of numbers from input - weight1 -- array of weights 1 - weight2 -- array of weights 2 -*/ -void {{object.name}}_compengine_row( - {{gm.Xfixed_vdecl(int_bw-1, 0,'&out1')}}, - {{gm.Xfixed_vdecl(int_bw-1, 0,'&out2')}}, - {{gm.Xfixed_arr(ip_bw-1, 0, 'in', ip_loop_factor)}}, -{% if object.preprocess.cluster == true %} - {{gm.Xfixed_arr(index_width-1, 0, 'indices1', ip_loop_factor)}}, - {{gm.Xfixed_arr(index_width-1, 0, 'indices2', ip_loop_factor)}} -{%- else %} - {{gm.Xfixed_arr(w_bw-1, 0, 'weight1', ip_loop_factor)}}, - {{gm.Xfixed_arr(w_bw-1, 0, 'weight2', ip_loop_factor)}} -{%- endif %} -) { - -{% if object.preprocess.cluster == true %} - {{gm.Xfixed_arr(w_bw-1, 0, 'weight1', ip_loop_factor)}}; - {{gm.Xfixed_arr(w_bw-1, 0, 'weight2', ip_loop_factor)}}; -{% endif %} -{{gen_red_tree_vdecl(ip_loop_factor, gm.Xfixed(int_bw-1,0))}} -{# if cluster is enabled #} -{%- if object.preprocess.cluster == true -%} -{{gm.ident(1)}}//Decode clustered input -{{gm.ident(1)}}{{object.name}}_decode_weight(weight1, indices1); -{{gm.ident(1)}}{{object.name}}_decode_weight(weight2, indices2); -{% endif %} -{%- call gm.forloop(1, 'i', ip_loop_factor) %} -{{gm.ident(2)}}#pragma HLS unroll -{{gm.ident(2)}}{{object.name}}_pack_dsp(inter{{ip_loop_factor}}[0][i],inter{{ip_loop_factor}}[1][i],weight1[i],weight2[i],in[i]); -{%- endcall %} -{{gen_red_tree_impl(ip_loop_factor, gm.Xfixed(int_bw-1,0))}} - out1=inter2[0][0]+inter2[0][1]; - out2=inter2[1][0]+inter2[1][1]; -} - -/* -Name: {{object.name}}_out_converge -Description: Accumulation operation that add the partial result of current iteration to - the final result. -Params: outPack -- final result of the convolution - outTemp -- output from the current iteration (for 1 pixel in the filter) -*/ -void {{object.name}}_out_converge({{gm.Xfixed_arr(int_bw-1, 0, 'outPack', op_loop_factor)}}, {{gm.Xfixed_arr(int_bw-1, 0,'outTemp',op_loop_factor)}}, int right_shift) { -{% call gm.forloop(1,'i',op_loop_factor) %} -{{gm.ident(2)}}outPack[i]={{object.name}}_saturate_add(outPack[i], (outTemp[i] >> right_shift)); -{%- endcall -%} -} - -/* -Name: {{object.name}}_layer -Description: top level function of the layer. Since fully connected layer - can be seen as a special case of fully connected layer. The - structure of them are very similar. The difference is that in - FC layer, we removed the loop for height and weight, because - for FC they are both 1. -*/ -void {{object.name}}_layer( - int CII,//image channels - int COO, //output channels - int right_shift, - {{gm.Xfui_vdecl(mem_bw, '*weight')}}, - {%- if object.interface.in == 'dram' %} - {{gm.Xfui_vdecl(mem_bw, '*in')}}, - {%- elif object.interface.in == 'onchip' %} - {{gm.Xfui_vdecl_arr(onchip_bw, 'in', 50176)}}, - {%- endif %} - {%- if object.interface.int == 'dram' %} - {{gm.Xfui_vdecl(mem_bw, '*out')}} - {%- elif object.interface.int == 'onchip' %} - {{gm.Xfui_vdecl_arr(onchip_bw, 'out', 50176)}} - {%- endif %} -){ -{{gm.ident(1)}}//Clear initial values of output buffers -{% if (object.interface.int == 'dram'): %} -{% call gm.forloop(1, 'i', 'COO*%d'|format(iters_itrm)) %} -{{gm.ident(2)}}{{gm.Xfui_vdecl(mem_bw, 'temp')}}((unsigned int) 0); -{{gm.ident(2)}}out[i] = temp; -{% endcall %} -{% elif (object.interface.int == 'onchip'): %} -{% call gm.forloop(1, 'i', 'COO*%d'|format(iters_itrm)) %} -{{gm.ident(2)}}{{gm.Xfui_vdecl(onchip_bw, 'temp')}}((unsigned int) 0); -{{gm.ident(2)}}out[i] = temp; -{% endcall %} -{% endif %} -{{gm.ident(1)}}int weight_idx=0; -{{gm.ident(1)}}//First set of loops: Iterate over filter -{% call gm.forloop(1, 'cii', 'CII') %} -{% call gm.forloop(2, 'coo', 'COO') %} -{{gm.ident(3)}}{{gm.Xfixed_arr(ip_bw-1, 0,'inTemp', ip_loop_factor)}}; -{{gm.ident(3)}}{{gm.Xfixed_arr(int_bw-1, 0,'outTemp', op_loop_factor)}}; -{% if object.interface.in == 'dram' %} -{{gm.ident(3)}}int address=cii*{{iters_input}}; -{{gm.ident(3)}}{{object.name}}_read_input(in, inTemp, address); -{% elif object.interface.in == 'onchip' %} -{{gm.ident(3)}}int address=cii*{{iters_input}}; -{{gm.ident(3)}}{{object.name}}_read_input(in, inTemp, address); -{% endif %} -{#if the output length is shorter than the op_loop_factor#} -{% if (object.wshape[1] >= op_loop_factor)%} {# op_loop_factor #} -{% call gm.forloop_s(3, 'i', op_loop_factor, 2) %} -{% if object.preprocess.cluster == true %} -{{gm.ident(4)}}{{gm.Xfixed_arr(index_width-1, 0, 'weight1', ip_loop_factor)}}; -{{gm.ident(4)}}{{gm.Xfixed_arr(index_width-1, 0, 'weight2', ip_loop_factor)}}; -{% else %} -{{gm.ident(4)}}{{gm.Xfixed_arr(w_bw-1, 0, 'weight1', ip_loop_factor)}}; -{{gm.ident(4)}}{{gm.Xfixed_arr(w_bw-1, 0, 'weight2', ip_loop_factor)}}; -{% endif%} -{{gm.ident(4)}}int address = weight_idx+i/2*{{iters_weight}}; -{{gm.ident(4)}}{{object.name}}_read_weight(weight, weight1, weight2, address); -{{gm.ident(4)}}{{object.name}}_compengine_row(outTemp[i], outTemp[i+1], inTemp, weight1, weight2); -{%- endcall -%} -{{gm.ident(3)}}{{gm.Xfixed_arr(int_bw-1, 0, 'outPackTemp', op_loop_factor)}}; -{% if object.interface.int == 'dram' %} -{{gm.ident(3)}}address=coo*{{iters_itrm}}; -{{gm.ident(3)}}{{object.name}}_read_itrm(out, outPackTemp, address); -{{gm.ident(3)}}{{object.name}}_out_converge(outPackTemp, outTemp, right_shift); -{{gm.ident(3)}}{{object.name}}_write_itrm(out, outPackTemp, address); -{{gm.ident(3)}}weight_idx += ({{op_loop_factor}}/2)*{{iters_weight}}; -{% elif object.interface.int == 'onchip' %} -{{gm.ident(3)}}address=coo*{{iters_itrm}}; -{{gm.ident(3)}}{{object.name}}_read_itrm(out, outPackTemp, address); -{{gm.ident(3)}}{{object.name}}_out_converge(outPackTemp, outTemp, right_shift); -{{gm.ident(3)}}{{object.name}}_write_itrm(out, outPackTemp, address); -{{gm.ident(3)}}weight_idx += ({{op_loop_factor}}/2)*{{iters_weight}}; -{% endif %} -{# if the channels of output is smaller than loop factor #} -{# it need some special attention #} -{% else %} -{% call gm.forloop_s(4, 'i', object.wshape[1], 2) %} -{% if object.preprocess.cluster == true %} -{{gm.ident(8)}}{{gm.Xfixed_arr(index_width-1, 0, 'weight1', ip_loop_factor)}}; -{{gm.ident(8)}}{{gm.Xfixed_arr(index_width-1, 0, 'weight2', ip_loop_factor)}}; -{% else %} -{{gm.ident(8)}}{{gm.Xfixed_arr(w_bw-1, 0, 'weight1', ip_loop_factor)}}; -{{gm.ident(8)}}{{gm.Xfixed_arr(w_bw-1, 0, 'weight2', ip_loop_factor)}}; -{% endif%} -{{gm.ident(8)}}int address = weight_idx+i/2*{{iters_weight}}; -{{gm.ident(8)}}{{object.name}}_read_weight(weight, weight1, weight2, address); -{{gm.ident(8)}}{{object.name}}_compengine_row(outTemp[i], outTemp[i+1], inTemp, weight1, weight2); -{% endcall %} -{% if object.interface.int == 'dram' %} -{{gm.ident(7)}}{{gm.Xfixed_arr(int_bw-1, 0, 'outPackTemp', op_loop_factor)}}; -{{gm.ident(3)}}address=coo*{{iters_itrm}}; -{{gm.ident(3)}}{{object.name}}_read_itrm(out, outPackTemp, address); -{{gm.ident(3)}}{{object.name}}_out_converge(outPackTemp, outTemp, right_shift); -{{gm.ident(3)}}{{object.name}}_write_itrm(out, outPackTemp, address); -{{gm.ident(3)}}weight_idx += ({{object.wshape[1]}}/2)*{{iters_weight}}; -{% elif object.interface.int == 'onchip' %} -{{gm.ident(7)}}{{gm.Xfixed_arr(int_bw-1, 0, 'outPackTemp', op_loop_factor)}}; -{{gm.ident(3)}}address=coo*{{iters_itrm}}; -{{gm.ident(3)}}{{object.name}}_read_itrm(out, outPackTemp, address); -{{gm.ident(3)}}{{object.name}}_out_converge(outPackTemp, outTemp, right_shift); -{{gm.ident(3)}}{{object.name}}_write_itrm(out, outPackTemp, address); -{{gm.ident(3)}}weight_idx += ({{object.wshape[1]}}/2)*{{iters_weight}}; -{%- endif -%} -{%- endif -%} -{%- endcall -%} -{%- endcall -%} -} - -{% if object.activation == 'ReLU' %} -/* -Name: {{object.name}}_saturate_relu -Description: performs saturation when converting from int_bw to op_bw -Params: - in -- number to be converted to output bit-width -Return: - temp -- result after saturation in output bit-width -*/ -{{gm.Xfixed(op_bw-1,0)}} {{object.name}}_saturate_relu( -{{gm.ident(1)}}{{gm.Xfixed_vdecl(int_bw-1, 0, 'in')}}) -{ -{{gm.ident(1)}}float OVERFLOWMAX = {{2.0**(op_bw - 1) - 1.0}}; -{{gm.ident(1)}}float OVERFLOWMIN = -{{2.0**(op_bw - 1)}}; - -{{gm.ident(1)}}{{gm.Xfixed_vdecl(op_bw-1, 0, 'temp')}}; -{{gm.ident(1)}}if(in >= OVERFLOWMAX){ -{{gm.ident(2)}}temp = OVERFLOWMAX; -{{gm.ident(1)}}} else if (in <= OVERFLOWMIN) { -{{gm.ident(2)}}temp = OVERFLOWMIN; -{{gm.ident(1)}}} else { -{{gm.ident(2)}}temp=in; -{{gm.ident(1)}}} -{{gm.ident(1)}}return temp; -} - -/* -Name: {{object.name}}_bias_relu -Description: computational kernel of bias relu layer, - also performs saturation when adding bias - to the pre-activation result as well as - converting it to outputs with smaller - bit-width. -Params: - in -- array of intermediate/pre-activation results - bias -- array of biases - out -- output of the bias & activation layer -*/ -void {{object.name}}_bias_relu( - {{gm.Xfixed_arr(int_bw-1, 0,'in',op_loop_factor)}}, - {{gm.Xfixed_arr(b_bw-1, 0,'bias',op_loop_factor)}}, - {{gm.Xfixed_arr(op_bw-1, 0,'out',op_loop_factor)}}, - int left_shift, - int right_shift -){ -{% call gm.forloop(1, 'j', op_loop_factor) %} -{{gm.ident(2)}}{{gm.Xfixed_vdecl(int_bw-1, 0,'temp')}}; -{{gm.ident(2)}}{{gm.Xfixed_vdecl(int_bw-1, 0,'tempBias')}}; -{{gm.ident(2)}}tempBias = bias[j]; -{{gm.ident(2)}}temp={{object.name}}_saturate_add(in[j], (tempBias << left_shift)); -{{gm.ident(2)}}temp = temp >> right_shift; -{{gm.ident(2)}}if(temp > 0) -{{gm.ident(3)}}out[j] = {{object.name}}_saturate_relu(temp); -{{gm.ident(2)}}else -{{gm.ident(3)}}out[j] = 0.0; -{%- endcall -%} -} - -/* -Name: {{object.name}}_bias_relu -Description: top level function for the ReLU activation layer. This layer also - performs "adding bias" and padding for the next layer. -Params: see below -*/ -void {{object.name}}_bias_relu( - int COO, - int left_shift, - int right_shift, - {{gm.Xfui_vdecl(mem_bw, '*weight')}}, - {%- if object.interface.in == 'dram' %} - {{gm.Xfui_vdecl(mem_bw, '*in')}}, - {%- elif object.interface.in == 'onchip' %} - {{gm.Xfui_vdecl_arr(onchip_bw, 'in', 50176)}}, - {%- endif %} - {%- if object.interface.int == 'dram' %} - {{gm.Xfui_vdecl(mem_bw, '*out')}} - {%- elif object.interface.int == 'onchip' %} - {{gm.Xfui_vdecl_arr(onchip_bw, 'out', 50176)}} - {%- endif %} -) { -{{gm.ident(1)}}{{gm.Xfixed_arr(int_bw-1, 0, 'inTemp', op_loop_factor)}}; -{{gm.ident(1)}}{{gm.Xfixed_arr(op_bw-1, 0, 'outTemp', op_loop_factor)}}; -{{gm.ident(1)}}{{gm.Xfixed_arr(b_bw-1, 0, 'bias', op_loop_factor)}}; -{{gm.ident(1)}}int weight_idx = 0; -{{gm.ident(1)}}int address = 0; -{% call gm.forloop(1, 'co', 'COO') %} -{{gm.ident(2)}}{{object.name}}_read_bias(weight, bias, co*{{iters_bias}}); -{{gm.ident(2)}}{{object.name}}_read_itrm(in, inTemp, co*{{iters_itrm}}); -{{gm.ident(2)}}{{object.name}}_bias_relu(inTemp, bias, outTemp,left_shift, right_shift); -{{gm.ident(2)}}{{object.name}}_write_output(out, outTemp, co*{{iters_output}}); -{% endcall %} -} -{% endif %} -{%- endmacro %} - -{% macro fvalid(object, prev, flags) %} -{%- set IC = object.wshape[0] -%} -{%- set OC = object.wshape[1] -%} -{%- set ip_loop_factor = object.ip_loop_factor -%} -{%- set op_loop_factor = object.op_loop_factor -%} -{# ---------------------------- #} -{# the following code determins the correct size of output of previous layer#} -{% if prev.layer_type == 'fc' %} -{%- set H = 1 -%} -{%- set W = 1 -%} -{%- set C = prev.wshape[1] -%} -{% elif prev.layer_type == 'max_pool' %} -{%- set stride = prev.weights.stride[1] -%} -{%- set ksize = prev.weights.ksize[1] -%} -{%- set padsize = prev.weights.padsize -%} -{%- set C = prev.dshape[2] -%} -{% if prev.mode == 'SAME' %} -{%- set H = (prev.dshape[0]/stride)|round(0,'ceil')|int -%} -{%- set W = (prev.dshape[1]/stride)|round(0,'ceil')|int -%} -{% else %} -{%- set H = ((prev.dshape[0]-ksize+1)/stride)|round(0,'ceil')|int -%} -{%- set W = ((prev.dshape[1]-ksize+1)/stride)|round(0,'ceil')|int -%} -{% endif %} -{% endif %} -{#----------------------------#} -{%- set ip_bw = object.preprocess.ip_bit_width-%} -{%- set int_bw = object.preprocess.int_bit_width-%} -{%- set op_bw = object.preprocess.op_bit_width-%} -{%- set w_bw = object.weights.W_width-%} -{%- set b_bw = object.weights.b_width-%} -{%- set mem_bw = object.interface.memory_bit_width -%} -{# internal bw is the "safe" bit width for the result of multiplication #} -{%- set iters_weight = (w_bw*ip_loop_factor*2/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_bias = (ip_bw*ip_loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_input = (ip_bw*ip_loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_output = (op_bw*op_loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_itrm = (int_bw*op_loop_factor/mem_bw)|round(0,'ceil')|int -%} - -void {{object.name}}_test_fc() { - //test framework - srand(17); - bool flag = false; -//Allocate buffers for test data -{{gm.ident(1)}}int* input_buf = new int[{{IC}}]; -{{gm.ident(1)}}int* weight_buf = new int[{{IC}}*{{OC}}]; -{{gm.ident(1)}}int* output_buf = new int[{{OC}}]; -//Generate random input -{% call gm.forloop(1, 'c', IC) %} -{{gm.ident(2)}}input_buf[c] = rand()%2; -{%- endcall -%} - -//transform HWC order into CHW order -{% call gm.forloop(1, 'c', (C/ip_loop_factor)|round(0,'ceil')|int) %} -{% call gm.forloop(2, 'h', H) %} -{% call gm.forloop(3, 'w', W) %} -{% call gm.forloop(4, 'ci', ip_loop_factor) %} -{{gm.ident(5)}}((ap_int<512>*)a)[c*{{H}}*{{W}}+h*{{W}}+w].{{gm.pr_range_idx(ip_bw, 'ci')}} = input_buf[h*{{W}}*{{C}}+w*{{C}}+(c*{{ip_loop_factor}}+ci)]; -{%- endcall -%} -{%- endcall -%} -{%- endcall -%} -{%- endcall -%} - -//generate random weights and pack -{% call gm.forloop(1, 'i', OC) %} -{% call gm.forloop(2, 'j', IC) %} -{{gm.ident(2)}}weight_buf[i*{{IC}}+j] = rand()%2; -{% endcall %} -{% endcall %} -pack_weights_fc<int>({{H}}, {{W}}, {{C}}, {{OC}}, weight_buf, (ap_int<512>*)w); - -//compute gold -fc(output_buf, weight_buf, input_buf, {{IC}}, {{OC}}); -//compute real -{{object.name}}_layer({{(IC/ip_loop_factor)|round(0,'ceil')|int}}, {{(OC/op_loop_factor)|round(0,'ceil')|int}}, 0, w, a, c); - -//compare -{% call gm.forloop(1, 'i', (OC/op_loop_factor)|round(0,'ceil')|int) %} -{% call gm.forloop(2, 'ii', [OC, op_loop_factor]|min) %} -{{gm.ident(5)}}{{gm.fi_vdecl(mem_bw, 'temp_c')}} = ((ap_int<512>*)c)[i*{{iters_itrm}}]; -{{gm.ident(5)}}{{gm.fi_vdecl(int_bw, 'out_layer')}} = temp_c.{{gm.pr_range_idx(16,'ii')}}; -{{gm.ident(5)}}int out_golden = output_buf[(i*{{op_loop_factor}}+ii)]; -{{gm.ident(5)}}if (out_layer != out_golden) { -{{gm.ident(5)}}cout << "[" << (i*{{op_loop_factor}}+ii) << "]"; -{{gm.ident(5)}}cout << "Diff: Layer - " << out_layer << "Golden - " << out_golden << endl; -{{gm.ident(5)}}flag = true; -{{gm.ident(5)}}} -{%- endcall -%} -{{gm.ident(2)}}//cout << endl; -{%- endcall -%} - -{{gm.ident(1)}}delete [] input_buf; -{{gm.ident(1)}}delete [] output_buf; -{{gm.ident(1)}}delete [] weight_buf; -{{gm.ident(1)}}assert(!flag); -} - -{% if object.activation == 'ReLU' %} -void {{object.name}}_test_relu() { -//Dummy function -//ReLU for FC is very simple so we omitted the validation function for now -} -{%- endif -%} -{% endmacro %} \ No newline at end of file diff --git a/DRED_codegen/templates/global_macros.inc b/DRED_codegen/templates/global_macros.inc deleted file mode 100644 index 9ce4e993c710b85f8d920e28983660e126f30bef..0000000000000000000000000000000000000000 --- a/DRED_codegen/templates/global_macros.inc +++ /dev/null @@ -1,131 +0,0 @@ -{# indentation #} -{%- macro ident(lvl) %} -{% for i in range(lvl) %} {% endfor %} -{%- endmacro %} - -{# basic for loop, start from 0 #} -{% macro forloop(lvl, idx, bound) %} -{{ident(lvl)}}for (int {{idx}}=0; {{idx}} < {{bound}}; {{idx}}++) { -{{ caller() }} -{{ident(lvl)}}} -{% endmacro %} - -{# for loop with non-uniform stride #} -{% macro forloop_s(lvl, idx, bound, stride) %} -{{ident(lvl)}}for (int {{idx}}=0; {{idx}} < {{bound}}; {{idx}}+={{stride}}) { -{{ caller() }} -{{ident(lvl)}}} -{% endmacro %} - -{# for loop with configurable boundaries and stride #} -{% macro forloop_i(lvl, init, idx, bound, stride) %} -{{ident(lvl)}}for (int {{idx}}={{init}}; {{idx}} < {{bound}}; {{idx}}+={{stride}}) { -{{ caller() }} -{{ident(lvl)}}} -{% endmacro %} - -{# select bit range from 0 to len #} -{%- macro pr_range(len) -%} -range({{len - 1}},0) -{%- endmacro %} - -{# select bit range from starat to end #} -{%- macro pr_range_bound(start,end) -%} -range({{start}},{{end}}) -{%- endmacro %} - -{# select bit range from starat to end wiht index #} -{%- macro pr_range_idx(len, id) -%} -range({{len}}*{{id}}+{{len-1}},{{len}}*{{id}}) -{%- endmacro %} - -{# declare arbitrary length int #} -{%- macro fixed_int(size) %} -ap_int<{{size}}> -{%- endmacro %} - -{# declare arbitrary length unsigned int #} -{%- macro fixed_uint(size) %} -ap_uint<{{size}}> -{%- endmacro %} - -{# declare fixed point number with int_size number of integer bits #} -{%- macro fixed(size, int_size) %} -ap_fixed<{{size}},{{int_size}},AP_RND_CONV,AP_SAT> -{%- endmacro %} - -{# declare an integer with parameterized name #} -{%- macro fi_vdecl(size, name) %} -{{fixed_int(size)}} {{name}} -{%- endmacro %} - -{# declare an integer with parameterized name #} -{%- macro fi_vdecl_arr(size, name, asize) %} -{{fixed_int(size)}} {{name}}[{{asize}}] -{%- endmacro %} - -{# declare an integer with parameterized name #} -{%- macro fui_vdecl(size, name) %} -{{fixed_uint(size)}} {{name}} -{%- endmacro %} - -{# declare an array of unsigned integer with parameterized name #} -{%- macro fui_vdecl_arr(size, name, asize) %} -{{fixed_uint(size)}} {{name}}[{{asize}}] -{%- endmacro %} - -{# declare an array of fixed point number #} -{%- macro f_vdecl(size, int_size, name) %} -{{fixed(size, int_size)}} {{name}} -{%- endmacro %} - -{# declare an array of fixed point number with parameterized name#} -{%- macro f_vdecl_arr(size, int_size, name, asize) %} -{{fixed(size, int_size)}} {{name}}[{{asize}}] -{%- endmacro %} - -{# declare an 2d array of fixed point number with parameterized name#} -{%- macro f_vdecl_2darr(size, int_size, name, a1size, a2size) %} -{{fixed(size, int_size)}} {{name}}[{{a1size}}][{{a2size}}] -{%- endmacro %} - -{# declare a streaming interface #} -{%- macro fi_streamdecl(size, name) %} -hls::stream< {{fixed_int(size)}} > {{name}} -{%- endmacro %} - -{# the following macros are defined for Xcelo datatypes #} -{# declare fixed point number with int_size number of integer bits #} -{%- macro Xfixed(size, int_size) %} -fp_int<{{size}},{{int_size}}> -{%- endmacro %} - -{%- macro Xfixed_vdecl(size, int_size, name) %} -fp_int<{{size}},{{int_size}}> {{name}} -{%- endmacro %} - -{%- macro Xfixed_arr(size, int_size, name, asize) %} -fp_int<{{size}},{{int_size}}> {{name}}[{{asize}}] -{%- endmacro %} - -{%- macro Xfixed_2darr(size, int_size, name, asize1, asize2) %} -fp_int<{{size}},{{int_size}}> {{name}}[{{asize1}}][{{asize2}}] -{%- endmacro %} - -{%- macro Xfui(size) %} -xcelo_uint<{{size}}> -{%- endmacro %} - -{%- macro Xfui_vdecl(size, name) %} -xcelo_uint<{{size}}> {{name}} -{%- endmacro %} - -{%- macro Xfui_vdecl_arr(size, name, asize) %} -xcelo_uint<{{size}}> {{name}}[{{asize}}] -{%- endmacro %} - -{%- macro Xfui_arr(size, name, asize) %} -xcelo_uint<{{size}}> {{name}}[{{asize}}] -{%- endmacro %} - - diff --git a/DRED_codegen/templates/lrn.inc b/DRED_codegen/templates/lrn.inc deleted file mode 100644 index 4923ad355575229f4c81fa2eeb24007ecbacce56..0000000000000000000000000000000000000000 --- a/DRED_codegen/templates/lrn.inc +++ /dev/null @@ -1,242 +0,0 @@ -{% import "global_macros.inc" as gm %} -{% import "aux_funcs.inc" as aux %} - -{%- macro handle_preprocess(object, name) -%} - {%- if name in object.keys() %} - {%- for key, value in object.iteritems() %} - {%- if key == name %} - {%- if value > 0 %} - {%- endif %} - {%- endif %} - {%- endfor %} - {%- endif %} -{%- endmacro %} - -{%- macro fdecl(object) -%} -{%- endmacro %} - -{%- macro var_decl(object) -%} - {%- if object.preprocess %} - {%- else %} - {%- endif %} -{%- endmacro %} - -{% macro fvalid_call(object) %} -{{gm.ident(1)}}{{object.name}}_test_pool(); -{{gm.ident(1)}}{{object.name}}_test_pool_flatten(); -{% endmacro %} - -{# defines paramters passed in when it get called#} -{%- macro fcall(object, debug) -%} -{%- set loop_factor = object.loop_factor -%} -{%- set ip_shift = object.preprocess.ip_shift-%} -{%- set channels = (object.dshape[2]/loop_factor)|round(0,'ceil')|int-%} -{{gm.ident(1)}}{{object.name}}_layer ({{object.dshape[0]}}, {{object.dshape[1]}}, {{channels}}, {{object.weights.adjKerNum}}, {{object.weights.K}}, {{object.weights.alpha}}, {{object.weights.beta}}, -{%- if object.interface.in == 'dram' %} - a, -{%- elif object.interface.in == 'onchip' %} - a_onchip, -{%- endif %} -{%- if object.interface.int == 'dram' %} - c -{%- elif object.interface.int == 'onchip' %} - c_onchip -{%- endif %} -); -{%- endmacro %} - -{% macro readFunc(layer, name, mem, bw, loop_factor)%} -{%- set iters = (bw*loop_factor/mem)|round(0,'ceil')|int -%} -int read{{name}}_{{layer}}({{gm.fixed_int(mem)}} *mem, - {{gm.fixed_int(bw*loop_factor)}} &input, - int address) -{ - int index=0; - //Read requires {{iters}} iterations -{%for idx in range(0,iters)%} -{# if this is not the last pack#} -{%if ((idx+1)*mem < bw*loop_factor) %} - input.{{gm.pr_range_bound((idx+1)*mem-1,idx*mem)}}=mem[address+index].{{gm.pr_range_bound((mem-1),0)}}; - index ++; -{# if this is the last pack#} -{% else %} - input.{{gm.pr_range_bound(bw*loop_factor-1,idx*mem)}}=mem[address+index].{{gm.pr_range_bound(bw*loop_factor-idx*mem-1,0)}}; -{%endif%} -{%endfor%} - return address; -} -{% endmacro %} - -{% macro writeFunc(layer, name, mem, bw, loop_factor)%} -{%- set iters = (bw*loop_factor/mem)|round(0,'ceil')|int -%} -int write{{name}}_{{layer}}({{gm.fixed_int(mem)}} *mem, - {{gm.fixed_int(bw*loop_factor)}} output, - int address) -{ - int index=0; - //Write requires {{iters}} iterations -{%for idx in range(0,iters)%} -{# if this is not the last pack#} -{%if ((idx+1)*mem < bw*loop_factor) %} - mem[address+index].{{gm.pr_range_bound((mem-1),0)}} = output.{{gm.pr_range_bound((idx+1)*mem-1,idx*mem)}}; - index ++; -{# if this is the last pack#} -{% else %} - mem[address+index].{{gm.pr_range_bound(bw*loop_factor-idx*mem-1,0)}}=output.{{gm.pr_range_bound(bw*loop_factor-1,idx*mem)}}; -{%endif%} -{%endfor%} - return address; -} -{% endmacro %} - -{%- macro fimpl(object) -%} -{%- set ip_bw = object.preprocess.ip_bit_width-%} -{%- set ip_shift = object.preprocess.ip_shift-%} -{%- set op_bw = object.preprocess.op_bit_width-%} -{%- set op_shift = object.preprocess.op_shift -%} -{%- set loop_factor = object.loop_factor -%} -{%- set mem_bw = object.interface.memory_bit_width -%} -{%- set channels = (object.dshape[2]/loop_factor)|round(0,'ceil')|int-%} -{%- set channel_buf_size = 512 -%} -{# internal bw is the "safe" bit width for the result of multiplication #} -{%- set iters_input = (ip_bw*loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_output = (op_bw*loop_factor/mem_bw)|round(0,'ceil')|int -%} - -{{readFunc(object.name, "Input", mem_bw, ip_bw, loop_factor)}} -{{writeFunc(object.name, "Output", mem_bw, op_bw, loop_factor)}} - -void {{object.name}}_read_input_channel( -{{gm.ident(1)}}int IH, -{{gm.ident(1)}}int IW, -{{gm.ident(1)}}int IC, -{{gm.ident(1)}}ap_int<{{mem_bw}}> *mem, -{{gm.ident(1)}}{{gm.f_vdecl_arr(ip_bw, ip_bw, 'buffer', channel_buf_size)}}, -{{gm.ident(1)}}int address) -{ -{{gm.ident(1)}}ap_int<{{ip_bw*loop_factor}}> inPackTemp; -{% call gm.forloop(1, 'c', 'IC') %} -{{gm.ident(2)}}readInput_{{object.name}}(mem, inPackTemp, address); -{% call gm.forloop(2, 'ci', loop_factor) %} -{{gm.ident(3)}}buffer[c*{{loop_factor}}+ci].{{gm.pr_range(ip_bw)}} = inPackTemp.{{gm.pr_range_idx(ip_bw, 'ci')}}; -{%- endcall -%} -{{gm.ident(2)}}address += IH*IW*{{iters_input}}; -{%- endcall -%} -} - -void {{object.name}}_write_output_channel( -{{gm.ident(1)}}int IH, -{{gm.ident(1)}}int IW, -{{gm.ident(1)}}int IC, -{{gm.ident(1)}}ap_int<{{mem_bw}}> *mem, -{{gm.ident(1)}}{{gm.f_vdecl_arr(ip_bw, ip_bw, 'buffer', channel_buf_size)}}, -{{gm.ident(1)}}int address) -{ -{{gm.ident(1)}}ap_int<{{ip_bw*loop_factor}}> outPackTemp; -{% call gm.forloop(1, 'c', 'IC') %} -{% call gm.forloop(2, 'ci', loop_factor) %} -{{gm.ident(3)}}outPackTemp.{{gm.pr_range_idx(ip_bw, 'ci')}} = buffer[c*{{loop_factor}}+ci].{{gm.pr_range(ip_bw)}}; -{%- endcall -%} -{{gm.ident(2)}}writeOutput_{{object.name}}(mem, outPackTemp, address); -{{gm.ident(2)}}address += IH*IW*{{iters_output}}; -{%- endcall -%} -} - -void {{object.name}}_kernel ( -{{gm.ident(1)}}{{gm.f_vdecl_arr(ip_bw, ip_bw, 'in', channel_buf_size)}}, -{{gm.ident(1)}}{{gm.f_vdecl_arr(ip_bw, ip_bw, 'out', channel_buf_size)}}, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'kernelNum')}}, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'adjKerNum')}}, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'K')}}, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'alpha')}}, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'beta')}}) -{ -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'zero')}} = 0; -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'calKernel')}} = kernelNum -1; - -{% call gm.forloop(1, 'i', 'kernelNum') %} -{{gm.ident(2)}}{{gm.f_vdecl(ip_bw, ip_bw, 'comp1')}} = i-(adjKerNum/2); -{{gm.ident(2)}}{{gm.f_vdecl(ip_bw, ip_bw, 'comp2')}} = i+(adjKerNum/2); -{{gm.ident(2)}}{{gm.f_vdecl(ip_bw, ip_bw, 'lowerBound')}} = (comp1 > zero) ? (comp1) : zero; -{{gm.ident(2)}}{{gm.f_vdecl(ip_bw, ip_bw, 'upperBound')}} = (calKernel < comp2) ? (calKernel) : comp2; -{{gm.ident(2)}}{{gm.f_vdecl(ip_bw*2, ip_bw*2, 'power')}}; -{{gm.ident(2)}}{{gm.f_vdecl(ip_bw*3, ip_bw*3, 'sum')}} = 0; -{{gm.ident(2)}}{{gm.f_vdecl(ip_bw*3, ip_bw*3, 'denominator')}}; -{{gm.ident(2)}}{{gm.f_vdecl(ip_bw*3, ip_bw*3, 'scaledSum')}}; -{% call gm.forloop_i(2, 'lowerBound', 'j', '=upperBound', 1)%} -{{gm.ident(3)}}sum += in[j]*in[j]; -{%- endcall -%} -{{gm.ident(2)}}scaledSum = K + (alpha * sum); -{{gm.ident(2)}}//TODO: the data range of this temp should be determined by analyzing the data range -{{gm.ident(2)}}{{gm.f_vdecl(ip_bw*3, ip_bw, 'temp')}}; -{{gm.ident(2)}}temp.{{gm.pr_range(ip_bw*3)}} = scaledSum.{{gm.pr_range(ip_bw*3)}}; -{{gm.ident(2)}}float tempfloat = temp; -{{gm.ident(2)}}denominator = pow(tempfloat, (double)beta); //TODO: directly use beta????? -{{gm.ident(2)}}{{gm.f_vdecl(ip_bw, ip_bw, 'shift_norm_output')}}; -{{gm.ident(2)}}shift_norm_output = in[i]/denominator; -{{gm.ident(2)}}out = shift_norm_output[i] >> 8; -{%- endcall -%} -} - -void {{object.name}}_layer( -{{gm.ident(1)}}int IH, -{{gm.ident(1)}}int IW, -{{gm.ident(1)}}int IC, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'adjKerNum')}}, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'K')}}, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'alpha')}}, -{{gm.ident(1)}}{{gm.f_vdecl(ip_bw, ip_bw, 'beta')}}, -{{gm.ident(1)}}ap_int<{{mem_bw}}>input[50176], -{{gm.ident(1)}}ap_int<{{mem_bw}}>output[50176]) -{ -{{gm.ident(1)}}{{gm.f_vdecl_arr(ip_bw, ip_bw, 'inTemp', channel_buf_size)}}; -{{gm.ident(1)}}{{gm.f_vdecl_arr(ip_bw, ip_bw, 'outTemp', channel_buf_size)}}; -{% call gm.forloop(1, 'i', 'IH*IW') %} -{{gm.ident(2)}}//loop index i is also the address of first pack of that fibre -{{gm.ident(2)}}{{object.name}}_read_input_channel(IH, IW, IC, input, inTemp, i); -{{gm.ident(2)}}{{object.name}}_kernel(inTemp, outTemp, IC*{{loop_factor}}, adjKerNum, K, alpha, beta); -{{gm.ident(2)}}{{object.name}}_write_output_channel(IH, IW, IC, output, outTemp, i); -{%- endcall -%} -} -{%- endmacro -%} - -{%- macro fvalid(object) -%} -{%- set IC = object.wshape[2] -%} -{%- set IH = H+2*padsize -%} -{%- set IW = W+2*padsize -%} -{%- set ip_bw = object.preprocess.ip_bit_width-%} -{%- set op_bw = object.preprocess.op_bit_width-%} -{%- set mem_bw = object.interface.memory_bit_width -%} -{%- set onchip_bw = object.interface.onchip_bit_width -%} -{%- set channels = (object.dshape[2]/loop_factor)|round(0,'ceil')|int-%} -void {{object.name}}_test_LRN() { -{{gm.ident(1)}}srand(17); -{{gm.ident(1)}}//test framework -{{gm.ident(1)}}int* input_buf = new int[{{IH}}*{{IW}}*{{IC}}]; -{{gm.ident(1)}}int* output_buf = new int[{{IH}}*{{IW}}*{{IC}}]; -{% call gm.forloop(1, 'i', IH*IW*IC) %} -{{gm.ident(2)}}input_buf[i] = rand()%5; -{%- endcall -%} - -{{gm.ident(1)}}pack_input<int>({{IH}}, {{IW}}, {{IC}}, 0, input_buf, a); -{{gm.ident(1)}}LRN1_layer({{IH}}, {{IW}}, {{channels}}, {{object.weights.adjKerNum}}, {{object.weights.K}}, {{object.weights.alpha}}, {{object.weights.beta}}, a, c); -{{gm.ident(1)}}lrn_gold({{IH}}, {{IW}}, {{IC}}, {{object.weights.adjKerNum}}, {{object.weights.K}}, {{object.weights.alpha}}, {{object.weights.beta}}, input_buf, output_buf); - -{% call gm.forloop(1, 'i', channels) %} -{% call gm.forloop(2, 'ii', op_loop_factor) %} -{% call gm.forloop(3, 'j', H) %} -{% call gm.forloop(4, 'k', W) %} -{{gm.ident(5)}}{{gm.fi_vdecl(512, 'temp_c')}} = c[(i*{{H}}*{{W}}+j*{{W}}+k)*{{iters_itrm}}]; -{{gm.ident(5)}}{{gm.fi_vdecl(16, 'out_layer')}} = temp_c.{{gm.pr_range_idx(16,'ii')}}; -{{gm.ident(5)}}int out_golden = output_buf[(i*{{op_loop_factor}}+ii)*{{H}}*{{W}}+j*{{W}}+k]; -{{gm.ident(5)}}if (out_layer != out_golden) { -{{gm.ident(5)}}cout << "[" << (i*{{op_loop_factor}}+ii) << "][" << j << "][" << k << "]"; -{{gm.ident(5)}}cout << "Diff: Layer - " << out_layer << "Golden - " << out_golden << endl; -{{gm.ident(5)}}} -{%- endcall -%} -{{gm.ident(3)}}//cout << endl; -{%- endcall -%} -{{gm.ident(2)}}//cout << endl; -{%- endcall -%} -{%- endcall -%} -} -{%- endmacro %} \ No newline at end of file diff --git a/DRED_codegen/templates/max_pool.inc b/DRED_codegen/templates/max_pool.inc deleted file mode 100644 index 29051212d562e41f122a85c785ff8c1b08c3d3d2..0000000000000000000000000000000000000000 --- a/DRED_codegen/templates/max_pool.inc +++ /dev/null @@ -1,897 +0,0 @@ -{# max_pool.inc #} -{# Jinja2 template for HLS synthesizable C++ code #} -{# Inspirit IoT, Inc. 2018 #} -{# Contributers: #} -{# Mang Yu #} -{# Kyle Rupnow #} -{# Xinheng Liu #} - -{% import "global_macros.inc" as gm %} -{% set ident = gm.ident %} -{% import "aux_funcs.inc" as aux %} - -{# Description: Handle preprocess -- NOT USED #} -{# ------ #} -{# Params: #} -{%- macro handle_preprocess(object, name) -%} - {%- if name in object.keys() %} - {%- for key, value in object.iteritems() %} - {%- if key == name %} - {%- if value > 0 %} - {%- endif %} - {%- endif %} - {%- endfor %} - {%- endif %} -{%- endmacro %} - -{# Description: declare the layer functions--NOT USED #} -{# ------ #} -{# Params: #} -{%- macro fdecl(object) -%} -{%- endmacro %} - -{# Description: declare global variabels -- NOT USED #} -{# ------ #} -{# Params: #} -{%- macro var_decl(object) -%} - {%- if object.preprocess %} - {%- else %} - {%- endif %} -{%- endmacro %} - -{# Description: call validation function #} -{# ------ #} -{# Params: #} -{# object -- layer object #} -{% macro fvalid_call(object) %} -{% if object.flatten %} -{{ident(1)}}{{object.name}}_test_pool_flatten(); -{% else %} -{{ident(1)}}{{object.name}}_test_pool(); -{% endif %} -{% endmacro %} - -{# Description: call layer and activation functions #} -{# ------ #} -{# Params: #} -{# object -- layer object #} -{# flags -- flags for debugging, verifcation mode, etc. #} -{%- macro fcall(object, flags) -%} -{%- set loop_factor = object.loop_factor -%} -{%- set stride = object.weights.stride[1] -%} -{%- set ksize = object.weights.ksize[1] -%} -{%- set padsize = object.weights.padsize -%} -{%- set mode = object.mode -%} -{%- set ip_shift = object.preprocess.ip_shift-%} -{%- set op_shift = object.preprocess.op_shift -%} -{%- set b_shift = object.weights.b_shift-%} -{%- set right_shift = ip_shift - op_shift -%} -{%- set left_shift = ip_shift - b_shift-%} -{%- set channels = (object.dshape[2]/loop_factor)|round(0,'ceil')|int-%}{# #packets depth-wise#} -{# read biases from file and put into memory if in verification mode #} -{% if flags.verify %} -{{ident(1)}}{{aux.fcall_readb(object)}} -{% endif %} -{% if not object.flatten == true %} -{# decide the size of output depending on type of padding #} -{# for padding strategy refer to: "https://www.tensorflow.org/api_guides/python/nn#Convolution" #} -{# TODO TODO TODO: the behavior differs from TensorFlow (8/16/2018) #} -{% if object.mode == 'SAME' %} -{%- set OH = (object.dshape[0]/stride+2*padsize)|round(0,'ceil')|int -%} -{%- set OW = (object.dshape[1]/stride+2*padsize)|round(0,'ceil')|int -%} -{% else %} -{%- set OH = ((object.dshape[0]-ksize+1)/stride+2*padsize)|round(0,'ceil')|int -%} -{%- set OW = ((object.dshape[1]-ksize+1)/stride+2*padsize)|round(0,'ceil')|int -%} -{% endif %} -{{ident(1)}}{{object.name}}_bias_relu_pool( -{%- if object.weights["padding"] == 'SAME' %} -{{object.weights.padsize}}, -{%- else %} - 0, -{%- endif %} -{{object.dshape[0]}}, {{object.dshape[1]}}, {{channels}}, {{OH}}, {{OW}}, {{left_shift}}, {{right_shift}}, w, -{# placeholder, in case we need to be able to switch between offchip memory and onchip memory #} -{%- if object.interface.in == 'dram' %} - c, -{%- elif object.interface.in == 'onchip' %} - c_onchip, -{%- endif %} -{%- if object.interface.int == 'dram' %} - a -{%- elif object.interface.int == 'onchip' %} - a_onchip -{%- endif %} -); -{% else %} -{# if flatten version is required #} -{% if object.mode == 'SAME' %} -{%- set OH = (object.dshape[0]/stride)|round(0,'ceil')|int -%} -{%- set OW = (object.dshape[1]/stride)|round(0,'ceil')|int -%} -{% else %} -{%- set OH = ((object.dshape[0]-ksize+1)/stride)|round(0,'ceil')|int -%} -{%- set OW = ((object.dshape[1]-ksize+1)/stride)|round(0,'ceil')|int -%} -{% endif %} -{{ident(1)}}{{object.name}}_bias_relu_pool_flatten({{object.dshape[0]}}, {{object.dshape[1]}}, {{channels}}, {{OH}}, {{OW}}, {{left_shift}}, {{right_shift}}, w, -{%- if object.interface.in == 'dram' %} - c, -{%- elif object.interface.in == 'onchip' %} - c_onchip, -{%- endif %} -{%- if object.interface.int == 'dram' %} - a -{%- elif object.interface.int == 'onchip' %} - a_onchip -{%- endif %} -); -{% endif %} -{%- endmacro %} - -{# Description: implement the memory read & unpack function #} -{# ------ #} -{# Params: #} -{# layer -- name of the layer #} -{# name -- name of the buffer to read: output/itrm,etc. #} -{# mem_bw -- bit width of a single memory line #} -{# bw -- bit width of the data we want to read #} -{# loop_factor -- loop_factor, also the pack size #} -{# TODO: share instances of read function between layers #} -{% macro readFunc(layer, name, mem_bw, bw, loop_factor)%} -void {{layer}}_read_{{name}}( - {{gm.Xfui(mem_bw)}} *mem, - {{gm.Xfixed_arr(bw-1, 0, 'inputToLayer', loop_factor)}}, - int address) { -{% if mem_bw >= (loop_factor*bw) %} -{% call gm.forloop(1, 'i', loop_factor) %} -{%- if bw == 8-%} -{{ident(2)}}inputToLayer[i]={{gm.Xfixed(bw-1,0)}}::createRaw(mem[address].getChar(i)); -{%- elif bw == 16 -%} -{{ident(2)}}inputToLayer[i]={{gm.Xfixed(bw-1,0)}}::createRaw(mem[address].getShort(i)); -{%- endif -%} -{% endcall %} -{%- else -%} -{% set mem_num = (mem_bw/bw)|int %} -{% call gm.forloop_s(1, 'i', loop_factor, mem_num) %} -{% call gm.forloop(2, 'j', mem_num) %} -{%- if bw == 8-%} -{{ident(3)}}inputToLayer[i+j]={{gm.Xfixed(bw-1,0)}}::createRaw(mem[address].getChar(i)); -{%- elif bw == 16 -%} -{{ident(3)}}inputToLayer[i+j]={{gm.Xfixed(bw-1,0)}}::createRaw(mem[address].getShort(i)); -{%- endif -%} -{%- endcall -%} -{%- endcall -%} -{%- endif -%} -} -{% endmacro %} - -{# Description: implement the memory write & pack function #} -{# ------ #} -{# Params: #} -{# layer -- name of the layer #} -{# name -- name of the buffer to write: output/itrm,etc.#} -{# mem_bw -- bit width of a single memory line #} -{# bw -- bit width of the data we want to read #} -{# loop_factor -- loop_factor, also the pack size #} -{# TODO: share instances of read function between layers #} -{% macro writeFunc(layer, name, mem_bw, bw, loop_factor)%} -void {{layer}}_write_{{name}}( - {{gm.Xfui(mem_bw)}} *mem, - {{gm.Xfixed_arr(bw-1, 0, 'outputFromLayer', loop_factor)}}, - int address) { -{% if mem_bw >= (loop_factor*bw) %} -{% call gm.forloop(1, 'i', loop_factor) %} -{% if bw == 16 %} -{{ident(2)}}mem[address].setValue(i, outputFromLayer[i].getRaw()); -{% elif bw == 8 %} -{{ident(2)}}mem[address].setValue(i, (char)outputFromLayer[i].getRaw()); -{% endif %} -{% endcall %} -{%- else -%} -{% set mem_num = (mem_bw/bw)|int %} -{% call gm.forloop_s(1, 'i', loop_factor, mem_num) %} -{% call gm.forloop(2, 'j', mem_num) %} -{% if bw == 16 %} -{{ident(3)}}mem[address].setValue(j, outputFromLayer[i+j].getRaw()); -{% elif bw == 8 %} -{{ident(3)}}mem[address].setValue(j, (char)outputFromLayer[i+j].getRaw()); -{% endif %} -{%- endcall -%} -{%- endcall -%} -{%- endif -%} -} -{% endmacro %} - -{# Description: implement the layer functions #} -{# #} -{# ------ #} -{# Params: #} -{# object: layer object that contains all the info #} -{%- macro fimpl(object) -%} -{%- set ip_bw = object.preprocess.ip_bit_width-%} -{%- set ip_shift = object.preprocess.ip_shift-%} -{%- set op_bw = object.preprocess.op_bit_width-%} -{%- set op_shift = object.preprocess.op_shift -%} -{%- set b_bw = object.weights.b_width-%} -{%- set b_shift = object.weights.b_shift-%} -{%- set loop_factor = object.loop_factor -%} -{%- set mem_bw = object.memory_bit_width -%} -{%- set stride = object.weights.stride[1] -%} -{%- set ksize = object.weights.ksize[1] -%} -{%- set mode = object.mode -%} -{# internal bw is the "safe" bit width for the result of multiplication #} -{%- set iters_bias = (b_bw*loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_input = (ip_bw*loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_output = (op_bw*loop_factor/mem_bw)|round(0,'ceil')|int -%} - -{# implement memory read/write functions #} -{{readFunc(object.name, "input", mem_bw, ip_bw, loop_factor)}} -{{readFunc(object.name, "bias", mem_bw, b_bw, loop_factor)}} -{{writeFunc(object.name, "output", mem_bw, op_bw, loop_factor)}} - -//Implementing {{mode}} mode -//size {{ksize}}x{{ksize}} stride {{stride}} - -/* -Name: {{object.name}}_saturate -Description: performs saturation when converting from int_bw to op_bw -Params: - in -- number to be converted to output bit-width -Return: - temp -- result after saturation in output bit-width -*/ -{{gm.Xfixed(op_bw-1,0)}} {{object.name}}_saturate( -{{ident(1)}}{{gm.Xfixed_vdecl(ip_bw-1, 0, 'in')}}) -{ -{{ident(1)}}float OVERFLOWMAX = {{2.0**(op_bw - 1) - 1.0}}; -{{ident(1)}}float OVERFLOWMIN = -{{2.0**(op_bw - 1)}}; - -{{ident(1)}}{{gm.Xfixed_vdecl(op_bw-1, 0, 'temp')}}; -{{ident(1)}}if(in >= OVERFLOWMAX){ -{{ident(2)}}temp = OVERFLOWMAX; -{{ident(1)}}} else if (in <= OVERFLOWMIN) { -{{ident(2)}}temp = OVERFLOWMIN; -{{ident(1)}}} else { -{{ident(2)}}temp=in; -{{ident(1)}}} -{{ident(1)}}return temp; -} - -/* -Name: {{object.name}}_write_clear -Description: write 0 to the memory address specified -Params: - mem -- memory to write to - address -- address that is to be cleared -*/ -void {{object.name}}_write_clear( - {{gm.Xfui(mem_bw)}} *mem, - int address) { -{{ident(1)}}{{gm.Xfui(mem_bw)}} temp((unsigned int) 0); -{% if mem_bw >= (loop_factor*op_bw) %} -{{ident(2)}}mem[address] = temp; -{%- else -%} -{% set mem_num = (mem_bw/op_bw)|int %} -{% call gm.forloop_s(1, 'i', loop_factor, mem_num) %} -{{ident(2)}}mem[address+i] = temp; -{%- endcall -%} -{%- endif -%} -} - -/* -Name: {{object.name}}_saturate_add -Description: this function performs add with a saturation, - it checks the result of addition and clamp the - output to MAX/MIN value of the datatype -Params: a -- input value a - b -- input value b -Return: c -- output value c after saturation -TODO: share instances of this between layers?? -*/ -{{gm.Xfixed(ip_bw-1,0)}}{{object.name}}_saturate_add( -{{gm.Xfixed_vdecl(ip_bw-1, 0, 'a')}}, -{{gm.Xfixed_vdecl(ip_bw-1, 0, 'b')}}) -{ -{{ident(1)}}float OVERFLOWMAX = {{2.0**(ip_bw - 1) - 1.0}}; -{{ident(1)}}float OVERFLOWMIN = -{{2.0**(ip_bw - 1)}}; - -{{ident(1)}}{{gm.Xfixed_vdecl(ip_bw-1, 0, 'c')}}; -{{ident(1)}}c = a + b; -{{ident(1)}}if((a > 0)&&(b > 0)&&(c < 0)){ -{{ident(2)}}c = OVERFLOWMAX; -{{ident(1)}}} else if ((a < 0)&&(b < 0)&&(c > 0)) { -{{ident(2)}}c = OVERFLOWMIN; -{{ident(1)}}} -{{ident(1)}}return c; -} - -{# NOTE: since 2*2 stride 2 is the most common setting for max pooling #} -{# we have a optimized version for it #} -{% if (ksize == 2) and (stride == 2) %} -//NOTE: 2*2 stride 2 version is selected -/* -Name: {{object.name}}_maxpool4 -Description: performs max pooling for a single 2*2 window -Params: - lat -- data from the input window - bias -- pack of biases - rst -- result array - left_shift -- left shift needed for the bias to align with - pre-activation - right_shift -- right shift needed for the pre-activation to - align wiht the output -*/ -void {{object.name}}_maxpool4( - {{gm.Xfixed_2darr(ip_bw-1, 0, 'lat', 4, loop_factor)}}, - {{gm.Xfixed_arr(b_bw-1, 0, 'bias', loop_factor)}}, - {{gm.Xfixed_arr(op_bw-1, 0, 'rst', loop_factor)}}, - int left_shift, int right_shift -) { -{% call gm.forloop(1,'j',loop_factor) %} -{{ident(2)}}#pragma HLS unroll -{{ident(2)}}{{gm.Xfixed_vdecl(ip_bw-1, 0, 'tempBias')}}; -{{ident(2)}}//unpack bias -{{ident(2)}}tempBias = bias[j]; -{{ident(2)}}{{gm.Xfixed_vdecl(ip_bw-1, 0, 'a')}}; -{{ident(2)}}{{gm.Xfixed_vdecl(ip_bw-1, 0, 'b')}}; -{{ident(2)}}{{gm.Xfixed_vdecl(ip_bw-1, 0, 'c')}}; -{{ident(2)}}{{gm.Xfixed_vdecl(ip_bw-1, 0, 'd')}}; -{{ident(2)}}//TODO: use _saturate_add() to prevent overflow -{{ident(2)}}a={{object.name}}_saturate_add(lat[0][j], (tempBias << left_shift)) -{{ident(2)}}a= a >> right_shift; -{{ident(2)}}b={{object.name}}_saturate_add(lat[1][j], (tempBias << left_shift)) -{{ident(2)}}b= b >> right_shift; -{{ident(2)}}c={{object.name}}_saturate_add(lat[2][j], (tempBias << left_shift)) -{{ident(2)}}c= c >> right_shift; -{{ident(2)}}d={{object.name}}_saturate_add(lat[3][j], (tempBias << left_shift)) -{{ident(2)}}d= d >> right_shift; -{{ident(2)}}{{gm.Xfixed_vdecl(ip_bw-1, 0, 'x')}}; -{{ident(2)}}{{gm.Xfixed_vdecl(ip_bw-1, 0,'y')}}; -{{ident(2)}}{{gm.Xfixed_vdecl(ip_bw-1, 0,'z')}}; -{{ident(2)}}{{gm.Xfixed_vdecl(ip_bw-1, 0,'s')}}; -{{ident(2)}}//perform comparison -{{ident(2)}}x=a>b?a:b; -{{ident(2)}}y=c>d?c:d; -{{ident(2)}}z=x>y?x:y; -{{ident(2)}}s = z; -{{ident(2)}}//perform ReLU activation -{{ident(2)}}if(s > 0.0) -{{ident(3)}}rst[j] = {{object.name}}_saturate(s); -{{ident(2)}}else -{{ident(3)}}rst[j] = 0; -{% endcall %} -} - -{# flatten and non-flatten version of maxpool are separate #} -/* -Name: {{object.name}}_bias_relu_pool -Description: top level function of max pooling layer. this - layer performs 3 actions, 1- adding bias, 2- - max pooling, 3- ReLU activation. -{% if not object.flatten %} - This implementation performs 2*2 max poolling, - wiht stride 2, and will pad the output for the - next layer according to the padsize parameter. -Params: See below -*/ -void {{object.name}}_bias_relu_pool( - int padsize, - int IH, //Input height - int IW, //Input width - int COO, //number of packed channels - int OH, //Output height - int OW, //Output width - int left_shift, //left shift needed to align bias with the pre-activation - int right_shift, //right shift needed to align pre-activation with output - {{gm.Xfui_arr(mem_bw, 'weight', 50176)}}, - {{gm.Xfui_arr(mem_bw, 'in', 50176)}}, - {{gm.Xfui_arr(mem_bw,'out', 50176)}} -) { - - {{gm.Xfixed_arr(b_bw-1, 0, 'bias', loop_factor)}}; - int address = 0; - int weight_idx = 0; -{% call gm.forloop(1, 'c', 'COO') %} -{{ident(2)}}//bias = weight[weight_idx++]; -{{ident(2)}}{{object.name}}_read_bias(weight, bias, weight_idx*{{iters_bias}}); -{{ident(2)}}weight_idx ++; -{{ident(2)}}//pad leading zero rows -{% call gm.forloop(2, 'i', 'padsize*OW') %} -{{ident(3)}}address = (c*OH*OW+i)*{{iters_output}}; -{{ident(3)}}{{object.name}}_write_clear(out, address); -{% endcall %} -{% call gm.forloop_s(2, 'h', 'IH', 2) %} -{{ident(3)}}// pad zeros in front of each line -{% call gm.forloop(3, 'i', 'padsize') %} -{{ident(4)}}address = (c*OH*OW+(padsize+h/2)*OW+i)*{{iters_output}}; -{{ident(3)}}{{object.name}}_write_clear(out, address); -{% endcall %} -{{ident(3)}}// perform relu/bias/pooling -{% call gm.forloop_s(3, 'w', 'IW', 2) %} -{{ident(4)}}{{gm.Xfixed_2darr(ip_bw-1, 0, 'inTemp', 4, loop_factor)}}; -{{ident(4)}}{{gm.Xfixed_arr(op_bw-1, 0, 'outTemp', loop_factor)}}; -{{ident(4)}}//read data from the window -{{ident(4)}}address = (c*IW*IH+h*IW+w)*{{iters_input}}; -{{ident(4)}}{{object.name}}_read_input(in, inTemp[0], address); -{{ident(4)}}address = (c*IW*IH+h*IW+w+1)*{{iters_input}}; -{{ident(4)}}{{object.name}}_read_input(in, inTemp[1], address); -{{ident(4)}}address = (c*IW*IH+(h+1)*IW+w)*{{iters_input}}; -{{ident(4)}}{{object.name}}_read_input(in, inTemp[2], address); -{{ident(4)}}address = (c*IW*IH+(h+1)*IW+w+1)*{{iters_input}}; -{{ident(4)}}{{object.name}}_read_input(in, inTemp[3], address); -{{ident(4)}}//perform max pooling -{{ident(4)}}pool1_maxpool4(inTemp, bias, outTemp, left_shift, right_shift); -{{ident(4)}}address = (c*OH*OW+(padsize+h/2)*OW+w/2+padsize)*{{iters_output}}; -{{ident(4)}}{{object.name}}_write_output(out, outTemp, address); -{% endcall %} -{{ident(3)}}//pad zeros at the end of each line -{% call gm.forloop(3, 'i', 'padsize') %} -{{ident(4)}}address = (c*OH*OW+(padsize+h/2)*OW+IW/2+padsize+i)*{{iters_output}}; -{{ident(3)}}{{object.name}}_write_clear(out, address); -{% endcall %} -{% endcall %} - -{% call gm.forloop(2, 'i', 'padsize*IW') %} -{{ident(3)}}address = (c*OH*OW+(padsize+IH/2)*OW+i); -{{ident(3)}}{{object.name}}_write_clear(out, address); -{% endcall %} -{% endcall %} -} -{% else %} - This implementation performs 2*2 max poolling, - wiht stride 2. It will not pad the output for - the next layer. -Params: See below -*/ -void {{object.name}}_bias_relu_pool_flatten( - int IH, //Input height - int IW, //Input width - int COO, //number of packed channels - int OH, //Output height - int OW, //Output width - int left_shift, //left shift needed to align bias with the pre-activation - int right_shift, //right shift needed to align pre-activation with output - {# TODO: placeholder for using onchip memory #} - {{gm.Xfui_arr(mem_bw, 'weight', 50176)}}, - {{gm.Xfui_arr(mem_bw, 'in', 50176)}}, - {{gm.Xfui_arr(mem_bw,'out', 50176)}} -) { - {{gm.Xfixed_arr(b_bw-1, 0, 'bias', loop_factor)}}; - int weight_idx = 0; - int address = 0; -{% call gm.forloop(1, 'c', 'COO') %} -{{ident(2)}}{{object.name}}_read_bias(weight, bias, weight_idx*{{iters_bias}}); -{{ident(2)}}weight_idx ++; -{% call gm.forloop_s(2, 'h', 'IH', 2) %} -{% call gm.forloop_s(3, 'w', 'IW', 2) %} -{{ident(4)}}//read in data for the pooling window -{{ident(4)}}{{gm.Xfixed_2darr(ip_bw-1, 0, 'inTemp', 4, loop_factor)}}; -{{ident(4)}}{{gm.Xfixed_arr(op_bw-1, 0, 'outTemp', loop_factor)}}; -{{ident(4)}}address = (c*IW*IH+h*IW+w)*{{iters_input}}; -{{ident(4)}}{{object.name}}_read_input(in, inTemp[0], address); -{{ident(4)}}address = (c*IW*IH+h*IW+w+1)*{{iters_input}}; -{{ident(4)}}{{object.name}}_read_input(in, inTemp[1], address); -{{ident(4)}}address = (c*IW*IH+(h+1)*IW+w)*{{iters_input}}; -{{ident(4)}}{{object.name}}_read_input(in, inTemp[2], address); -{{ident(4)}}address = (c*IW*IH+(h+1)*IW+w+1)*{{iters_input}}; -{{ident(4)}}{{object.name}}_read_input(in, inTemp[3], address); -{{ident(4)}}//perform max pooling -{{ident(4)}}pool1_maxpool4(inTemp, bias, outTemp, left_shift, right_shift); -{{ident(4)}}address = (c*OH*OW+(h/2)*OW+(w/2))*{{iters_output}}; -{{ident(4)}}//write output to the buffer -{{ident(4)}}{{object.name}}_write_output(out, outTemp, address); -{% endcall %} -{% endcall %} -{% endcall %} -} -{% endif %} -{# less optimized generic implementation #} -{%- else -%} -/* -Name: {{object.name}}_maxpool_{{ksize}} -Description: performs max pooling for a single 2*2 window -Params: - base -- data to compare with - rival -- data that will replace the on in base if it is larger - after adding the bias and is >0 - bias -- pack of biases - left_shift -- left shift needed for the bias to align with - pre-activation -*/ -void {{object.name}}_maxpool_{{ksize}}( - {{gm.Xfixed_arr(ip_bw-1, 0, 'base', loop_factor)}}, - {{gm.Xfixed_arr(ip_bw-1, 0, 'rival', loop_factor)}}, - {{gm.Xfixed_arr(b_bw-1, 0, 'bias', loop_factor)}}, - int left_shift -) { -{% call gm.forloop(1,'j',loop_factor) %} -{{ident(2)}}{{gm.Xfixed_vdecl(ip_bw-1, 0, 'tempbias')}}; -{{ident(2)}}{{gm.Xfixed_vdecl(ip_bw-1, 0, 'a')}}; -{{ident(2)}}{{gm.Xfixed_vdecl(ip_bw-1, 0, 'b')}}; -{{ident(2)}}tempbias = bias[j]; -{{ident(2)}}a = base[j]; -{{ident(2)}}b = {{object.name}}_saturate_add(rival[j], (tempbias << left_shift)); -{{ident(2)}}{{gm.Xfixed_vdecl(ip_bw-1, 0, 'x')}}; -{{ident(2)}}//compare base and rival+bias -{{ident(2)}}x=a>b?a:b; -{{ident(2)}}//perform ReLU activation -{{ident(2)}}if(x>0) -{{ident(3)}}base[j] = {{object.name}}_saturate(x); -{{ident(2)}}else -{{ident(3)}}base[j]=0; -{% endcall %} -} - -/* -Name: {{object.name}}_bias_relu_pool -Description: top level function of max pooling layer. this - layer performs 3 actions, 1- adding bias, 2- - max pooling, 3- ReLU activation. -{# use non-flatten version of maxpooling #} -{% if not object.flatten %} - This implementation performs generic max pooling that can take - any window size, stride and padding strategy. It will pad the - output according to the padsize. -Params: See below -*/ -void {{object.name}}_bias_relu_pool( - int padsize, //padsize for the next layer - int IH, //Input height - int IW, //Input width - int CO, //number of packed channels - int OH, //Output height - int OW, //Output width - int left_shift, //left shift needed to align bias with the pre-activation - int right_shift, //right shift needed to align pre-activation with output - {{gm.Xfui_arr(mem_bw, 'weight', 50176)}}, - {{gm.Xfui_arr(mem_bw, 'in', 50176)}}, - {{gm.Xfui_arr(mem_bw,'out', 50176)}} -) { - {{gm.Xfixed_arr(b_bw-1, 0, 'bias', loop_factor)}}; - int address = 0; - int weight_idx = 0; -{% call gm.forloop(1, 'c', 'CO') %} -{{ident(2)}}{{object.name}}_read_bias(weight, bias, weight_idx*{{iters_bias}}); -{{ident(2)}}weight_idx ++; -{{ident(2)}}//pad leading zero rows -{% call gm.forloop(2, 'i', 'padsize*OW') %} -{{ident(3)}}address = (c*OH*OW+i)*{{iters_output}}; -{{ident(3)}}{{object.name}}_write_clear(out, address); -{% endcall %} -{# the boundary for VALID and SAME pooling mode is different #} -{# TODO TODO TODO: match the behavior with TensorFlow #} -{%- if (mode == 'VALID') -%} -{% call gm.forloop_s(2, 'h', 'IH-%d'|format(ksize-1), stride) %} -{{ident(3)}}// pad zeros in front of each line -{% call gm.forloop(3, 'i', 'padsize') %} -{{ident(4)}}address = (c*OH*OW+(padsize+h/2)*OW+i)*{{iters_output}}; -{{ident(4)}}{{object.name}}_write_clear(out, address); -{% endcall %} -{{ident(3)}}// perform relu/bias/pooling -{% call gm.forloop_s(3, 'w', 'IW-%d'|format(ksize-1), stride) %} -{{ident(4)}}{{gm.Xfixed_arr(ip_bw-1, 0, 'base' , loop_factor)}}; -{{ident(4)}}{{gm.Xfixed_arr(ip_bw-1, 0, 'rival', loop_factor)}}; -{# since array initialization doesn't work, we have to use this stupid loop#} -{% call gm.forloop(4, 'j', loop_factor)%} -{{ident(5)}}base[j] = 0; -{% endcall %} -{% call gm.forloop_i(4, 0, 'hh', ksize, 1) %} -{% call gm.forloop_i(5, 0, 'ww', ksize, 1) %} -{{ident(6)}}if (!(((hh+h)<0)||((ww+w)<0)||((hh+h)>= IH)||((ww+w)>= IW))){ -{{ident(7)}}address = (c*IW*IH+(h+hh)*IW+(w+ww))*{{iters_input}}; -{{ident(7)}}{{object.name}}_read_input(in, rival, address); -{{ident(7)}}{{object.name}}_maxpool_{{ksize}}(base, rival, bias, left_shift); -{{ident(6)}}} -{% endcall %} -{% endcall %} -{{ident(4)}}//cast the result to output datatype -{{ident(4)}}{{gm.Xfixed_arr(op_bw-1, 0,'outCast', loop_factor)}}; -{% call gm.forloop(4, 'j', loop_factor)%} -{{ident(5)}}outCast[j] = {{object.name}}_saturate(base[j] >> right_shift) -{% endcall %} -{{ident(4)}}address = (c*OH*OW+(padsize+h/{{stride}})*OW+w/{{stride}}+padsize)*{{iters_output}}; -{{ident(4)}}{{object.name}}_write_output(out, outCast, address); -{% endcall %} -{{ident(3)}}//pad zeros at the end of each line -{% call gm.forloop(3, 'i', 'padsize') %} -{{ident(4)}}address = (c*OH*OW+(padsize+h/{{stride}})*OW+OW-padsize+i)*{{iters_output}}; -{{ident(4)}}{{object.name}}_write_clear(out, address); -{% endcall %} -{% endcall %} -{%- elif (mode == 'SAME') -%} -{% call gm.forloop_s(2, 'h', 'IH', stride) %} -{{ident(3)}}// pad zeros in front of each line -{% call gm.forloop(3, 'i', 'padsize') %} -{{ident(4)}}address = (c*OH*OW+(padsize+h/2)*OW+i)*{{iters_output}};//TODO: Optimize -{{ident(4)}}{{object.name}}_write_clear(out, address); -{% endcall %} -{{ident(3)}}// perform relu/bias/pooling -{% call gm.forloop_s(3, 'w', 'IW', stride) %} -{{ident(4)}}#pragma HLS pipeline -{{ident(4)}}{{gm.Xfixed_arr(ip_bw-1, 0, 'base' , loop_factor)}}; -{{ident(4)}}{{gm.Xfixed_arr(ip_bw-1, 0, 'rival', loop_factor)}}; -{# since array initialization doesn't work, we have to use this stupid loop#} -{% call gm.forloop(4, 'j', loop_factor)%} -{{ident(5)}}base[j] = 0; -{% endcall %} -{{ident(4)}}//boundary checking -{% call gm.forloop_i(4, -(ksize//2), 'hh', ksize//2+1, 1) %} -{% call gm.forloop_i(5, -(ksize//2), 'ww', ksize//2+1, 1) %} -{{ident(6)}}if (!(((hh+h)<0)||((ww+w)<0)||((hh+h)>= IH)||((ww+w)>= IW))){ -{{ident(7)}}address = (c*IW*IH+(h+hh)*IW+(w+ww))*{{iters_input}}; -{{ident(7)}}{{object.name}}_read_input(in, rival, address); -{{ident(7)}}{{object.name}}_maxpool_{{ksize}}(base, rival, bias, left_shift); -{{ident(6)}}} -{% endcall %} -{% endcall %} -{{ident(4)}}{{gm.Xfixed_arr(op_bw-1, 0,'outCast', loop_factor)}}; -{% call gm.forloop(4, 'j', loop_factor)%} -{{ident(5)}}outCast[j] = {{object.name}}_saturate(base[j] >> right_shift); -{% endcall %} -{{ident(4)}}address = (c*OH*OW+(padsize+h/{{stride}})*OW+w/{{stride}}+padsize)*{{iters_output}}; -{{ident(4)}}{{object.name}}_write_output(out, outCast, address); -{% endcall %} -{{ident(3)}}//pad zeros at the end of each line -{% call gm.forloop(3, 'i', 'padsize') %} -{{ident(4)}}address = (c*OH*OW+(padsize+h/{{stride}})*OW+OW-padsize+i)*{{iters_output}}; -{{ident(4)}}{{object.name}}_write_clear(out, address); -{% endcall %} -{% endcall %} -{% endif %} -{% call gm.forloop(2, 'i', 'padsize*IW') %} -{{ident(3)}}address = (c*OH*OW+(IH-padsize)*OW+i); -{{ident(3)}}{{object.name}}_write_clear(out, address); -{%- endcall %} -{%- endcall %} -} -{% else %} -{# implement flatten version of max pooling #} - This implementation performs generic max pooling that can take - any window size, stride and padding strategy. This function will - not pad the output for the next layer. The output should be seen - as flattened. -Params: See below -*/ -void {{object.name}}_bias_relu_pool_flatten( - int padsize, //padsize for the next layer - int IH, //Input height - int IW, //Input width - int CO, //number of packed channels - int OH, //Output height - int OW, //Output width - int left_shift, //left shift needed to align bias with the pre-activation - int right_shift, //right shift needed to align pre-activation with output - {{gm.Xfui_arr(mem_bw, 'weight', 50176)}}, - {{gm.Xfui_arr(mem_bw, 'in', 50176)}}, - {{gm.Xfui_arr(mem_bw,'out', 50176)}} -) { - {{gm.Xfixed_arr(b_bw-1, 0, 'bias', loop_factor)}}; - int address = 0; - int weight_idx = 0; -{% call gm.forloop(1, 'c', 'CO') %} -{{ident(2)}}{{object.name}}_read_bias(weight, bias, weight_idx*{{iters_bias}}); -{{ident(2)}}weight_idx ++; -{% if (mode == 'VALID') %} -{% call gm.forloop_s(2, 'h', 'IH-%d'|format(ksize-1), stride) %} -{% call gm.forloop_s(3, 'w', 'IW-%d'|format(ksize-1), stride) %} -{{ident(4)}}{{gm.Xfixed_arr(ip_bw-1, 0, 'base' , loop_factor)}}; -{{ident(4)}}{{gm.Xfixed_arr(ip_bw-1, 0, 'rival', loop_factor)}}; -{# since array initialization doesn't work, we have to use this stupid loop#} -{% call gm.forloop(4, 'j', loop_factor)%} -{{ident(5)}}base[j] = 0; -{% endcall %} -{% call gm.forloop_i(4, 0, 'hh', ksize, 1) %} -{% call gm.forloop_i(5, 0, 'ww', ksize, 1) %} -{{ident(6)}}if (!(((hh+h)<0)||((ww+w)<0)||((hh+h)>= IH)||((ww+w)>= IW))){ -{{ident(7)}}address = (c*IW*IH+(h+hh)*IW+(w+ww))*{{iters_input}}; -{{ident(7)}}{{object.name}}_read_input(in, rival, address); -{{ident(7)}}{{object.name}}_maxpool_{{ksize}}(base, rival, bias, left_shift); -{{ident(6)}}} -{% endcall %} -{% endcall %} -{{ident(4)}}{{gm.Xfixed_arr(op_bw-1, 0,'outCast', loop_factor)}}; -{% call gm.forloop(4, 'j', loop_factor)%} -{{ident(5)}}outCast[j] = {{object.name}}_saturate(base[j] >> right_shift) -{% endcall %} -{{ident(4)}}address = (c*OH*OW+(h/{{stride}})*OW+w/{{stride}})*{{iters_output}}; -{{ident(4)}}{{object.name}}_write_output(out, outCast, address); -{% endcall %} -{% endcall %} -{%- elif (mode == 'SAME') -%} -{% call gm.forloop_s(2, 'h', 'IH', stride) %} -{% call gm.forloop_s(3, 'w', 'IW', stride) %} -{{ident(4)}}{{gm.Xfixed_arr(ip_bw-1, 0, 'base' , loop_factor)}}; -{{ident(4)}}{{gm.Xfixed_arr(ip_bw-1, 0, 'rival', loop_factor)}}; -{# since array initialization doesn't work, we have to use this stupid loop#} -{% call gm.forloop(4, 'j', loop_factor)%} -{{ident(5)}}base[j] = 0; -{% endcall %} -{% call gm.forloop_i(4, -(ksize//2), 'hh', ksize//2+1, 1) %} -{% call gm.forloop_i(5, -(ksize//2), 'ww', ksize//2+1, 1) %} -{{ident(6)}}if (!(((hh+h)<0)||((ww+w)<0)||((hh+h)>= IH)||((ww+w)>= IW))){ -{{ident(7)}}address = (c*IW*IH+(h+hh)*IW+(w+ww))*{{iters_input}}; -{{ident(7)}}{{object.name}}_read_input(in, rival, address); -{{ident(7)}}{{object.name}}_maxpool_{{ksize}}(base, rival, bias, left_shift); -{{ident(6)}}} -{% endcall %} -{% endcall %} -{{ident(4)}}{{gm.Xfixed_arr(op_bw-1, 0,'outCast', loop_factor)}}; -{% call gm.forloop(4, 'j', loop_factor)%} -{{ident(5)}}outCast[j] = {{object.name}}_saturate(base[j] >> right_shift) -{% endcall %} -{{ident(4)}}address = (c*OH*OW+(h/{{stride}})*OW+w/{{stride}})*{{iters_output}}; -{{ident(4)}}{{object.name}}_write_output(out, outCast, address); -{% endcall %} -{% endcall %} -{% endif %} -{% endcall %} -} -{% endif %} -{% endif %} -{%- endmacro %} - -{% macro fvalid(object, flags) %} -{%- set ip_bw = object.preprocess.ip_bit_width-%} -{%- set ip_shift = object.preprocess.ip_shift-%} -{%- set op_bw = object.preprocess.op_bit_width-%} -{%- set op_shift = object.preprocess.op_shift -%} -{%- set b_bw = object.weights.b_width-%} -{%- set b_shift = object.weights.b_shift-%} -{%- set loop_factor = object.loop_factor -%} -{%- set mem_bw = object.memory_bit_width -%} -{%- set stride = object.weights.stride[1] -%} -{%- set ksize = object.weights.ksize[1] -%}{# window size for pooling #} -{%- set padsize = object.weights.padsize-%} -{%- set mode = object.mode -%} {# mode of doing pooling #} -{%- set H = object.dshape[0] -%} -{%- set W = object.dshape[1] -%} -{%- set C = object.dshape[2] -%} -{%- set channels = (object.dshape[2]/loop_factor)|round(0,'ceil')|int-%} -{# internal bw is the "safe" bit width for the result of multiplication #} -{%- set iters_bias = (b_bw*loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_input = (ip_bw*loop_factor/mem_bw)|round(0,'ceil')|int -%} -{%- set iters_output = (op_bw*loop_factor/mem_bw)|round(0,'ceil')|int -%} - -{% if object.flatten %} -{% if mode == 'SAME'%} -{%- set OH = (object.dshape[0]/stride)|round(0,'ceil')|int -%} -{%- set OW = (object.dshape[1]/stride)|round(0,'ceil')|int -%} -{% elif mode == 'VALID' %} -{%- set OH = ((object.dshape[0]-ksize+1)/stride)|round(0,'ceil')|int -%} -{%- set OW = ((object.dshape[1]-ksize+1)/stride)|round(0,'ceil')|int -%} -{% endif %} -void {{object.name}}_test_pool_flatten() { - srand(17); - bool flag = false; - int input_buf[{{H}}*{{W}}*{{C}}] = {0}; - int weight_buf[{{C}}]; - int output_buf[{{OH}}*{{OW}}*{{C}}]; - -{% call gm.forloop(1, 'i', C) %} -{% call gm.forloop(2, 'j', H) %} -{% call gm.forloop(3, 'k', W) %} -{{ident(4)}}input_buf[i*{{H}}*{{W}}+j*{{W}}+k] = rand()%4 - 2; -{% endcall %} -{% endcall %} -{% endcall %} - -{{ident(1)}}pack_input<int>({{H}}, {{W}}, {{C}}, 0, {{loop_factor}}, {{ip_bw}}, input_buf, (ap_int<512>*)a); - -{% call gm.forloop(1, 'i', C) %} -{{ident(2)}}weight_buf[i] = rand()%2; -{% endcall %} - -{% call gm.forloop(1, 'i', (object.dshape[2]/loop_factor)|round(0,'ceil')|int) %} -{% call gm.forloop(1, 'ii', loop_factor) %} -{{ident(2)}}if((i*{{loop_factor}}+ii) < {{C}}){ -{{ident(2)}}((ap_int<512>*)w)[i].{{gm.pr_range_idx(b_bw, 'ii')}} = weight_buf[i*{{loop_factor}}+ii]; -{{ident(2)}}} else { -{{ident(2)}}((ap_int<512>*)w)[i].{{gm.pr_range_idx(b_bw, 'ii')}} = 0; -{{ident(2)}}} -{% endcall %} -{% endcall %} - -{% call gm.forloop(1, 'i', 50176) %} -{{ident(2)}}((ap_int<512>*)c)[i] = 0; -{% endcall %} - -{{ident(1)}}maxpool_gold({{H}}, {{W}}, {{C}}, 0, {{ksize}}, {{stride}}, -{%- if mode == 'SAME' -%} -true, -{%- elif mode == 'VALID' -%} -false, -{%- endif -%} - input_buf, weight_buf, output_buf); -{{ident(1)}}{{object.name}}_bias_relu_pool_flatten({{object.dshape[0]}}, {{object.dshape[1]}}, {{(object.dshape[2]/loop_factor)|round(0,'ceil')|int}}, {{OH}}, {{OW}}, 0, 0, (xcelo_uint<{{mem_bw}}>*)w, (xcelo_uint<{{mem_bw}}>*)a, (xcelo_uint<{{mem_bw}}>*)c); -{% call gm.forloop(1, 'i', 1) %} -{% call gm.forloop(2, 'ii', loop_factor) %} -{% call gm.forloop(3, 'j', OH) %} -{% call gm.forloop(4, 'k', OW) %} -{{ident(5)}}{{gm.fi_vdecl(mem_bw, 'temp_c')}} = ((ap_int<512>*)c)[(i*{{OH}}*{{OW}}+j*{{OW}}+k)*{{iters_output}}]; -{{ident(5)}}{{gm.fi_vdecl(op_bw, 'out_layer')}} = temp_c.{{gm.pr_range_idx(op_bw,'ii')}}; -{{ident(5)}}int out_golden = output_buf[(i*{{loop_factor}}+ii)*{{OH}}*{{OW}}+j*{{OW}}+k]; -{{ident(5)}}if (out_layer != out_golden) { -{{ident(5)}}cout << "[" << (i*{{loop_factor}}+ii) << "][" << j << "][" << k << "]"; -{{ident(5)}}cout << "Diff: Layer - " << out_layer << "Golden - " << out_golden << endl; -{{ident(5)}}} -{%- endcall -%} -{{ident(3)}}//cout << endl; -{%- endcall -%} -{{ident(2)}}//cout << endl; -{%- endcall -%} -{%- endcall -%} -{{ident(1)}}assert(!flag); -} -{# if not flatten #} -{% else %} -{% if mode == 'SAME'%} -{%- set OH = (object.dshape[0]/stride+2*padsize)|round(0,'ceil')|int -%} -{%- set OW = (object.dshape[1]/stride+2*padsize)|round(0,'ceil')|int -%} -{% elif mode == 'VALID' %} -{%- set OH = ((object.dshape[0]-ksize+1)/stride+2*padsize)|round(0,'ceil')|int -%} -{%- set OW = ((object.dshape[1]-ksize+1)/stride+2*padsize)|round(0,'ceil')|int -%} -{% endif %} -void {{object.name}}_test_pool() { - srand(17); - bool flag = false; - int input_buf[{{H}}*{{W}}*{{C}}] = {0}; - int weight_buf[{{C}}]; - int output_buf[{{OH}}*{{OW}}*{{C}}]; - -{% call gm.forloop(1, 'i', C) %} -{% call gm.forloop(2, 'j', H) %} -{% call gm.forloop(3, 'k', W) %} -{{ident(4)}}input_buf[i*{{H}}*{{W}}+j*{{W}}+k] = rand()%4 - 2; -{% endcall %} -{% endcall %} -{% endcall %} - -{{ident(1)}}pack_input<int>({{H}}, {{W}}, {{C}}, 0, {{loop_factor}}, {{ip_bw}}, input_buf, (ap_int<512>*)a); - -{% call gm.forloop(1, 'i', C) %} -{{ident(2)}}weight_buf[i] = rand()%2; -{% endcall %} - -{% call gm.forloop(1, 'i', (object.dshape[2]/loop_factor)|round(0,'ceil')|int) %} -{% call gm.forloop(1, 'ii', loop_factor) %} -{{ident(2)}}if((i*{{loop_factor}}+ii) < {{C}}){ -{{ident(2)}}((ap_int<512>*)w)[i].{{gm.pr_range_idx(b_bw, 'ii')}} = weight_buf[i*{{loop_factor}}+ii]; -{{ident(2)}}} else { -{{ident(2)}}((ap_int<512>*)w)[i].{{gm.pr_range_idx(b_bw, 'ii')}} = 0; -{{ident(2)}}} -{% endcall %} -{% endcall %} - -{% call gm.forloop(1, 'i', 50176) %} -{{ident(2)}}((ap_int<512>*)c)[i] = 0; -{% endcall %} - -{{ident(1)}}maxpool_gold({{H}}, {{W}}, {{C}}, {{padsize}}, {{ksize}}, {{stride}}, -{%- if mode == 'SAME' -%} -true, -{%- elif mode == 'VALID' -%} -false, -{%- endif -%} - input_buf, weight_buf, output_buf); -{{ident(1)}}{{object.name}}_bias_relu_pool({{padsize}}, {{object.dshape[0]}}, {{object.dshape[1]}}, {{(object.dshape[2]/loop_factor)|round(0,'ceil')|int}}, {{OH}}, {{OW}}, 0, 0, w, a, c); - -{% call gm.forloop(1, 'i', 1) %} -{% call gm.forloop(2, 'ii', loop_factor) %} -{% call gm.forloop(3, 'j', OH) %} -{% call gm.forloop(4, 'k', OW) %} -{{ident(5)}}{{gm.fi_vdecl(512, 'temp_c')}} = ((ap_int<512>*)c)[(i*{{OH}}*{{OW}}+j*{{OW}}+k)*{{iters_output}}]; -{{ident(5)}}{{gm.fi_vdecl(8, 'out_layer')}} = temp_c.{{gm.pr_range_idx(8,'ii')}}; -{{ident(5)}}int out_golden = output_buf[(i*{{loop_factor}}+ii)*{{OH}}*{{OW}}+j*{{OW}}+k]; -{{ident(5)}}if (out_layer != out_golden) { -{{ident(5)}}cout << "[" << (i*{{loop_factor}}+ii) << "][" << j << "][" << k << "]"; -{{ident(5)}}cout << "Diff: Layer - " << out_layer << "Golden - " << out_golden << endl; -{{ident(5)}}flag = true; -{{ident(5)}}} -{%- endcall -%} -{{ident(3)}}//cout << endl; -{%- endcall -%} -{{ident(2)}}//cout << endl; -{%- endcall -%} -{%- endcall -%} -{{ident(1)}}assert(!flag); -} -{% endif %} -{% endmacro %} \ No newline at end of file diff --git a/DRED_codegen/templates/net_main.cpp.jinja b/DRED_codegen/templates/net_main.cpp.jinja deleted file mode 100644 index 4cda3bd1f3e3d7e76a63b12c1b4251f220a30d94..0000000000000000000000000000000000000000 --- a/DRED_codegen/templates/net_main.cpp.jinja +++ /dev/null @@ -1,245 +0,0 @@ -{% import 'global_macros.inc' as gm %} -{% set ident = gm.ident %} -{# Description: macro for layer function declarations #} -{# ------ #} -{# Params: #} -{# object -- layer object #} -{# name -- type name of the layer. e.g. conv,fc #} -{# template -- filename of the template #} -{%- macro fdecl_template(object, name, template) -%} -{% from template import fdecl as lfunc %} -{%- for layer in object %} -{%- if layer.layer_type == name %} -{{lfunc(layer)}} -{%- endif %} -{%- endfor %} -{%- endmacro %} - -{# Description: macro for global variable declarations #} -{# ------ #} -{# Params: #} -{# object -- layer object #} -{# name -- type name of the layer. e.g. conv,fc #} -{# template -- filename of the template #} -{%- macro var_decl_template(object, name, template) -%} -{% from template import var_decl as lfunc %} -{%- for layer in object %} -{%- if layer.layer_type == name %} -{{lfunc(layer)}} -{%- endif %} -{%- endfor %} -{%- endmacro %} - -{# Description: macro for layer function implementations #} -{# ------ #} -{# Params: #} -{# object -- layer object #} -{# name -- type name of the layer. e.g. conv, fc, etc. #} -{# template -- filename of the template #} -{%- macro fimpl_template(object, name, template) -%} -{% from template import fimpl as lfunc %} -{%- for layer in object %} -{%- if layer.layer_type == name %} -{{lfunc(layer)}} -{%- endif %} -{%- endfor %} -{%- endmacro %} - -{# Description: implemente the validation func for layers #} -{# ------ #} -{# Params: #} -{# object -- layer object #} -{# name -- type name of the layer. e.g. conv, fc, etc. #} -{# template -- filename of the template #} -{%- macro fvalid_template(object, name, template) -%} -{% from template import fvalid as lfunc %} -{%- for layer in object %} -{%- if layer.layer_type == name %} -{# to validate FC layer we need info about previous layer #} -{%- if loop.previtem -%} -{%- set prev = loop.previtem -%} -{%- else -%} -{%- set prev = layer -%} {# temporary hack to avoid error #} -{%- endif -%} -{%- if layer.layer_type == 'fc'-%} -{{lfunc(layer, prev, flags)}} -{%- else -%} -{{lfunc(layer, flags)}} -{%- endif -%} -{%- endif %} -{%- endfor %} -{%- endmacro %} - -{# Description: implemente the validation func for layers #} -{# ------ #} -{# Params: #} -{# object -- layer object #} -{# name -- type name of the layer. e.g. conv, fc, etc. #} -{# template -- filename of the template #} -{%- macro fvalid_call_template(object, flags, name, template) -%} -{% from template import fvalid_call as lfunc %} -{%- for layer in object %} -{% if layer.layer_type == name %} -{{lfunc(layer)}} -{%- endif %} -{%- endfor %} -{%- endmacro %} - -{# Description: implemente the validation func for layers #} -{# ------ #} -{# Params: #} -{# object -- layer object #} -{# name -- type name of the layer. e.g. conv, fc, etc. #} -{# template -- filename of the template #} -{%- macro fcall_network(network, flags) -%} -{% for layer in network %} -{% if layer.layer_type == 'conv' %} -{% from 'convolution.inc' import fcall as lfunc %} -{{lfunc(layer, flags)}} -{% elif layer.layer_type == 'avg_pool' %} -{% from 'avg_pool.inc' import fcall as lfunc %} -{{lfunc(layer, flags)}} -{% elif layer.layer_type == 'max_pool' %} -{% from 'max_pool.inc' import fcall as lfunc %} -{{lfunc(layer, flags)}} -{% elif layer.layer_type == 'fc' %} -{% from 'fc.inc' import fcall as lfunc %} -{# FC layer need info of the previous layer #} -{{lfunc(layer, loop.previtem, flags)}} -{% elif layer.layer_type == 'batch_norm' %} -{% from 'batch_norm.inc' import fcall as lfunc %} -{{lfunc(layer, flags)}} -{% elif layer.layer_type == 'lrn' %} -{% from 'lrn.inc' import fcall as lfunc %} -{{lfunc(layer, flags)}} -{% endif %} -{% endfor %} -{%- endmacro -%} - -#include "xcelo_fp_int.h" -#include "xcelo_uint.h" -{# C/C++ headers only needed for C verification and debug #} -{% if flags.verify %} -#include <iostream> -#include <fstream> -#include <vector> -#include <sys/resource.h> -#include <cmath> -{# aux functions need ap_int to put packed data into memory #} -#include "ap_int.h" -{# assertion requred for debugging purpose #} -{% if flags.debug %} -#include <assert.h> -{% endif %} -{% endif %} -using namespace std; - -{# intended to be offchip memory interface #} -xcelo_uint<512> a[100352]; -xcelo_uint<512> w[100352]; -xcelo_uint<512> c[100352]; -{# placeholder, reserved for onchip memory interface #} -xcelo_uint<512> a_onchip[1]; -xcelo_uint<512> c_onchip[1]; - -{# declare all layer functions, currently not used #} -//Function declarations -{{fdecl_template(network, 'conv', 'convolution.inc')}} -{{fdecl_template(network, 'avg_pool', 'avg_pool.inc')}} -{{fdecl_template(network, 'max_pool', 'max_pool.inc')}} -{{fdecl_template(network, 'fc', 'fc.inc')}} - -{# declare all global variables for each layer, currently not used #} -//Variable declarations -{{var_decl_template(network, 'conv', 'convolution.inc')}} -{{var_decl_template(network, 'avg_pool', 'avg_pool.inc')}} -{{var_decl_template(network, 'max_pool', 'max_pool.inc')}} -{{var_decl_template(network, 'fc', 'fc.inc')}} - -{# generate implementations for each type of layer #} -//Function implementations -{{fimpl_template(network, 'conv', 'convolution.inc')}} -{{fimpl_template(network, 'avg_pool', 'avg_pool.inc')}} -{{fimpl_template(network, 'max_pool', 'max_pool.inc')}} -{{fimpl_template(network, 'fc', 'fc.inc')}} -{{fimpl_template(network, 'batch_norm', 'batch_norm.inc')}} -{{fimpl_template(network, 'lrn', 'lrn.inc')}} - -{# instantiate auxiliary functions if doing verification #} -{% if flags.verify %} -{# Import all auxiliary functions: Read Weight/Input, Display Array, etc.#} -{% from 'aux_funcs.inc' import fimpl_aux %} -{{fimpl_aux()}} -{# implement validation functions if verifying a layer#} -{% if flags.verify == 'layer' %} -//Function validations -{{fvalid_template(network, 'conv', 'convolution.inc')}} -{{fvalid_template(network, 'avg_pool', 'avg_pool.inc')}} -{{fvalid_template(network, 'max_pool', 'max_pool.inc')}} -{{fvalid_template(network, 'fc', 'fc.inc')}} -{# TODO: add validation functions for LRN and BN layer#} -{% endif %} -{% endif %} - -{######################## MAIN FUNCTION IN C++ ###########################} -int main(int argc, char* argv[]) { -{# if we are verifying a single layer#} -{% if flags.verify == 'layer' %} -{# TODO: wrap the following in a macro #} -{# TODO: add BN and LRN to the following list of layers#} -{{fvalid_call_template(network, flags, 'conv', 'convolution.inc')}} -{{fvalid_call_template(network, flags, 'max_pool', 'max_pool.inc')}} -{{fvalid_call_template(network, flags, 'fc', 'fc.inc')}} -{% if flags.debug %} -{{ident(1)}}cout << "per layer test passed" << endl; -{% endif %} -{# if we are verifying a network #} -{% elif flags.verify == 'network' %} -{# get shape of input tensor to the network from the first layer #} -{%set IH = network[0].dshape[0] %} -{%set IW = network[0].dshape[1] %} -{%set IC = network[0].dshape[2] %} -{%set pad = (network[0].wshape[0]-1)/2 %} -{# NOTE: int8_t in the following line can be changed if input datatype is larger than 8 #} -{{ident(1)}}read_input<int8_t>(argv[1], {{IH}}, {{IW}}, {{IC}}, {{pad}}, (ap_int<512>*)a); - -{# call all layer functions#} -{{fcall_network(network, flags)}} - -{{ident(1)}}//{{flags.test_method}} {# show test method in as a comment in C code#} -{% if flags.test_method == 'groundtruth' %} -{# get parameters on logits from the #} -{# ATTENTION: the following two variables should be changed according to the network #} -{% set logit_bw = 8 %} -{% set num_classes = 10 %} - -{{ident(1)}}ap_int<{{logit_bw}}> result = 0; -{{ident(1)}}int classification; -{% call gm.forloop(1, 'i', num_classes) %} -{{ident(2)}}ap_int<512> temp_a = ((ap_int<512>*)a)[0]; -{{ident(2)}}ap_int<{{logit_bw}}> t = temp_a.range({{logit_bw}}*i+{{logit_bw-1}},{{logit_bw}}*i); -{% if flags.debug %}{# display logic for each class if in debugging mode #} -{{ident(2)}}cout << t << ","; -{% endif %} -{# perform simple argmax #} -{{ident(2)}}if (t > result){ -{{ident(3)}}result = t; -{{ident(3)}}classification = i; -{{ident(2)}}} -{% endcall %} -{% if flags.debug %}{# display result of classification in debugging mode #} -{{ident(1)}}//simple argmax, and show result of classification -{{ident(1)}}cout << "Classification result: " << classification << endl; -{% endif %} -{# if verifying network in with golden input & output#} -{% elif flags.test_method == 'golden' %} -{{ident(1)}}//compare with golden output -{{ident(1)}}//TODO TODO TODO -{% endif %} -{# else in clean mode #} -{% else %} -{# call all layer functions#} -//In clean mode -{{fcall_network(network, flags)}} -{% endif %} -} \ No newline at end of file diff --git a/DRED_codegen/templates/plot.ipynb b/DRED_codegen/templates/plot.ipynb deleted file mode 100644 index caea6169cc80c1a4649e9582d18401babc8e807a..0000000000000000000000000000000000000000 --- a/DRED_codegen/templates/plot.ipynb +++ /dev/null @@ -1,34 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/DRED_codegen/templates/test.jinja b/DRED_codegen/templates/test.jinja deleted file mode 100644 index 1ac6b2da0284f255f51b2448ab5759afe594188b..0000000000000000000000000000000000000000 --- a/DRED_codegen/templates/test.jinja +++ /dev/null @@ -1 +0,0 @@ -Hello {{world}}! \ No newline at end of file diff --git a/DRED_codegen/test logs b/DRED_codegen/test logs deleted file mode 100644 index 4861d209da31e46eb9b526261629fb91ad86ba02..0000000000000000000000000000000000000000 --- a/DRED_codegen/test logs +++ /dev/null @@ -1,7 +0,0 @@ -test logs - -state | network | reward | result -12 | 3 layer | x1 -1000 -1500 | agent stays at 12 -16 | 3 layer | x10 -1000 -1500 |agent stays at 12 -16 | 3 layer | x10 -5 -10 | agent stays at 12 -16 | 4 layer 128-256-128 |x100 -1 -1 | not known \ No newline at end of file diff --git a/DRED_codegen/Plot.ipynb b/Plot.ipynb similarity index 100% rename from DRED_codegen/Plot.ipynb rename to Plot.ipynb diff --git a/DRED_codegen/codegen.py b/codegen.py similarity index 100% rename from DRED_codegen/codegen.py rename to codegen.py diff --git a/DRED_codegen/conv template base.cpp b/conv template base.cpp similarity index 100% rename from DRED_codegen/conv template base.cpp rename to conv template base.cpp diff --git a/DRED_codegen/exhaustive_search.py b/exhaustive_search.py similarity index 100% rename from DRED_codegen/exhaustive_search.py rename to exhaustive_search.py diff --git a/DRED_codegen/graph_xc7a200tfbg676-2_10_10.npy b/graph_xc7a200tfbg676-2_10_10.npy similarity index 100% rename from DRED_codegen/graph_xc7a200tfbg676-2_10_10.npy rename to graph_xc7a200tfbg676-2_10_10.npy diff --git a/DRED_codegen/graph_xc7a200tfbg676-2_11_11.npy b/graph_xc7a200tfbg676-2_11_11.npy similarity index 100% rename from DRED_codegen/graph_xc7a200tfbg676-2_11_11.npy rename to graph_xc7a200tfbg676-2_11_11.npy diff --git a/DRED_codegen/graph_xc7a200tfbg676-2_8_8.npy b/graph_xc7a200tfbg676-2_8_8.npy similarity index 100% rename from DRED_codegen/graph_xc7a200tfbg676-2_8_8.npy rename to graph_xc7a200tfbg676-2_8_8.npy diff --git a/DRED_codegen/graph_xc7a200tfbg676-2_9_9.npy b/graph_xc7a200tfbg676-2_9_9.npy similarity index 100% rename from DRED_codegen/graph_xc7a200tfbg676-2_9_9.npy rename to graph_xc7a200tfbg676-2_9_9.npy diff --git a/DRED_codegen/graph_xc7vx485tffg1761-2_10_10.npy b/graph_xc7vx485tffg1761-2_10_10.npy similarity index 100% rename from DRED_codegen/graph_xc7vx485tffg1761-2_10_10.npy rename to graph_xc7vx485tffg1761-2_10_10.npy diff --git a/DRED_codegen/graph_xc7z020clg484-1_10_10.npy b/graph_xc7z020clg484-1_10_10.npy similarity index 100% rename from DRED_codegen/graph_xc7z020clg484-1_10_10.npy rename to graph_xc7z020clg484-1_10_10.npy diff --git a/DRED_codegen/graph_xc7z020clg484-1_10_10_search.npy b/graph_xc7z020clg484-1_10_10_search.npy similarity index 100% rename from DRED_codegen/graph_xc7z020clg484-1_10_10_search.npy rename to graph_xc7z020clg484-1_10_10_search.npy diff --git a/DRED_codegen/graph_xc7z020clg484-1_8_8.npy b/graph_xc7z020clg484-1_8_8.npy similarity index 100% rename from DRED_codegen/graph_xc7z020clg484-1_8_8.npy rename to graph_xc7z020clg484-1_8_8.npy diff --git a/DRED_codegen/graph_xcvu190-flgc2104-2-e_10_10.npy b/graph_xcvu190-flgc2104-2-e_10_10.npy similarity index 100% rename from DRED_codegen/graph_xcvu190-flgc2104-2-e_10_10.npy rename to graph_xcvu190-flgc2104-2-e_10_10.npy diff --git a/DRED_codegen/graph_xcvu190-flgc2104-2-e_10_10_search.npy b/graph_xcvu190-flgc2104-2-e_10_10_search.npy similarity index 100% rename from DRED_codegen/graph_xcvu190-flgc2104-2-e_10_10_search.npy rename to graph_xcvu190-flgc2104-2-e_10_10_search.npy diff --git a/DRED_codegen/graph_xcvu9p-flga2104-2L-e-es1_10_10.npy b/graph_xcvu9p-flga2104-2L-e-es1_10_10.npy similarity index 100% rename from DRED_codegen/graph_xcvu9p-flga2104-2L-e-es1_10_10.npy rename to graph_xcvu9p-flga2104-2L-e-es1_10_10.npy diff --git a/DRED_codegen/history_trace_back.npy b/history_trace_back.npy similarity index 100% rename from DRED_codegen/history_trace_back.npy rename to history_trace_back.npy diff --git a/DRED_codegen/history_trace_back3.npy b/history_trace_back3.npy similarity index 100% rename from DRED_codegen/history_trace_back3.npy rename to history_trace_back3.npy diff --git a/DRED_codegen/history_trace_back_v2.npy b/history_trace_back_v2.npy similarity index 100% rename from DRED_codegen/history_trace_back_v2.npy rename to history_trace_back_v2.npy diff --git a/DRED_codegen/hls_proj/.apc/autopilot.apfmapping b/hls_proj/.apc/autopilot.apfmapping similarity index 100% rename from DRED_codegen/hls_proj/.apc/autopilot.apfmapping rename to hls_proj/.apc/autopilot.apfmapping diff --git a/DRED_codegen/hls_proj/.cproject b/hls_proj/.cproject similarity index 100% rename from DRED_codegen/hls_proj/.cproject rename to hls_proj/.cproject diff --git a/DRED_codegen/hls_proj/.project b/hls_proj/.project similarity index 100% rename from DRED_codegen/hls_proj/.project rename to hls_proj/.project diff --git a/DRED_codegen/hls_proj/.settings/hls_proj.Debug.launch b/hls_proj/.settings/hls_proj.Debug.launch similarity index 100% rename from DRED_codegen/hls_proj/.settings/hls_proj.Debug.launch rename to hls_proj/.settings/hls_proj.Debug.launch diff --git a/DRED_codegen/hls_proj/.settings/hls_proj.Release.launch b/hls_proj/.settings/hls_proj.Release.launch similarity index 100% rename from DRED_codegen/hls_proj/.settings/hls_proj.Release.launch rename to hls_proj/.settings/hls_proj.Release.launch diff --git a/DRED_codegen/hls_proj/.vivado_hls_log_all.xml b/hls_proj/.vivado_hls_log_all.xml similarity index 100% rename from DRED_codegen/hls_proj/.vivado_hls_log_all.xml rename to hls_proj/.vivado_hls_log_all.xml diff --git a/DRED_codegen/hls_proj/gened.cpp b/hls_proj/gened.cpp similarity index 100% rename from DRED_codegen/hls_proj/gened.cpp rename to hls_proj/gened.cpp diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.export.ll b/hls_proj/solution1/.autopilot/db/a.export.ll similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.export.ll rename to hls_proj/solution1/.autopilot/db/a.export.ll diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g b/hls_proj/solution1/.autopilot/db/a.g similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.g rename to hls_proj/solution1/.autopilot/db/a.g diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.0 b/hls_proj/solution1/.autopilot/db/a.g.0 similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.0 rename to hls_proj/solution1/.autopilot/db/a.g.0 diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.0.bc b/hls_proj/solution1/.autopilot/db/a.g.0.bc similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.0.bc rename to hls_proj/solution1/.autopilot/db/a.g.0.bc diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.1.bc b/hls_proj/solution1/.autopilot/db/a.g.1.bc similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.1.bc rename to hls_proj/solution1/.autopilot/db/a.g.1.bc diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.1.ll b/hls_proj/solution1/.autopilot/db/a.g.1.ll similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.1.ll rename to hls_proj/solution1/.autopilot/db/a.g.1.ll diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.2.bc b/hls_proj/solution1/.autopilot/db/a.g.2.bc similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.2.bc rename to hls_proj/solution1/.autopilot/db/a.g.2.bc diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.2.prechk.bc b/hls_proj/solution1/.autopilot/db/a.g.2.prechk.bc similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.2.prechk.bc rename to hls_proj/solution1/.autopilot/db/a.g.2.prechk.bc diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.bc b/hls_proj/solution1/.autopilot/db/a.g.bc similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.bc rename to hls_proj/solution1/.autopilot/db/a.g.bc diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.ll b/hls_proj/solution1/.autopilot/db/a.g.ll similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.g.ll rename to hls_proj/solution1/.autopilot/db/a.g.ll diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o b/hls_proj/solution1/.autopilot/db/a.o similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.o rename to hls_proj/solution1/.autopilot/db/a.o diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.1.bc b/hls_proj/solution1/.autopilot/db/a.o.1.bc similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.1.bc rename to hls_proj/solution1/.autopilot/db/a.o.1.bc diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.1.ll b/hls_proj/solution1/.autopilot/db/a.o.1.ll similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.1.ll rename to hls_proj/solution1/.autopilot/db/a.o.1.ll diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.1.tmp.bc b/hls_proj/solution1/.autopilot/db/a.o.1.tmp.bc similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.1.tmp.bc rename to hls_proj/solution1/.autopilot/db/a.o.1.tmp.bc diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.2.bc b/hls_proj/solution1/.autopilot/db/a.o.2.bc similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.2.bc rename to hls_proj/solution1/.autopilot/db/a.o.2.bc diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.2.ll b/hls_proj/solution1/.autopilot/db/a.o.2.ll similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.2.ll rename to hls_proj/solution1/.autopilot/db/a.o.2.ll diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.3.bc b/hls_proj/solution1/.autopilot/db/a.o.3.bc similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.3.bc rename to hls_proj/solution1/.autopilot/db/a.o.3.bc diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.3.ll b/hls_proj/solution1/.autopilot/db/a.o.3.ll similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.3.ll rename to hls_proj/solution1/.autopilot/db/a.o.3.ll diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.bc b/hls_proj/solution1/.autopilot/db/a.o.bc similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.bc rename to hls_proj/solution1/.autopilot/db/a.o.bc diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.ll b/hls_proj/solution1/.autopilot/db/a.o.ll similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.o.ll rename to hls_proj/solution1/.autopilot/db/a.o.ll diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.pp.0.bc b/hls_proj/solution1/.autopilot/db/a.pp.0.bc similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.pp.0.bc rename to hls_proj/solution1/.autopilot/db/a.pp.0.bc diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/a.pp.bc b/hls_proj/solution1/.autopilot/db/a.pp.bc similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/a.pp.bc rename to hls_proj/solution1/.autopilot/db/a.pp.bc diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/autopilot.flow.log b/hls_proj/solution1/.autopilot/db/autopilot.flow.log similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/autopilot.flow.log rename to hls_proj/solution1/.autopilot/db/autopilot.flow.log diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/autopilot.rtl.models.txt b/hls_proj/solution1/.autopilot/db/autopilot.rtl.models.txt similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/autopilot.rtl.models.txt rename to hls_proj/solution1/.autopilot/db/autopilot.rtl.models.txt diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/bugpoint.sh b/hls_proj/solution1/.autopilot/db/bugpoint.sh similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/bugpoint.sh rename to hls_proj/solution1/.autopilot/db/bugpoint.sh diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.bc b/hls_proj/solution1/.autopilot/db/gened.bc similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/gened.bc rename to hls_proj/solution1/.autopilot/db/gened.bc diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.g.bc b/hls_proj/solution1/.autopilot/db/gened.g.bc similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/gened.g.bc rename to hls_proj/solution1/.autopilot/db/gened.g.bc diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp b/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp rename to hls_proj/solution1/.autopilot/db/gened.pp.0.cpp diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-cdt.cpp b/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-cdt.cpp similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-cdt.cpp rename to hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-cdt.cpp diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-line.cpp b/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-line.cpp similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-line.cpp rename to hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-line.cpp diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-line.cpp.CXX b/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-line.cpp.CXX similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-line.cpp.CXX rename to hls_proj/solution1/.autopilot/db/gened.pp.0.cpp.ap-line.cpp.CXX diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.00.o b/hls_proj/solution1/.autopilot/db/gened.pp.00.o similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pp.00.o rename to hls_proj/solution1/.autopilot/db/gened.pp.00.o diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pragma.0.cpp b/hls_proj/solution1/.autopilot/db/gened.pragma.0.cpp similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pragma.0.cpp rename to hls_proj/solution1/.autopilot/db/gened.pragma.0.cpp diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pragma.0.cpp.ap-line.CXX b/hls_proj/solution1/.autopilot/db/gened.pragma.0.cpp.ap-line.CXX similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pragma.0.cpp.ap-line.CXX rename to hls_proj/solution1/.autopilot/db/gened.pragma.0.cpp.ap-line.CXX diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pragma.1.cpp b/hls_proj/solution1/.autopilot/db/gened.pragma.1.cpp similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pragma.1.cpp rename to hls_proj/solution1/.autopilot/db/gened.pragma.1.cpp diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pragma.2.cpp b/hls_proj/solution1/.autopilot/db/gened.pragma.2.cpp similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/gened.pragma.2.cpp rename to hls_proj/solution1/.autopilot/db/gened.pragma.2.cpp diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/global.setting.tcl b/hls_proj/solution1/.autopilot/db/global.setting.tcl similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/global.setting.tcl rename to hls_proj/solution1/.autopilot/db/global.setting.tcl diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/hls_design_meta.cpp b/hls_proj/solution1/.autopilot/db/hls_design_meta.cpp similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/hls_design_meta.cpp rename to hls_proj/solution1/.autopilot/db/hls_design_meta.cpp diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/hls_design_meta.h b/hls_proj/solution1/.autopilot/db/hls_design_meta.h similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/hls_design_meta.h rename to hls_proj/solution1/.autopilot/db/hls_design_meta.h diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/hls_design_meta.tcl b/hls_proj/solution1/.autopilot/db/hls_design_meta.tcl similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/hls_design_meta.tcl rename to hls_proj/solution1/.autopilot/db/hls_design_meta.tcl diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/pragma.0.directive b/hls_proj/solution1/.autopilot/db/pragma.0.directive similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/pragma.0.directive rename to hls_proj/solution1/.autopilot/db/pragma.0.directive diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/pragma.1.directive b/hls_proj/solution1/.autopilot/db/pragma.1.directive similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/pragma.1.directive rename to hls_proj/solution1/.autopilot/db/pragma.1.directive diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/pragma.status.tcl b/hls_proj/solution1/.autopilot/db/pragma.status.tcl similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/pragma.status.tcl rename to hls_proj/solution1/.autopilot/db/pragma.status.tcl diff --git a/DRED_codegen/hls_proj/solution1/.autopilot/db/ve_warning.tcl b/hls_proj/solution1/.autopilot/db/ve_warning.tcl similarity index 100% rename from DRED_codegen/hls_proj/solution1/.autopilot/db/ve_warning.tcl rename to hls_proj/solution1/.autopilot/db/ve_warning.tcl diff --git a/DRED_codegen/hls_proj/solution1/solution1.aps b/hls_proj/solution1/solution1.aps similarity index 100% rename from DRED_codegen/hls_proj/solution1/solution1.aps rename to hls_proj/solution1/solution1.aps diff --git a/DRED_codegen/hls_proj/solution1/solution1.directive b/hls_proj/solution1/solution1.directive similarity index 100% rename from DRED_codegen/hls_proj/solution1/solution1.directive rename to hls_proj/solution1/solution1.directive diff --git a/DRED_codegen/hls_proj/solution1/solution1.log b/hls_proj/solution1/solution1.log similarity index 100% rename from DRED_codegen/hls_proj/solution1/solution1.log rename to hls_proj/solution1/solution1.log diff --git a/DRED_codegen/hls_proj/vivado_hls.app b/hls_proj/vivado_hls.app similarity index 100% rename from DRED_codegen/hls_proj/vivado_hls.app rename to hls_proj/vivado_hls.app diff --git a/DRED_codegen/inference.py b/inference.py similarity index 100% rename from DRED_codegen/inference.py rename to inference.py diff --git a/DRED_codegen/inference_escape.py b/inference_escape.py similarity index 100% rename from DRED_codegen/inference_escape.py rename to inference_escape.py diff --git a/nmtdecoder/ce.cc b/nmtdecoder/ce.cc deleted file mode 100644 index 66bd146316e4a96325aed845c63f8425011f19db..0000000000000000000000000000000000000000 --- a/nmtdecoder/ce.cc +++ /dev/null @@ -1,3483 +0,0 @@ -#include "ce.h" -float compute_engine8( -float a1, float b1, -float a2, float b2, -float a3, float b3, -float a4, float b4, -float a5, float b5, -float a6, float b6, -float a7, float b7, -float a8, float b8){ -#pragma HLS PIPELINE - float m1, m2, m3, m4, - m5, m6, m7, m8; - - float add1, add2, add3, add4, - add5, add6; - m1 = a1*b1; - m2 = a2*b2; - m3 = a3*b3; - m4 = a4*b4; - m5 = a5*b5; - m6 = a6*b6; - m7 = a7*b7; - m8 = a8*b8; - add1 = m1+m2; - add2 = m3+m4; - add3 = m5+m6; - add4 = m7+m8; - add5 = add4+add3; - add6 = add2+add1; - return add5+add6; -} -float compute_engine16( -float a1, float b1, -float a2, float b2, -float a3, float b3, -float a4, float b4, -float a5, float b5, -float a6, float b6, -float a7, float b7, -float a8, float b8, -float a9, float b9, -float a10, float b10, -float a11, float b11, -float a12, float b12, -float a13, float b13, -float a14, float b14, -float a15, float b15, -float a16, float b16){ -#pragma HLS PIPELINE - float m1, m2, m3, m4, - m5, m6, m7, m8, m9, - m10, m11, m12, m13, m14, - m15, m16; - - float add1, add2, add3, add4, - add5, add6, add7, add8, add9, - add10, add11, add12, add13, add14; - m1 = a1*b1; - m2 = a2*b2; - m3 = a3*b3; - m4 = a4*b4; - m5 = a5*b5; - m6 = a6*b6; - m7 = a7*b7; - m8 = a8*b8; - m9 = a9*b9; - m10 = a10*b10; - m11 = a11*b11; - m12 = a12*b12; - m13 = a13*b13; - m14 = a14*b14; - m15 = a15*b15; - m16 = a16*b16; - add1 = m1+m2; - add2 = m3+m4; - add3 = m5+m6; - add4 = m7+m8; - add5 = m9+m10; - add6 = m11+m12; - add7 = m13+m14; - add8 = m15+m16; - add9 = add8+add7; - add10 = add6+add5; - add11 = add4+add3; - add12 = add2+add1; - add13 = add12+add11; - add14 = add10+add9; - return add13+add14; -} -float compute_engine32( -float a1, float b1, -float a2, float b2, -float a3, float b3, -float a4, float b4, -float a5, float b5, -float a6, float b6, -float a7, float b7, -float a8, float b8, -float a9, float b9, -float a10, float b10, -float a11, float b11, -float a12, float b12, -float a13, float b13, -float a14, float b14, -float a15, float b15, -float a16, float b16, -float a17, float b17, -float a18, float b18, -float a19, float b19, -float a20, float b20, -float a21, float b21, -float a22, float b22, -float a23, float b23, -float a24, float b24, -float a25, float b25, -float a26, float b26, -float a27, float b27, -float a28, float b28, -float a29, float b29, -float a30, float b30, -float a31, float b31, -float a32, float b32){ -#pragma HLS PIPELINE - float m1, m2, m3, m4, - m5, m6, m7, m8, m9, - m10, m11, m12, m13, m14, - m15, m16, m17, m18, m19, - m20, m21, m22, m23, m24, - m25, m26, m27, m28, m29, - m30, m31, m32; - - float add1, add2, add3, add4, - add5, add6, add7, add8, add9, - add10, add11, add12, add13, add14, - add15, add16, add17, add18, add19, - add20, add21, add22, add23, add24, - add25, add26, add27, add28, add29, - add30; - m1 = a1*b1; - m2 = a2*b2; - m3 = a3*b3; - m4 = a4*b4; - m5 = a5*b5; - m6 = a6*b6; - m7 = a7*b7; - m8 = a8*b8; - m9 = a9*b9; - m10 = a10*b10; - m11 = a11*b11; - m12 = a12*b12; - m13 = a13*b13; - m14 = a14*b14; - m15 = a15*b15; - m16 = a16*b16; - m17 = a17*b17; - m18 = a18*b18; - m19 = a19*b19; - m20 = a20*b20; - m21 = a21*b21; - m22 = a22*b22; - m23 = a23*b23; - m24 = a24*b24; - m25 = a25*b25; - m26 = a26*b26; - m27 = a27*b27; - m28 = a28*b28; - m29 = a29*b29; - m30 = a30*b30; - m31 = a31*b31; - m32 = a32*b32; - add1 = m1+m2; - add2 = m3+m4; - add3 = m5+m6; - add4 = m7+m8; - add5 = m9+m10; - add6 = m11+m12; - add7 = m13+m14; - add8 = m15+m16; - add9 = m17+m18; - add10 = m19+m20; - add11 = m21+m22; - add12 = m23+m24; - add13 = m25+m26; - add14 = m27+m28; - add15 = m29+m30; - add16 = m31+m32; - add17 = add16+add15; - add18 = add14+add13; - add19 = add12+add11; - add20 = add10+add9; - add21 = add8+add7; - add22 = add6+add5; - add23 = add4+add3; - add24 = add2+add1; - add25 = add24+add23; - add26 = add22+add21; - add27 = add20+add19; - add28 = add18+add17; - add29 = add28+add27; - add30 = add26+add25; - return add29+add30; -} -float compute_engine64( -float a1, float b1, -float a2, float b2, -float a3, float b3, -float a4, float b4, -float a5, float b5, -float a6, float b6, -float a7, float b7, -float a8, float b8, -float a9, float b9, -float a10, float b10, -float a11, float b11, -float a12, float b12, -float a13, float b13, -float a14, float b14, -float a15, float b15, -float a16, float b16, -float a17, float b17, -float a18, float b18, -float a19, float b19, -float a20, float b20, -float a21, float b21, -float a22, float b22, -float a23, float b23, -float a24, float b24, -float a25, float b25, -float a26, float b26, -float a27, float b27, -float a28, float b28, -float a29, float b29, -float a30, float b30, -float a31, float b31, -float a32, float b32, -float a33, float b33, -float a34, float b34, -float a35, float b35, -float a36, float b36, -float a37, float b37, -float a38, float b38, -float a39, float b39, -float a40, float b40, -float a41, float b41, -float a42, float b42, -float a43, float b43, -float a44, float b44, -float a45, float b45, -float a46, float b46, -float a47, float b47, -float a48, float b48, -float a49, float b49, -float a50, float b50, -float a51, float b51, -float a52, float b52, -float a53, float b53, -float a54, float b54, -float a55, float b55, -float a56, float b56, -float a57, float b57, -float a58, float b58, -float a59, float b59, -float a60, float b60, -float a61, float b61, -float a62, float b62, -float a63, float b63, -float a64, float b64){ -#pragma HLS inline off -#pragma HLS PIPELINE - float m1, m2, m3, m4, - m5, m6, m7, m8, m9, - m10, m11, m12, m13, m14, - m15, m16, m17, m18, m19, - m20, m21, m22, m23, m24, - m25, m26, m27, m28, m29, - m30, m31, m32, m33, m34, - m35, m36, m37, m38, m39, - m40, m41, m42, m43, m44, - m45, m46, m47, m48, m49, - m50, m51, m52, m53, m54, - m55, m56, m57, m58, m59, - m60, m61, m62, m63, m64; - - float add1, add2, add3, add4, - add5, add6, add7, add8, add9, - add10, add11, add12, add13, add14, - add15, add16, add17, add18, add19, - add20, add21, add22, add23, add24, - add25, add26, add27, add28, add29, - add30, add31, add32, add33, add34, - add35, add36, add37, add38, add39, - add40, add41, add42, add43, add44, - add45, add46, add47, add48, add49, - add50, add51, add52, add53, add54, - add55, add56, add57, add58, add59, - add60, add61, add62; - m1 = a1*b1; - m2 = a2*b2; - m3 = a3*b3; - m4 = a4*b4; - m5 = a5*b5; - m6 = a6*b6; - m7 = a7*b7; - m8 = a8*b8; - m9 = a9*b9; - m10 = a10*b10; - m11 = a11*b11; - m12 = a12*b12; - m13 = a13*b13; - m14 = a14*b14; - m15 = a15*b15; - m16 = a16*b16; - m17 = a17*b17; - m18 = a18*b18; - m19 = a19*b19; - m20 = a20*b20; - m21 = a21*b21; - m22 = a22*b22; - m23 = a23*b23; - m24 = a24*b24; - m25 = a25*b25; - m26 = a26*b26; - m27 = a27*b27; - m28 = a28*b28; - m29 = a29*b29; - m30 = a30*b30; - m31 = a31*b31; - m32 = a32*b32; - m33 = a33*b33; - m34 = a34*b34; - m35 = a35*b35; - m36 = a36*b36; - m37 = a37*b37; - m38 = a38*b38; - m39 = a39*b39; - m40 = a40*b40; - m41 = a41*b41; - m42 = a42*b42; - m43 = a43*b43; - m44 = a44*b44; - m45 = a45*b45; - m46 = a46*b46; - m47 = a47*b47; - m48 = a48*b48; - m49 = a49*b49; - m50 = a50*b50; - m51 = a51*b51; - m52 = a52*b52; - m53 = a53*b53; - m54 = a54*b54; - m55 = a55*b55; - m56 = a56*b56; - m57 = a57*b57; - m58 = a58*b58; - m59 = a59*b59; - m60 = a60*b60; - m61 = a61*b61; - m62 = a62*b62; - m63 = a63*b63; - m64 = a64*b64; - add1 = m1+m2; - add2 = m3+m4; - add3 = m5+m6; - add4 = m7+m8; - add5 = m9+m10; - add6 = m11+m12; - add7 = m13+m14; - add8 = m15+m16; - add9 = m17+m18; - add10 = m19+m20; - add11 = m21+m22; - add12 = m23+m24; - add13 = m25+m26; - add14 = m27+m28; - add15 = m29+m30; - add16 = m31+m32; - add17 = m33+m34; - add18 = m35+m36; - add19 = m37+m38; - add20 = m39+m40; - add21 = m41+m42; - add22 = m43+m44; - add23 = m45+m46; - add24 = m47+m48; - add25 = m49+m50; - add26 = m51+m52; - add27 = m53+m54; - add28 = m55+m56; - add29 = m57+m58; - add30 = m59+m60; - add31 = m61+m62; - add32 = m63+m64; - add33 = add32+add31; - add34 = add30+add29; - add35 = add28+add27; - add36 = add26+add25; - add37 = add24+add23; - add38 = add22+add21; - add39 = add20+add19; - add40 = add18+add17; - add41 = add16+add15; - add42 = add14+add13; - add43 = add12+add11; - add44 = add10+add9; - add45 = add8+add7; - add46 = add6+add5; - add47 = add4+add3; - add48 = add2+add1; - add49 = add48+add47; - add50 = add46+add45; - add51 = add44+add43; - add52 = add42+add41; - add53 = add40+add39; - add54 = add38+add37; - add55 = add36+add35; - add56 = add34+add33; - add57 = add56+add55; - add58 = add54+add53; - add59 = add52+add51; - add60 = add50+add49; - add61 = add60+add59; - add62 = add58+add57; - return add61+add62; -} -float compute_engine128( -float a1, float b1, -float a2, float b2, -float a3, float b3, -float a4, float b4, -float a5, float b5, -float a6, float b6, -float a7, float b7, -float a8, float b8, -float a9, float b9, -float a10, float b10, -float a11, float b11, -float a12, float b12, -float a13, float b13, -float a14, float b14, -float a15, float b15, -float a16, float b16, -float a17, float b17, -float a18, float b18, -float a19, float b19, -float a20, float b20, -float a21, float b21, -float a22, float b22, -float a23, float b23, -float a24, float b24, -float a25, float b25, -float a26, float b26, -float a27, float b27, -float a28, float b28, -float a29, float b29, -float a30, float b30, -float a31, float b31, -float a32, float b32, -float a33, float b33, -float a34, float b34, -float a35, float b35, -float a36, float b36, -float a37, float b37, -float a38, float b38, -float a39, float b39, -float a40, float b40, -float a41, float b41, -float a42, float b42, -float a43, float b43, -float a44, float b44, -float a45, float b45, -float a46, float b46, -float a47, float b47, -float a48, float b48, -float a49, float b49, -float a50, float b50, -float a51, float b51, -float a52, float b52, -float a53, float b53, -float a54, float b54, -float a55, float b55, -float a56, float b56, -float a57, float b57, -float a58, float b58, -float a59, float b59, -float a60, float b60, -float a61, float b61, -float a62, float b62, -float a63, float b63, -float a64, float b64, -float a65, float b65, -float a66, float b66, -float a67, float b67, -float a68, float b68, -float a69, float b69, -float a70, float b70, -float a71, float b71, -float a72, float b72, -float a73, float b73, -float a74, float b74, -float a75, float b75, -float a76, float b76, -float a77, float b77, -float a78, float b78, -float a79, float b79, -float a80, float b80, -float a81, float b81, -float a82, float b82, -float a83, float b83, -float a84, float b84, -float a85, float b85, -float a86, float b86, -float a87, float b87, -float a88, float b88, -float a89, float b89, -float a90, float b90, -float a91, float b91, -float a92, float b92, -float a93, float b93, -float a94, float b94, -float a95, float b95, -float a96, float b96, -float a97, float b97, -float a98, float b98, -float a99, float b99, -float a100, float b100, -float a101, float b101, -float a102, float b102, -float a103, float b103, -float a104, float b104, -float a105, float b105, -float a106, float b106, -float a107, float b107, -float a108, float b108, -float a109, float b109, -float a110, float b110, -float a111, float b111, -float a112, float b112, -float a113, float b113, -float a114, float b114, -float a115, float b115, -float a116, float b116, -float a117, float b117, -float a118, float b118, -float a119, float b119, -float a120, float b120, -float a121, float b121, -float a122, float b122, -float a123, float b123, -float a124, float b124, -float a125, float b125, -float a126, float b126, -float a127, float b127, -float a128, float b128){ -#pragma HLS PIPELINE - float m1, m2, m3, m4, - m5, m6, m7, m8, m9, - m10, m11, m12, m13, m14, - m15, m16, m17, m18, m19, - m20, m21, m22, m23, m24, - m25, m26, m27, m28, m29, - m30, m31, m32, m33, m34, - m35, m36, m37, m38, m39, - m40, m41, m42, m43, m44, - m45, m46, m47, m48, m49, - m50, m51, m52, m53, m54, - m55, m56, m57, m58, m59, - m60, m61, m62, m63, m64, - m65, m66, m67, m68, m69, - m70, m71, m72, m73, m74, - m75, m76, m77, m78, m79, - m80, m81, m82, m83, m84, - m85, m86, m87, m88, m89, - m90, m91, m92, m93, m94, - m95, m96, m97, m98, m99, - m100, m101, m102, m103, m104, - m105, m106, m107, m108, m109, - m110, m111, m112, m113, m114, - m115, m116, m117, m118, m119, - m120, m121, m122, m123, m124, - m125, m126, m127, m128; - - float add1, add2, add3, add4, - add5, add6, add7, add8, add9, - add10, add11, add12, add13, add14, - add15, add16, add17, add18, add19, - add20, add21, add22, add23, add24, - add25, add26, add27, add28, add29, - add30, add31, add32, add33, add34, - add35, add36, add37, add38, add39, - add40, add41, add42, add43, add44, - add45, add46, add47, add48, add49, - add50, add51, add52, add53, add54, - add55, add56, add57, add58, add59, - add60, add61, add62, add63, add64, - add65, add66, add67, add68, add69, - add70, add71, add72, add73, add74, - add75, add76, add77, add78, add79, - add80, add81, add82, add83, add84, - add85, add86, add87, add88, add89, - add90, add91, add92, add93, add94, - add95, add96, add97, add98, add99, - add100, add101, add102, add103, add104, - add105, add106, add107, add108, add109, - add110, add111, add112, add113, add114, - add115, add116, add117, add118, add119, - add120, add121, add122, add123, add124, - add125, add126; - m1 = a1*b1; - m2 = a2*b2; - m3 = a3*b3; - m4 = a4*b4; - m5 = a5*b5; - m6 = a6*b6; - m7 = a7*b7; - m8 = a8*b8; - m9 = a9*b9; - m10 = a10*b10; - m11 = a11*b11; - m12 = a12*b12; - m13 = a13*b13; - m14 = a14*b14; - m15 = a15*b15; - m16 = a16*b16; - m17 = a17*b17; - m18 = a18*b18; - m19 = a19*b19; - m20 = a20*b20; - m21 = a21*b21; - m22 = a22*b22; - m23 = a23*b23; - m24 = a24*b24; - m25 = a25*b25; - m26 = a26*b26; - m27 = a27*b27; - m28 = a28*b28; - m29 = a29*b29; - m30 = a30*b30; - m31 = a31*b31; - m32 = a32*b32; - m33 = a33*b33; - m34 = a34*b34; - m35 = a35*b35; - m36 = a36*b36; - m37 = a37*b37; - m38 = a38*b38; - m39 = a39*b39; - m40 = a40*b40; - m41 = a41*b41; - m42 = a42*b42; - m43 = a43*b43; - m44 = a44*b44; - m45 = a45*b45; - m46 = a46*b46; - m47 = a47*b47; - m48 = a48*b48; - m49 = a49*b49; - m50 = a50*b50; - m51 = a51*b51; - m52 = a52*b52; - m53 = a53*b53; - m54 = a54*b54; - m55 = a55*b55; - m56 = a56*b56; - m57 = a57*b57; - m58 = a58*b58; - m59 = a59*b59; - m60 = a60*b60; - m61 = a61*b61; - m62 = a62*b62; - m63 = a63*b63; - m64 = a64*b64; - m65 = a65*b65; - m66 = a66*b66; - m67 = a67*b67; - m68 = a68*b68; - m69 = a69*b69; - m70 = a70*b70; - m71 = a71*b71; - m72 = a72*b72; - m73 = a73*b73; - m74 = a74*b74; - m75 = a75*b75; - m76 = a76*b76; - m77 = a77*b77; - m78 = a78*b78; - m79 = a79*b79; - m80 = a80*b80; - m81 = a81*b81; - m82 = a82*b82; - m83 = a83*b83; - m84 = a84*b84; - m85 = a85*b85; - m86 = a86*b86; - m87 = a87*b87; - m88 = a88*b88; - m89 = a89*b89; - m90 = a90*b90; - m91 = a91*b91; - m92 = a92*b92; - m93 = a93*b93; - m94 = a94*b94; - m95 = a95*b95; - m96 = a96*b96; - m97 = a97*b97; - m98 = a98*b98; - m99 = a99*b99; - m100 = a100*b100; - m101 = a101*b101; - m102 = a102*b102; - m103 = a103*b103; - m104 = a104*b104; - m105 = a105*b105; - m106 = a106*b106; - m107 = a107*b107; - m108 = a108*b108; - m109 = a109*b109; - m110 = a110*b110; - m111 = a111*b111; - m112 = a112*b112; - m113 = a113*b113; - m114 = a114*b114; - m115 = a115*b115; - m116 = a116*b116; - m117 = a117*b117; - m118 = a118*b118; - m119 = a119*b119; - m120 = a120*b120; - m121 = a121*b121; - m122 = a122*b122; - m123 = a123*b123; - m124 = a124*b124; - m125 = a125*b125; - m126 = a126*b126; - m127 = a127*b127; - m128 = a128*b128; - add1 = m1+m2; - add2 = m3+m4; - add3 = m5+m6; - add4 = m7+m8; - add5 = m9+m10; - add6 = m11+m12; - add7 = m13+m14; - add8 = m15+m16; - add9 = m17+m18; - add10 = m19+m20; - add11 = m21+m22; - add12 = m23+m24; - add13 = m25+m26; - add14 = m27+m28; - add15 = m29+m30; - add16 = m31+m32; - add17 = m33+m34; - add18 = m35+m36; - add19 = m37+m38; - add20 = m39+m40; - add21 = m41+m42; - add22 = m43+m44; - add23 = m45+m46; - add24 = m47+m48; - add25 = m49+m50; - add26 = m51+m52; - add27 = m53+m54; - add28 = m55+m56; - add29 = m57+m58; - add30 = m59+m60; - add31 = m61+m62; - add32 = m63+m64; - add33 = m65+m66; - add34 = m67+m68; - add35 = m69+m70; - add36 = m71+m72; - add37 = m73+m74; - add38 = m75+m76; - add39 = m77+m78; - add40 = m79+m80; - add41 = m81+m82; - add42 = m83+m84; - add43 = m85+m86; - add44 = m87+m88; - add45 = m89+m90; - add46 = m91+m92; - add47 = m93+m94; - add48 = m95+m96; - add49 = m97+m98; - add50 = m99+m100; - add51 = m101+m102; - add52 = m103+m104; - add53 = m105+m106; - add54 = m107+m108; - add55 = m109+m110; - add56 = m111+m112; - add57 = m113+m114; - add58 = m115+m116; - add59 = m117+m118; - add60 = m119+m120; - add61 = m121+m122; - add62 = m123+m124; - add63 = m125+m126; - add64 = m127+m128; - add65 = add64+add63; - add66 = add62+add61; - add67 = add60+add59; - add68 = add58+add57; - add69 = add56+add55; - add70 = add54+add53; - add71 = add52+add51; - add72 = add50+add49; - add73 = add48+add47; - add74 = add46+add45; - add75 = add44+add43; - add76 = add42+add41; - add77 = add40+add39; - add78 = add38+add37; - add79 = add36+add35; - add80 = add34+add33; - add81 = add32+add31; - add82 = add30+add29; - add83 = add28+add27; - add84 = add26+add25; - add85 = add24+add23; - add86 = add22+add21; - add87 = add20+add19; - add88 = add18+add17; - add89 = add16+add15; - add90 = add14+add13; - add91 = add12+add11; - add92 = add10+add9; - add93 = add8+add7; - add94 = add6+add5; - add95 = add4+add3; - add96 = add2+add1; - add97 = add96+add95; - add98 = add94+add93; - add99 = add92+add91; - add100 = add90+add89; - add101 = add88+add87; - add102 = add86+add85; - add103 = add84+add83; - add104 = add82+add81; - add105 = add80+add79; - add106 = add78+add77; - add107 = add76+add75; - add108 = add74+add73; - add109 = add72+add71; - add110 = add70+add69; - add111 = add68+add67; - add112 = add66+add65; - add113 = add112+add111; - add114 = add110+add109; - add115 = add108+add107; - add116 = add106+add105; - add117 = add104+add103; - add118 = add102+add101; - add119 = add100+add99; - add120 = add98+add97; - add121 = add120+add119; - add122 = add118+add117; - add123 = add116+add115; - add124 = add114+add113; - add125 = add124+add123; - add126 = add122+add121; - return add125+add126; -} -float compute_engine256( -float a1, float b1, -float a2, float b2, -float a3, float b3, -float a4, float b4, -float a5, float b5, -float a6, float b6, -float a7, float b7, -float a8, float b8, -float a9, float b9, -float a10, float b10, -float a11, float b11, -float a12, float b12, -float a13, float b13, -float a14, float b14, -float a15, float b15, -float a16, float b16, -float a17, float b17, -float a18, float b18, -float a19, float b19, -float a20, float b20, -float a21, float b21, -float a22, float b22, -float a23, float b23, -float a24, float b24, -float a25, float b25, -float a26, float b26, -float a27, float b27, -float a28, float b28, -float a29, float b29, -float a30, float b30, -float a31, float b31, -float a32, float b32, -float a33, float b33, -float a34, float b34, -float a35, float b35, -float a36, float b36, -float a37, float b37, -float a38, float b38, -float a39, float b39, -float a40, float b40, -float a41, float b41, -float a42, float b42, -float a43, float b43, -float a44, float b44, -float a45, float b45, -float a46, float b46, -float a47, float b47, -float a48, float b48, -float a49, float b49, -float a50, float b50, -float a51, float b51, -float a52, float b52, -float a53, float b53, -float a54, float b54, -float a55, float b55, -float a56, float b56, -float a57, float b57, -float a58, float b58, -float a59, float b59, -float a60, float b60, -float a61, float b61, -float a62, float b62, -float a63, float b63, -float a64, float b64, -float a65, float b65, -float a66, float b66, -float a67, float b67, -float a68, float b68, -float a69, float b69, -float a70, float b70, -float a71, float b71, -float a72, float b72, -float a73, float b73, -float a74, float b74, -float a75, float b75, -float a76, float b76, -float a77, float b77, -float a78, float b78, -float a79, float b79, -float a80, float b80, -float a81, float b81, -float a82, float b82, -float a83, float b83, -float a84, float b84, -float a85, float b85, -float a86, float b86, -float a87, float b87, -float a88, float b88, -float a89, float b89, -float a90, float b90, -float a91, float b91, -float a92, float b92, -float a93, float b93, -float a94, float b94, -float a95, float b95, -float a96, float b96, -float a97, float b97, -float a98, float b98, -float a99, float b99, -float a100, float b100, -float a101, float b101, -float a102, float b102, -float a103, float b103, -float a104, float b104, -float a105, float b105, -float a106, float b106, -float a107, float b107, -float a108, float b108, -float a109, float b109, -float a110, float b110, -float a111, float b111, -float a112, float b112, -float a113, float b113, -float a114, float b114, -float a115, float b115, -float a116, float b116, -float a117, float b117, -float a118, float b118, -float a119, float b119, -float a120, float b120, -float a121, float b121, -float a122, float b122, -float a123, float b123, -float a124, float b124, -float a125, float b125, -float a126, float b126, -float a127, float b127, -float a128, float b128, -float a129, float b129, -float a130, float b130, -float a131, float b131, -float a132, float b132, -float a133, float b133, -float a134, float b134, -float a135, float b135, -float a136, float b136, -float a137, float b137, -float a138, float b138, -float a139, float b139, -float a140, float b140, -float a141, float b141, -float a142, float b142, -float a143, float b143, -float a144, float b144, -float a145, float b145, -float a146, float b146, -float a147, float b147, -float a148, float b148, -float a149, float b149, -float a150, float b150, -float a151, float b151, -float a152, float b152, -float a153, float b153, -float a154, float b154, -float a155, float b155, -float a156, float b156, -float a157, float b157, -float a158, float b158, -float a159, float b159, -float a160, float b160, -float a161, float b161, -float a162, float b162, -float a163, float b163, -float a164, float b164, -float a165, float b165, -float a166, float b166, -float a167, float b167, -float a168, float b168, -float a169, float b169, -float a170, float b170, -float a171, float b171, -float a172, float b172, -float a173, float b173, -float a174, float b174, -float a175, float b175, -float a176, float b176, -float a177, float b177, -float a178, float b178, -float a179, float b179, -float a180, float b180, -float a181, float b181, -float a182, float b182, -float a183, float b183, -float a184, float b184, -float a185, float b185, -float a186, float b186, -float a187, float b187, -float a188, float b188, -float a189, float b189, -float a190, float b190, -float a191, float b191, -float a192, float b192, -float a193, float b193, -float a194, float b194, -float a195, float b195, -float a196, float b196, -float a197, float b197, -float a198, float b198, -float a199, float b199, -float a200, float b200, -float a201, float b201, -float a202, float b202, -float a203, float b203, -float a204, float b204, -float a205, float b205, -float a206, float b206, -float a207, float b207, -float a208, float b208, -float a209, float b209, -float a210, float b210, -float a211, float b211, -float a212, float b212, -float a213, float b213, -float a214, float b214, -float a215, float b215, -float a216, float b216, -float a217, float b217, -float a218, float b218, -float a219, float b219, -float a220, float b220, -float a221, float b221, -float a222, float b222, -float a223, float b223, -float a224, float b224, -float a225, float b225, -float a226, float b226, -float a227, float b227, -float a228, float b228, -float a229, float b229, -float a230, float b230, -float a231, float b231, -float a232, float b232, -float a233, float b233, -float a234, float b234, -float a235, float b235, -float a236, float b236, -float a237, float b237, -float a238, float b238, -float a239, float b239, -float a240, float b240, -float a241, float b241, -float a242, float b242, -float a243, float b243, -float a244, float b244, -float a245, float b245, -float a246, float b246, -float a247, float b247, -float a248, float b248, -float a249, float b249, -float a250, float b250, -float a251, float b251, -float a252, float b252, -float a253, float b253, -float a254, float b254, -float a255, float b255, -float a256, float b256){ -#pragma HLS PIPELINE - float m1, m2, m3, m4, - m5, m6, m7, m8, m9, - m10, m11, m12, m13, m14, - m15, m16, m17, m18, m19, - m20, m21, m22, m23, m24, - m25, m26, m27, m28, m29, - m30, m31, m32, m33, m34, - m35, m36, m37, m38, m39, - m40, m41, m42, m43, m44, - m45, m46, m47, m48, m49, - m50, m51, m52, m53, m54, - m55, m56, m57, m58, m59, - m60, m61, m62, m63, m64, - m65, m66, m67, m68, m69, - m70, m71, m72, m73, m74, - m75, m76, m77, m78, m79, - m80, m81, m82, m83, m84, - m85, m86, m87, m88, m89, - m90, m91, m92, m93, m94, - m95, m96, m97, m98, m99, - m100, m101, m102, m103, m104, - m105, m106, m107, m108, m109, - m110, m111, m112, m113, m114, - m115, m116, m117, m118, m119, - m120, m121, m122, m123, m124, - m125, m126, m127, m128, m129, - m130, m131, m132, m133, m134, - m135, m136, m137, m138, m139, - m140, m141, m142, m143, m144, - m145, m146, m147, m148, m149, - m150, m151, m152, m153, m154, - m155, m156, m157, m158, m159, - m160, m161, m162, m163, m164, - m165, m166, m167, m168, m169, - m170, m171, m172, m173, m174, - m175, m176, m177, m178, m179, - m180, m181, m182, m183, m184, - m185, m186, m187, m188, m189, - m190, m191, m192, m193, m194, - m195, m196, m197, m198, m199, - m200, m201, m202, m203, m204, - m205, m206, m207, m208, m209, - m210, m211, m212, m213, m214, - m215, m216, m217, m218, m219, - m220, m221, m222, m223, m224, - m225, m226, m227, m228, m229, - m230, m231, m232, m233, m234, - m235, m236, m237, m238, m239, - m240, m241, m242, m243, m244, - m245, m246, m247, m248, m249, - m250, m251, m252, m253, m254, - m255, m256; - - float add1, add2, add3, add4, - add5, add6, add7, add8, add9, - add10, add11, add12, add13, add14, - add15, add16, add17, add18, add19, - add20, add21, add22, add23, add24, - add25, add26, add27, add28, add29, - add30, add31, add32, add33, add34, - add35, add36, add37, add38, add39, - add40, add41, add42, add43, add44, - add45, add46, add47, add48, add49, - add50, add51, add52, add53, add54, - add55, add56, add57, add58, add59, - add60, add61, add62, add63, add64, - add65, add66, add67, add68, add69, - add70, add71, add72, add73, add74, - add75, add76, add77, add78, add79, - add80, add81, add82, add83, add84, - add85, add86, add87, add88, add89, - add90, add91, add92, add93, add94, - add95, add96, add97, add98, add99, - add100, add101, add102, add103, add104, - add105, add106, add107, add108, add109, - add110, add111, add112, add113, add114, - add115, add116, add117, add118, add119, - add120, add121, add122, add123, add124, - add125, add126, add127, add128, add129, - add130, add131, add132, add133, add134, - add135, add136, add137, add138, add139, - add140, add141, add142, add143, add144, - add145, add146, add147, add148, add149, - add150, add151, add152, add153, add154, - add155, add156, add157, add158, add159, - add160, add161, add162, add163, add164, - add165, add166, add167, add168, add169, - add170, add171, add172, add173, add174, - add175, add176, add177, add178, add179, - add180, add181, add182, add183, add184, - add185, add186, add187, add188, add189, - add190, add191, add192, add193, add194, - add195, add196, add197, add198, add199, - add200, add201, add202, add203, add204, - add205, add206, add207, add208, add209, - add210, add211, add212, add213, add214, - add215, add216, add217, add218, add219, - add220, add221, add222, add223, add224, - add225, add226, add227, add228, add229, - add230, add231, add232, add233, add234, - add235, add236, add237, add238, add239, - add240, add241, add242, add243, add244, - add245, add246, add247, add248, add249, - add250, add251, add252, add253, add254; - m1 = a1*b1; - m2 = a2*b2; - m3 = a3*b3; - m4 = a4*b4; - m5 = a5*b5; - m6 = a6*b6; - m7 = a7*b7; - m8 = a8*b8; - m9 = a9*b9; - m10 = a10*b10; - m11 = a11*b11; - m12 = a12*b12; - m13 = a13*b13; - m14 = a14*b14; - m15 = a15*b15; - m16 = a16*b16; - m17 = a17*b17; - m18 = a18*b18; - m19 = a19*b19; - m20 = a20*b20; - m21 = a21*b21; - m22 = a22*b22; - m23 = a23*b23; - m24 = a24*b24; - m25 = a25*b25; - m26 = a26*b26; - m27 = a27*b27; - m28 = a28*b28; - m29 = a29*b29; - m30 = a30*b30; - m31 = a31*b31; - m32 = a32*b32; - m33 = a33*b33; - m34 = a34*b34; - m35 = a35*b35; - m36 = a36*b36; - m37 = a37*b37; - m38 = a38*b38; - m39 = a39*b39; - m40 = a40*b40; - m41 = a41*b41; - m42 = a42*b42; - m43 = a43*b43; - m44 = a44*b44; - m45 = a45*b45; - m46 = a46*b46; - m47 = a47*b47; - m48 = a48*b48; - m49 = a49*b49; - m50 = a50*b50; - m51 = a51*b51; - m52 = a52*b52; - m53 = a53*b53; - m54 = a54*b54; - m55 = a55*b55; - m56 = a56*b56; - m57 = a57*b57; - m58 = a58*b58; - m59 = a59*b59; - m60 = a60*b60; - m61 = a61*b61; - m62 = a62*b62; - m63 = a63*b63; - m64 = a64*b64; - m65 = a65*b65; - m66 = a66*b66; - m67 = a67*b67; - m68 = a68*b68; - m69 = a69*b69; - m70 = a70*b70; - m71 = a71*b71; - m72 = a72*b72; - m73 = a73*b73; - m74 = a74*b74; - m75 = a75*b75; - m76 = a76*b76; - m77 = a77*b77; - m78 = a78*b78; - m79 = a79*b79; - m80 = a80*b80; - m81 = a81*b81; - m82 = a82*b82; - m83 = a83*b83; - m84 = a84*b84; - m85 = a85*b85; - m86 = a86*b86; - m87 = a87*b87; - m88 = a88*b88; - m89 = a89*b89; - m90 = a90*b90; - m91 = a91*b91; - m92 = a92*b92; - m93 = a93*b93; - m94 = a94*b94; - m95 = a95*b95; - m96 = a96*b96; - m97 = a97*b97; - m98 = a98*b98; - m99 = a99*b99; - m100 = a100*b100; - m101 = a101*b101; - m102 = a102*b102; - m103 = a103*b103; - m104 = a104*b104; - m105 = a105*b105; - m106 = a106*b106; - m107 = a107*b107; - m108 = a108*b108; - m109 = a109*b109; - m110 = a110*b110; - m111 = a111*b111; - m112 = a112*b112; - m113 = a113*b113; - m114 = a114*b114; - m115 = a115*b115; - m116 = a116*b116; - m117 = a117*b117; - m118 = a118*b118; - m119 = a119*b119; - m120 = a120*b120; - m121 = a121*b121; - m122 = a122*b122; - m123 = a123*b123; - m124 = a124*b124; - m125 = a125*b125; - m126 = a126*b126; - m127 = a127*b127; - m128 = a128*b128; - m129 = a129*b129; - m130 = a130*b130; - m131 = a131*b131; - m132 = a132*b132; - m133 = a133*b133; - m134 = a134*b134; - m135 = a135*b135; - m136 = a136*b136; - m137 = a137*b137; - m138 = a138*b138; - m139 = a139*b139; - m140 = a140*b140; - m141 = a141*b141; - m142 = a142*b142; - m143 = a143*b143; - m144 = a144*b144; - m145 = a145*b145; - m146 = a146*b146; - m147 = a147*b147; - m148 = a148*b148; - m149 = a149*b149; - m150 = a150*b150; - m151 = a151*b151; - m152 = a152*b152; - m153 = a153*b153; - m154 = a154*b154; - m155 = a155*b155; - m156 = a156*b156; - m157 = a157*b157; - m158 = a158*b158; - m159 = a159*b159; - m160 = a160*b160; - m161 = a161*b161; - m162 = a162*b162; - m163 = a163*b163; - m164 = a164*b164; - m165 = a165*b165; - m166 = a166*b166; - m167 = a167*b167; - m168 = a168*b168; - m169 = a169*b169; - m170 = a170*b170; - m171 = a171*b171; - m172 = a172*b172; - m173 = a173*b173; - m174 = a174*b174; - m175 = a175*b175; - m176 = a176*b176; - m177 = a177*b177; - m178 = a178*b178; - m179 = a179*b179; - m180 = a180*b180; - m181 = a181*b181; - m182 = a182*b182; - m183 = a183*b183; - m184 = a184*b184; - m185 = a185*b185; - m186 = a186*b186; - m187 = a187*b187; - m188 = a188*b188; - m189 = a189*b189; - m190 = a190*b190; - m191 = a191*b191; - m192 = a192*b192; - m193 = a193*b193; - m194 = a194*b194; - m195 = a195*b195; - m196 = a196*b196; - m197 = a197*b197; - m198 = a198*b198; - m199 = a199*b199; - m200 = a200*b200; - m201 = a201*b201; - m202 = a202*b202; - m203 = a203*b203; - m204 = a204*b204; - m205 = a205*b205; - m206 = a206*b206; - m207 = a207*b207; - m208 = a208*b208; - m209 = a209*b209; - m210 = a210*b210; - m211 = a211*b211; - m212 = a212*b212; - m213 = a213*b213; - m214 = a214*b214; - m215 = a215*b215; - m216 = a216*b216; - m217 = a217*b217; - m218 = a218*b218; - m219 = a219*b219; - m220 = a220*b220; - m221 = a221*b221; - m222 = a222*b222; - m223 = a223*b223; - m224 = a224*b224; - m225 = a225*b225; - m226 = a226*b226; - m227 = a227*b227; - m228 = a228*b228; - m229 = a229*b229; - m230 = a230*b230; - m231 = a231*b231; - m232 = a232*b232; - m233 = a233*b233; - m234 = a234*b234; - m235 = a235*b235; - m236 = a236*b236; - m237 = a237*b237; - m238 = a238*b238; - m239 = a239*b239; - m240 = a240*b240; - m241 = a241*b241; - m242 = a242*b242; - m243 = a243*b243; - m244 = a244*b244; - m245 = a245*b245; - m246 = a246*b246; - m247 = a247*b247; - m248 = a248*b248; - m249 = a249*b249; - m250 = a250*b250; - m251 = a251*b251; - m252 = a252*b252; - m253 = a253*b253; - m254 = a254*b254; - m255 = a255*b255; - m256 = a256*b256; - add1 = m1+m2; - add2 = m3+m4; - add3 = m5+m6; - add4 = m7+m8; - add5 = m9+m10; - add6 = m11+m12; - add7 = m13+m14; - add8 = m15+m16; - add9 = m17+m18; - add10 = m19+m20; - add11 = m21+m22; - add12 = m23+m24; - add13 = m25+m26; - add14 = m27+m28; - add15 = m29+m30; - add16 = m31+m32; - add17 = m33+m34; - add18 = m35+m36; - add19 = m37+m38; - add20 = m39+m40; - add21 = m41+m42; - add22 = m43+m44; - add23 = m45+m46; - add24 = m47+m48; - add25 = m49+m50; - add26 = m51+m52; - add27 = m53+m54; - add28 = m55+m56; - add29 = m57+m58; - add30 = m59+m60; - add31 = m61+m62; - add32 = m63+m64; - add33 = m65+m66; - add34 = m67+m68; - add35 = m69+m70; - add36 = m71+m72; - add37 = m73+m74; - add38 = m75+m76; - add39 = m77+m78; - add40 = m79+m80; - add41 = m81+m82; - add42 = m83+m84; - add43 = m85+m86; - add44 = m87+m88; - add45 = m89+m90; - add46 = m91+m92; - add47 = m93+m94; - add48 = m95+m96; - add49 = m97+m98; - add50 = m99+m100; - add51 = m101+m102; - add52 = m103+m104; - add53 = m105+m106; - add54 = m107+m108; - add55 = m109+m110; - add56 = m111+m112; - add57 = m113+m114; - add58 = m115+m116; - add59 = m117+m118; - add60 = m119+m120; - add61 = m121+m122; - add62 = m123+m124; - add63 = m125+m126; - add64 = m127+m128; - add65 = m129+m130; - add66 = m131+m132; - add67 = m133+m134; - add68 = m135+m136; - add69 = m137+m138; - add70 = m139+m140; - add71 = m141+m142; - add72 = m143+m144; - add73 = m145+m146; - add74 = m147+m148; - add75 = m149+m150; - add76 = m151+m152; - add77 = m153+m154; - add78 = m155+m156; - add79 = m157+m158; - add80 = m159+m160; - add81 = m161+m162; - add82 = m163+m164; - add83 = m165+m166; - add84 = m167+m168; - add85 = m169+m170; - add86 = m171+m172; - add87 = m173+m174; - add88 = m175+m176; - add89 = m177+m178; - add90 = m179+m180; - add91 = m181+m182; - add92 = m183+m184; - add93 = m185+m186; - add94 = m187+m188; - add95 = m189+m190; - add96 = m191+m192; - add97 = m193+m194; - add98 = m195+m196; - add99 = m197+m198; - add100 = m199+m200; - add101 = m201+m202; - add102 = m203+m204; - add103 = m205+m206; - add104 = m207+m208; - add105 = m209+m210; - add106 = m211+m212; - add107 = m213+m214; - add108 = m215+m216; - add109 = m217+m218; - add110 = m219+m220; - add111 = m221+m222; - add112 = m223+m224; - add113 = m225+m226; - add114 = m227+m228; - add115 = m229+m230; - add116 = m231+m232; - add117 = m233+m234; - add118 = m235+m236; - add119 = m237+m238; - add120 = m239+m240; - add121 = m241+m242; - add122 = m243+m244; - add123 = m245+m246; - add124 = m247+m248; - add125 = m249+m250; - add126 = m251+m252; - add127 = m253+m254; - add128 = m255+m256; - add129 = add128+add127; - add130 = add126+add125; - add131 = add124+add123; - add132 = add122+add121; - add133 = add120+add119; - add134 = add118+add117; - add135 = add116+add115; - add136 = add114+add113; - add137 = add112+add111; - add138 = add110+add109; - add139 = add108+add107; - add140 = add106+add105; - add141 = add104+add103; - add142 = add102+add101; - add143 = add100+add99; - add144 = add98+add97; - add145 = add96+add95; - add146 = add94+add93; - add147 = add92+add91; - add148 = add90+add89; - add149 = add88+add87; - add150 = add86+add85; - add151 = add84+add83; - add152 = add82+add81; - add153 = add80+add79; - add154 = add78+add77; - add155 = add76+add75; - add156 = add74+add73; - add157 = add72+add71; - add158 = add70+add69; - add159 = add68+add67; - add160 = add66+add65; - add161 = add64+add63; - add162 = add62+add61; - add163 = add60+add59; - add164 = add58+add57; - add165 = add56+add55; - add166 = add54+add53; - add167 = add52+add51; - add168 = add50+add49; - add169 = add48+add47; - add170 = add46+add45; - add171 = add44+add43; - add172 = add42+add41; - add173 = add40+add39; - add174 = add38+add37; - add175 = add36+add35; - add176 = add34+add33; - add177 = add32+add31; - add178 = add30+add29; - add179 = add28+add27; - add180 = add26+add25; - add181 = add24+add23; - add182 = add22+add21; - add183 = add20+add19; - add184 = add18+add17; - add185 = add16+add15; - add186 = add14+add13; - add187 = add12+add11; - add188 = add10+add9; - add189 = add8+add7; - add190 = add6+add5; - add191 = add4+add3; - add192 = add2+add1; - add193 = add192+add191; - add194 = add190+add189; - add195 = add188+add187; - add196 = add186+add185; - add197 = add184+add183; - add198 = add182+add181; - add199 = add180+add179; - add200 = add178+add177; - add201 = add176+add175; - add202 = add174+add173; - add203 = add172+add171; - add204 = add170+add169; - add205 = add168+add167; - add206 = add166+add165; - add207 = add164+add163; - add208 = add162+add161; - add209 = add160+add159; - add210 = add158+add157; - add211 = add156+add155; - add212 = add154+add153; - add213 = add152+add151; - add214 = add150+add149; - add215 = add148+add147; - add216 = add146+add145; - add217 = add144+add143; - add218 = add142+add141; - add219 = add140+add139; - add220 = add138+add137; - add221 = add136+add135; - add222 = add134+add133; - add223 = add132+add131; - add224 = add130+add129; - add225 = add224+add223; - add226 = add222+add221; - add227 = add220+add219; - add228 = add218+add217; - add229 = add216+add215; - add230 = add214+add213; - add231 = add212+add211; - add232 = add210+add209; - add233 = add208+add207; - add234 = add206+add205; - add235 = add204+add203; - add236 = add202+add201; - add237 = add200+add199; - add238 = add198+add197; - add239 = add196+add195; - add240 = add194+add193; - add241 = add240+add239; - add242 = add238+add237; - add243 = add236+add235; - add244 = add234+add233; - add245 = add232+add231; - add246 = add230+add229; - add247 = add228+add227; - add248 = add226+add225; - add249 = add248+add247; - add250 = add246+add245; - add251 = add244+add243; - add252 = add242+add241; - add253 = add252+add251; - add254 = add250+add249; - return add253+add254; -} -float compute_engine512( -float a1, float b1, -float a2, float b2, -float a3, float b3, -float a4, float b4, -float a5, float b5, -float a6, float b6, -float a7, float b7, -float a8, float b8, -float a9, float b9, -float a10, float b10, -float a11, float b11, -float a12, float b12, -float a13, float b13, -float a14, float b14, -float a15, float b15, -float a16, float b16, -float a17, float b17, -float a18, float b18, -float a19, float b19, -float a20, float b20, -float a21, float b21, -float a22, float b22, -float a23, float b23, -float a24, float b24, -float a25, float b25, -float a26, float b26, -float a27, float b27, -float a28, float b28, -float a29, float b29, -float a30, float b30, -float a31, float b31, -float a32, float b32, -float a33, float b33, -float a34, float b34, -float a35, float b35, -float a36, float b36, -float a37, float b37, -float a38, float b38, -float a39, float b39, -float a40, float b40, -float a41, float b41, -float a42, float b42, -float a43, float b43, -float a44, float b44, -float a45, float b45, -float a46, float b46, -float a47, float b47, -float a48, float b48, -float a49, float b49, -float a50, float b50, -float a51, float b51, -float a52, float b52, -float a53, float b53, -float a54, float b54, -float a55, float b55, -float a56, float b56, -float a57, float b57, -float a58, float b58, -float a59, float b59, -float a60, float b60, -float a61, float b61, -float a62, float b62, -float a63, float b63, -float a64, float b64, -float a65, float b65, -float a66, float b66, -float a67, float b67, -float a68, float b68, -float a69, float b69, -float a70, float b70, -float a71, float b71, -float a72, float b72, -float a73, float b73, -float a74, float b74, -float a75, float b75, -float a76, float b76, -float a77, float b77, -float a78, float b78, -float a79, float b79, -float a80, float b80, -float a81, float b81, -float a82, float b82, -float a83, float b83, -float a84, float b84, -float a85, float b85, -float a86, float b86, -float a87, float b87, -float a88, float b88, -float a89, float b89, -float a90, float b90, -float a91, float b91, -float a92, float b92, -float a93, float b93, -float a94, float b94, -float a95, float b95, -float a96, float b96, -float a97, float b97, -float a98, float b98, -float a99, float b99, -float a100, float b100, -float a101, float b101, -float a102, float b102, -float a103, float b103, -float a104, float b104, -float a105, float b105, -float a106, float b106, -float a107, float b107, -float a108, float b108, -float a109, float b109, -float a110, float b110, -float a111, float b111, -float a112, float b112, -float a113, float b113, -float a114, float b114, -float a115, float b115, -float a116, float b116, -float a117, float b117, -float a118, float b118, -float a119, float b119, -float a120, float b120, -float a121, float b121, -float a122, float b122, -float a123, float b123, -float a124, float b124, -float a125, float b125, -float a126, float b126, -float a127, float b127, -float a128, float b128, -float a129, float b129, -float a130, float b130, -float a131, float b131, -float a132, float b132, -float a133, float b133, -float a134, float b134, -float a135, float b135, -float a136, float b136, -float a137, float b137, -float a138, float b138, -float a139, float b139, -float a140, float b140, -float a141, float b141, -float a142, float b142, -float a143, float b143, -float a144, float b144, -float a145, float b145, -float a146, float b146, -float a147, float b147, -float a148, float b148, -float a149, float b149, -float a150, float b150, -float a151, float b151, -float a152, float b152, -float a153, float b153, -float a154, float b154, -float a155, float b155, -float a156, float b156, -float a157, float b157, -float a158, float b158, -float a159, float b159, -float a160, float b160, -float a161, float b161, -float a162, float b162, -float a163, float b163, -float a164, float b164, -float a165, float b165, -float a166, float b166, -float a167, float b167, -float a168, float b168, -float a169, float b169, -float a170, float b170, -float a171, float b171, -float a172, float b172, -float a173, float b173, -float a174, float b174, -float a175, float b175, -float a176, float b176, -float a177, float b177, -float a178, float b178, -float a179, float b179, -float a180, float b180, -float a181, float b181, -float a182, float b182, -float a183, float b183, -float a184, float b184, -float a185, float b185, -float a186, float b186, -float a187, float b187, -float a188, float b188, -float a189, float b189, -float a190, float b190, -float a191, float b191, -float a192, float b192, -float a193, float b193, -float a194, float b194, -float a195, float b195, -float a196, float b196, -float a197, float b197, -float a198, float b198, -float a199, float b199, -float a200, float b200, -float a201, float b201, -float a202, float b202, -float a203, float b203, -float a204, float b204, -float a205, float b205, -float a206, float b206, -float a207, float b207, -float a208, float b208, -float a209, float b209, -float a210, float b210, -float a211, float b211, -float a212, float b212, -float a213, float b213, -float a214, float b214, -float a215, float b215, -float a216, float b216, -float a217, float b217, -float a218, float b218, -float a219, float b219, -float a220, float b220, -float a221, float b221, -float a222, float b222, -float a223, float b223, -float a224, float b224, -float a225, float b225, -float a226, float b226, -float a227, float b227, -float a228, float b228, -float a229, float b229, -float a230, float b230, -float a231, float b231, -float a232, float b232, -float a233, float b233, -float a234, float b234, -float a235, float b235, -float a236, float b236, -float a237, float b237, -float a238, float b238, -float a239, float b239, -float a240, float b240, -float a241, float b241, -float a242, float b242, -float a243, float b243, -float a244, float b244, -float a245, float b245, -float a246, float b246, -float a247, float b247, -float a248, float b248, -float a249, float b249, -float a250, float b250, -float a251, float b251, -float a252, float b252, -float a253, float b253, -float a254, float b254, -float a255, float b255, -float a256, float b256, -float a257, float b257, -float a258, float b258, -float a259, float b259, -float a260, float b260, -float a261, float b261, -float a262, float b262, -float a263, float b263, -float a264, float b264, -float a265, float b265, -float a266, float b266, -float a267, float b267, -float a268, float b268, -float a269, float b269, -float a270, float b270, -float a271, float b271, -float a272, float b272, -float a273, float b273, -float a274, float b274, -float a275, float b275, -float a276, float b276, -float a277, float b277, -float a278, float b278, -float a279, float b279, -float a280, float b280, -float a281, float b281, -float a282, float b282, -float a283, float b283, -float a284, float b284, -float a285, float b285, -float a286, float b286, -float a287, float b287, -float a288, float b288, -float a289, float b289, -float a290, float b290, -float a291, float b291, -float a292, float b292, -float a293, float b293, -float a294, float b294, -float a295, float b295, -float a296, float b296, -float a297, float b297, -float a298, float b298, -float a299, float b299, -float a300, float b300, -float a301, float b301, -float a302, float b302, -float a303, float b303, -float a304, float b304, -float a305, float b305, -float a306, float b306, -float a307, float b307, -float a308, float b308, -float a309, float b309, -float a310, float b310, -float a311, float b311, -float a312, float b312, -float a313, float b313, -float a314, float b314, -float a315, float b315, -float a316, float b316, -float a317, float b317, -float a318, float b318, -float a319, float b319, -float a320, float b320, -float a321, float b321, -float a322, float b322, -float a323, float b323, -float a324, float b324, -float a325, float b325, -float a326, float b326, -float a327, float b327, -float a328, float b328, -float a329, float b329, -float a330, float b330, -float a331, float b331, -float a332, float b332, -float a333, float b333, -float a334, float b334, -float a335, float b335, -float a336, float b336, -float a337, float b337, -float a338, float b338, -float a339, float b339, -float a340, float b340, -float a341, float b341, -float a342, float b342, -float a343, float b343, -float a344, float b344, -float a345, float b345, -float a346, float b346, -float a347, float b347, -float a348, float b348, -float a349, float b349, -float a350, float b350, -float a351, float b351, -float a352, float b352, -float a353, float b353, -float a354, float b354, -float a355, float b355, -float a356, float b356, -float a357, float b357, -float a358, float b358, -float a359, float b359, -float a360, float b360, -float a361, float b361, -float a362, float b362, -float a363, float b363, -float a364, float b364, -float a365, float b365, -float a366, float b366, -float a367, float b367, -float a368, float b368, -float a369, float b369, -float a370, float b370, -float a371, float b371, -float a372, float b372, -float a373, float b373, -float a374, float b374, -float a375, float b375, -float a376, float b376, -float a377, float b377, -float a378, float b378, -float a379, float b379, -float a380, float b380, -float a381, float b381, -float a382, float b382, -float a383, float b383, -float a384, float b384, -float a385, float b385, -float a386, float b386, -float a387, float b387, -float a388, float b388, -float a389, float b389, -float a390, float b390, -float a391, float b391, -float a392, float b392, -float a393, float b393, -float a394, float b394, -float a395, float b395, -float a396, float b396, -float a397, float b397, -float a398, float b398, -float a399, float b399, -float a400, float b400, -float a401, float b401, -float a402, float b402, -float a403, float b403, -float a404, float b404, -float a405, float b405, -float a406, float b406, -float a407, float b407, -float a408, float b408, -float a409, float b409, -float a410, float b410, -float a411, float b411, -float a412, float b412, -float a413, float b413, -float a414, float b414, -float a415, float b415, -float a416, float b416, -float a417, float b417, -float a418, float b418, -float a419, float b419, -float a420, float b420, -float a421, float b421, -float a422, float b422, -float a423, float b423, -float a424, float b424, -float a425, float b425, -float a426, float b426, -float a427, float b427, -float a428, float b428, -float a429, float b429, -float a430, float b430, -float a431, float b431, -float a432, float b432, -float a433, float b433, -float a434, float b434, -float a435, float b435, -float a436, float b436, -float a437, float b437, -float a438, float b438, -float a439, float b439, -float a440, float b440, -float a441, float b441, -float a442, float b442, -float a443, float b443, -float a444, float b444, -float a445, float b445, -float a446, float b446, -float a447, float b447, -float a448, float b448, -float a449, float b449, -float a450, float b450, -float a451, float b451, -float a452, float b452, -float a453, float b453, -float a454, float b454, -float a455, float b455, -float a456, float b456, -float a457, float b457, -float a458, float b458, -float a459, float b459, -float a460, float b460, -float a461, float b461, -float a462, float b462, -float a463, float b463, -float a464, float b464, -float a465, float b465, -float a466, float b466, -float a467, float b467, -float a468, float b468, -float a469, float b469, -float a470, float b470, -float a471, float b471, -float a472, float b472, -float a473, float b473, -float a474, float b474, -float a475, float b475, -float a476, float b476, -float a477, float b477, -float a478, float b478, -float a479, float b479, -float a480, float b480, -float a481, float b481, -float a482, float b482, -float a483, float b483, -float a484, float b484, -float a485, float b485, -float a486, float b486, -float a487, float b487, -float a488, float b488, -float a489, float b489, -float a490, float b490, -float a491, float b491, -float a492, float b492, -float a493, float b493, -float a494, float b494, -float a495, float b495, -float a496, float b496, -float a497, float b497, -float a498, float b498, -float a499, float b499, -float a500, float b500, -float a501, float b501, -float a502, float b502, -float a503, float b503, -float a504, float b504, -float a505, float b505, -float a506, float b506, -float a507, float b507, -float a508, float b508, -float a509, float b509, -float a510, float b510, -float a511, float b511, -float a512, float b512){ -#pragma HLS PIPELINE - float m1, m2, m3, m4, - m5, m6, m7, m8, m9, - m10, m11, m12, m13, m14, - m15, m16, m17, m18, m19, - m20, m21, m22, m23, m24, - m25, m26, m27, m28, m29, - m30, m31, m32, m33, m34, - m35, m36, m37, m38, m39, - m40, m41, m42, m43, m44, - m45, m46, m47, m48, m49, - m50, m51, m52, m53, m54, - m55, m56, m57, m58, m59, - m60, m61, m62, m63, m64, - m65, m66, m67, m68, m69, - m70, m71, m72, m73, m74, - m75, m76, m77, m78, m79, - m80, m81, m82, m83, m84, - m85, m86, m87, m88, m89, - m90, m91, m92, m93, m94, - m95, m96, m97, m98, m99, - m100, m101, m102, m103, m104, - m105, m106, m107, m108, m109, - m110, m111, m112, m113, m114, - m115, m116, m117, m118, m119, - m120, m121, m122, m123, m124, - m125, m126, m127, m128, m129, - m130, m131, m132, m133, m134, - m135, m136, m137, m138, m139, - m140, m141, m142, m143, m144, - m145, m146, m147, m148, m149, - m150, m151, m152, m153, m154, - m155, m156, m157, m158, m159, - m160, m161, m162, m163, m164, - m165, m166, m167, m168, m169, - m170, m171, m172, m173, m174, - m175, m176, m177, m178, m179, - m180, m181, m182, m183, m184, - m185, m186, m187, m188, m189, - m190, m191, m192, m193, m194, - m195, m196, m197, m198, m199, - m200, m201, m202, m203, m204, - m205, m206, m207, m208, m209, - m210, m211, m212, m213, m214, - m215, m216, m217, m218, m219, - m220, m221, m222, m223, m224, - m225, m226, m227, m228, m229, - m230, m231, m232, m233, m234, - m235, m236, m237, m238, m239, - m240, m241, m242, m243, m244, - m245, m246, m247, m248, m249, - m250, m251, m252, m253, m254, - m255, m256, m257, m258, m259, - m260, m261, m262, m263, m264, - m265, m266, m267, m268, m269, - m270, m271, m272, m273, m274, - m275, m276, m277, m278, m279, - m280, m281, m282, m283, m284, - m285, m286, m287, m288, m289, - m290, m291, m292, m293, m294, - m295, m296, m297, m298, m299, - m300, m301, m302, m303, m304, - m305, m306, m307, m308, m309, - m310, m311, m312, m313, m314, - m315, m316, m317, m318, m319, - m320, m321, m322, m323, m324, - m325, m326, m327, m328, m329, - m330, m331, m332, m333, m334, - m335, m336, m337, m338, m339, - m340, m341, m342, m343, m344, - m345, m346, m347, m348, m349, - m350, m351, m352, m353, m354, - m355, m356, m357, m358, m359, - m360, m361, m362, m363, m364, - m365, m366, m367, m368, m369, - m370, m371, m372, m373, m374, - m375, m376, m377, m378, m379, - m380, m381, m382, m383, m384, - m385, m386, m387, m388, m389, - m390, m391, m392, m393, m394, - m395, m396, m397, m398, m399, - m400, m401, m402, m403, m404, - m405, m406, m407, m408, m409, - m410, m411, m412, m413, m414, - m415, m416, m417, m418, m419, - m420, m421, m422, m423, m424, - m425, m426, m427, m428, m429, - m430, m431, m432, m433, m434, - m435, m436, m437, m438, m439, - m440, m441, m442, m443, m444, - m445, m446, m447, m448, m449, - m450, m451, m452, m453, m454, - m455, m456, m457, m458, m459, - m460, m461, m462, m463, m464, - m465, m466, m467, m468, m469, - m470, m471, m472, m473, m474, - m475, m476, m477, m478, m479, - m480, m481, m482, m483, m484, - m485, m486, m487, m488, m489, - m490, m491, m492, m493, m494, - m495, m496, m497, m498, m499, - m500, m501, m502, m503, m504, - m505, m506, m507, m508, m509, - m510, m511, m512; - - float add1, add2, add3, add4, - add5, add6, add7, add8, add9, - add10, add11, add12, add13, add14, - add15, add16, add17, add18, add19, - add20, add21, add22, add23, add24, - add25, add26, add27, add28, add29, - add30, add31, add32, add33, add34, - add35, add36, add37, add38, add39, - add40, add41, add42, add43, add44, - add45, add46, add47, add48, add49, - add50, add51, add52, add53, add54, - add55, add56, add57, add58, add59, - add60, add61, add62, add63, add64, - add65, add66, add67, add68, add69, - add70, add71, add72, add73, add74, - add75, add76, add77, add78, add79, - add80, add81, add82, add83, add84, - add85, add86, add87, add88, add89, - add90, add91, add92, add93, add94, - add95, add96, add97, add98, add99, - add100, add101, add102, add103, add104, - add105, add106, add107, add108, add109, - add110, add111, add112, add113, add114, - add115, add116, add117, add118, add119, - add120, add121, add122, add123, add124, - add125, add126, add127, add128, add129, - add130, add131, add132, add133, add134, - add135, add136, add137, add138, add139, - add140, add141, add142, add143, add144, - add145, add146, add147, add148, add149, - add150, add151, add152, add153, add154, - add155, add156, add157, add158, add159, - add160, add161, add162, add163, add164, - add165, add166, add167, add168, add169, - add170, add171, add172, add173, add174, - add175, add176, add177, add178, add179, - add180, add181, add182, add183, add184, - add185, add186, add187, add188, add189, - add190, add191, add192, add193, add194, - add195, add196, add197, add198, add199, - add200, add201, add202, add203, add204, - add205, add206, add207, add208, add209, - add210, add211, add212, add213, add214, - add215, add216, add217, add218, add219, - add220, add221, add222, add223, add224, - add225, add226, add227, add228, add229, - add230, add231, add232, add233, add234, - add235, add236, add237, add238, add239, - add240, add241, add242, add243, add244, - add245, add246, add247, add248, add249, - add250, add251, add252, add253, add254, - add255, add256, add257, add258, add259, - add260, add261, add262, add263, add264, - add265, add266, add267, add268, add269, - add270, add271, add272, add273, add274, - add275, add276, add277, add278, add279, - add280, add281, add282, add283, add284, - add285, add286, add287, add288, add289, - add290, add291, add292, add293, add294, - add295, add296, add297, add298, add299, - add300, add301, add302, add303, add304, - add305, add306, add307, add308, add309, - add310, add311, add312, add313, add314, - add315, add316, add317, add318, add319, - add320, add321, add322, add323, add324, - add325, add326, add327, add328, add329, - add330, add331, add332, add333, add334, - add335, add336, add337, add338, add339, - add340, add341, add342, add343, add344, - add345, add346, add347, add348, add349, - add350, add351, add352, add353, add354, - add355, add356, add357, add358, add359, - add360, add361, add362, add363, add364, - add365, add366, add367, add368, add369, - add370, add371, add372, add373, add374, - add375, add376, add377, add378, add379, - add380, add381, add382, add383, add384, - add385, add386, add387, add388, add389, - add390, add391, add392, add393, add394, - add395, add396, add397, add398, add399, - add400, add401, add402, add403, add404, - add405, add406, add407, add408, add409, - add410, add411, add412, add413, add414, - add415, add416, add417, add418, add419, - add420, add421, add422, add423, add424, - add425, add426, add427, add428, add429, - add430, add431, add432, add433, add434, - add435, add436, add437, add438, add439, - add440, add441, add442, add443, add444, - add445, add446, add447, add448, add449, - add450, add451, add452, add453, add454, - add455, add456, add457, add458, add459, - add460, add461, add462, add463, add464, - add465, add466, add467, add468, add469, - add470, add471, add472, add473, add474, - add475, add476, add477, add478, add479, - add480, add481, add482, add483, add484, - add485, add486, add487, add488, add489, - add490, add491, add492, add493, add494, - add495, add496, add497, add498, add499, - add500, add501, add502, add503, add504, - add505, add506, add507, add508, add509, - add510; - m1 = a1*b1; - m2 = a2*b2; - m3 = a3*b3; - m4 = a4*b4; - m5 = a5*b5; - m6 = a6*b6; - m7 = a7*b7; - m8 = a8*b8; - m9 = a9*b9; - m10 = a10*b10; - m11 = a11*b11; - m12 = a12*b12; - m13 = a13*b13; - m14 = a14*b14; - m15 = a15*b15; - m16 = a16*b16; - m17 = a17*b17; - m18 = a18*b18; - m19 = a19*b19; - m20 = a20*b20; - m21 = a21*b21; - m22 = a22*b22; - m23 = a23*b23; - m24 = a24*b24; - m25 = a25*b25; - m26 = a26*b26; - m27 = a27*b27; - m28 = a28*b28; - m29 = a29*b29; - m30 = a30*b30; - m31 = a31*b31; - m32 = a32*b32; - m33 = a33*b33; - m34 = a34*b34; - m35 = a35*b35; - m36 = a36*b36; - m37 = a37*b37; - m38 = a38*b38; - m39 = a39*b39; - m40 = a40*b40; - m41 = a41*b41; - m42 = a42*b42; - m43 = a43*b43; - m44 = a44*b44; - m45 = a45*b45; - m46 = a46*b46; - m47 = a47*b47; - m48 = a48*b48; - m49 = a49*b49; - m50 = a50*b50; - m51 = a51*b51; - m52 = a52*b52; - m53 = a53*b53; - m54 = a54*b54; - m55 = a55*b55; - m56 = a56*b56; - m57 = a57*b57; - m58 = a58*b58; - m59 = a59*b59; - m60 = a60*b60; - m61 = a61*b61; - m62 = a62*b62; - m63 = a63*b63; - m64 = a64*b64; - m65 = a65*b65; - m66 = a66*b66; - m67 = a67*b67; - m68 = a68*b68; - m69 = a69*b69; - m70 = a70*b70; - m71 = a71*b71; - m72 = a72*b72; - m73 = a73*b73; - m74 = a74*b74; - m75 = a75*b75; - m76 = a76*b76; - m77 = a77*b77; - m78 = a78*b78; - m79 = a79*b79; - m80 = a80*b80; - m81 = a81*b81; - m82 = a82*b82; - m83 = a83*b83; - m84 = a84*b84; - m85 = a85*b85; - m86 = a86*b86; - m87 = a87*b87; - m88 = a88*b88; - m89 = a89*b89; - m90 = a90*b90; - m91 = a91*b91; - m92 = a92*b92; - m93 = a93*b93; - m94 = a94*b94; - m95 = a95*b95; - m96 = a96*b96; - m97 = a97*b97; - m98 = a98*b98; - m99 = a99*b99; - m100 = a100*b100; - m101 = a101*b101; - m102 = a102*b102; - m103 = a103*b103; - m104 = a104*b104; - m105 = a105*b105; - m106 = a106*b106; - m107 = a107*b107; - m108 = a108*b108; - m109 = a109*b109; - m110 = a110*b110; - m111 = a111*b111; - m112 = a112*b112; - m113 = a113*b113; - m114 = a114*b114; - m115 = a115*b115; - m116 = a116*b116; - m117 = a117*b117; - m118 = a118*b118; - m119 = a119*b119; - m120 = a120*b120; - m121 = a121*b121; - m122 = a122*b122; - m123 = a123*b123; - m124 = a124*b124; - m125 = a125*b125; - m126 = a126*b126; - m127 = a127*b127; - m128 = a128*b128; - m129 = a129*b129; - m130 = a130*b130; - m131 = a131*b131; - m132 = a132*b132; - m133 = a133*b133; - m134 = a134*b134; - m135 = a135*b135; - m136 = a136*b136; - m137 = a137*b137; - m138 = a138*b138; - m139 = a139*b139; - m140 = a140*b140; - m141 = a141*b141; - m142 = a142*b142; - m143 = a143*b143; - m144 = a144*b144; - m145 = a145*b145; - m146 = a146*b146; - m147 = a147*b147; - m148 = a148*b148; - m149 = a149*b149; - m150 = a150*b150; - m151 = a151*b151; - m152 = a152*b152; - m153 = a153*b153; - m154 = a154*b154; - m155 = a155*b155; - m156 = a156*b156; - m157 = a157*b157; - m158 = a158*b158; - m159 = a159*b159; - m160 = a160*b160; - m161 = a161*b161; - m162 = a162*b162; - m163 = a163*b163; - m164 = a164*b164; - m165 = a165*b165; - m166 = a166*b166; - m167 = a167*b167; - m168 = a168*b168; - m169 = a169*b169; - m170 = a170*b170; - m171 = a171*b171; - m172 = a172*b172; - m173 = a173*b173; - m174 = a174*b174; - m175 = a175*b175; - m176 = a176*b176; - m177 = a177*b177; - m178 = a178*b178; - m179 = a179*b179; - m180 = a180*b180; - m181 = a181*b181; - m182 = a182*b182; - m183 = a183*b183; - m184 = a184*b184; - m185 = a185*b185; - m186 = a186*b186; - m187 = a187*b187; - m188 = a188*b188; - m189 = a189*b189; - m190 = a190*b190; - m191 = a191*b191; - m192 = a192*b192; - m193 = a193*b193; - m194 = a194*b194; - m195 = a195*b195; - m196 = a196*b196; - m197 = a197*b197; - m198 = a198*b198; - m199 = a199*b199; - m200 = a200*b200; - m201 = a201*b201; - m202 = a202*b202; - m203 = a203*b203; - m204 = a204*b204; - m205 = a205*b205; - m206 = a206*b206; - m207 = a207*b207; - m208 = a208*b208; - m209 = a209*b209; - m210 = a210*b210; - m211 = a211*b211; - m212 = a212*b212; - m213 = a213*b213; - m214 = a214*b214; - m215 = a215*b215; - m216 = a216*b216; - m217 = a217*b217; - m218 = a218*b218; - m219 = a219*b219; - m220 = a220*b220; - m221 = a221*b221; - m222 = a222*b222; - m223 = a223*b223; - m224 = a224*b224; - m225 = a225*b225; - m226 = a226*b226; - m227 = a227*b227; - m228 = a228*b228; - m229 = a229*b229; - m230 = a230*b230; - m231 = a231*b231; - m232 = a232*b232; - m233 = a233*b233; - m234 = a234*b234; - m235 = a235*b235; - m236 = a236*b236; - m237 = a237*b237; - m238 = a238*b238; - m239 = a239*b239; - m240 = a240*b240; - m241 = a241*b241; - m242 = a242*b242; - m243 = a243*b243; - m244 = a244*b244; - m245 = a245*b245; - m246 = a246*b246; - m247 = a247*b247; - m248 = a248*b248; - m249 = a249*b249; - m250 = a250*b250; - m251 = a251*b251; - m252 = a252*b252; - m253 = a253*b253; - m254 = a254*b254; - m255 = a255*b255; - m256 = a256*b256; - m257 = a257*b257; - m258 = a258*b258; - m259 = a259*b259; - m260 = a260*b260; - m261 = a261*b261; - m262 = a262*b262; - m263 = a263*b263; - m264 = a264*b264; - m265 = a265*b265; - m266 = a266*b266; - m267 = a267*b267; - m268 = a268*b268; - m269 = a269*b269; - m270 = a270*b270; - m271 = a271*b271; - m272 = a272*b272; - m273 = a273*b273; - m274 = a274*b274; - m275 = a275*b275; - m276 = a276*b276; - m277 = a277*b277; - m278 = a278*b278; - m279 = a279*b279; - m280 = a280*b280; - m281 = a281*b281; - m282 = a282*b282; - m283 = a283*b283; - m284 = a284*b284; - m285 = a285*b285; - m286 = a286*b286; - m287 = a287*b287; - m288 = a288*b288; - m289 = a289*b289; - m290 = a290*b290; - m291 = a291*b291; - m292 = a292*b292; - m293 = a293*b293; - m294 = a294*b294; - m295 = a295*b295; - m296 = a296*b296; - m297 = a297*b297; - m298 = a298*b298; - m299 = a299*b299; - m300 = a300*b300; - m301 = a301*b301; - m302 = a302*b302; - m303 = a303*b303; - m304 = a304*b304; - m305 = a305*b305; - m306 = a306*b306; - m307 = a307*b307; - m308 = a308*b308; - m309 = a309*b309; - m310 = a310*b310; - m311 = a311*b311; - m312 = a312*b312; - m313 = a313*b313; - m314 = a314*b314; - m315 = a315*b315; - m316 = a316*b316; - m317 = a317*b317; - m318 = a318*b318; - m319 = a319*b319; - m320 = a320*b320; - m321 = a321*b321; - m322 = a322*b322; - m323 = a323*b323; - m324 = a324*b324; - m325 = a325*b325; - m326 = a326*b326; - m327 = a327*b327; - m328 = a328*b328; - m329 = a329*b329; - m330 = a330*b330; - m331 = a331*b331; - m332 = a332*b332; - m333 = a333*b333; - m334 = a334*b334; - m335 = a335*b335; - m336 = a336*b336; - m337 = a337*b337; - m338 = a338*b338; - m339 = a339*b339; - m340 = a340*b340; - m341 = a341*b341; - m342 = a342*b342; - m343 = a343*b343; - m344 = a344*b344; - m345 = a345*b345; - m346 = a346*b346; - m347 = a347*b347; - m348 = a348*b348; - m349 = a349*b349; - m350 = a350*b350; - m351 = a351*b351; - m352 = a352*b352; - m353 = a353*b353; - m354 = a354*b354; - m355 = a355*b355; - m356 = a356*b356; - m357 = a357*b357; - m358 = a358*b358; - m359 = a359*b359; - m360 = a360*b360; - m361 = a361*b361; - m362 = a362*b362; - m363 = a363*b363; - m364 = a364*b364; - m365 = a365*b365; - m366 = a366*b366; - m367 = a367*b367; - m368 = a368*b368; - m369 = a369*b369; - m370 = a370*b370; - m371 = a371*b371; - m372 = a372*b372; - m373 = a373*b373; - m374 = a374*b374; - m375 = a375*b375; - m376 = a376*b376; - m377 = a377*b377; - m378 = a378*b378; - m379 = a379*b379; - m380 = a380*b380; - m381 = a381*b381; - m382 = a382*b382; - m383 = a383*b383; - m384 = a384*b384; - m385 = a385*b385; - m386 = a386*b386; - m387 = a387*b387; - m388 = a388*b388; - m389 = a389*b389; - m390 = a390*b390; - m391 = a391*b391; - m392 = a392*b392; - m393 = a393*b393; - m394 = a394*b394; - m395 = a395*b395; - m396 = a396*b396; - m397 = a397*b397; - m398 = a398*b398; - m399 = a399*b399; - m400 = a400*b400; - m401 = a401*b401; - m402 = a402*b402; - m403 = a403*b403; - m404 = a404*b404; - m405 = a405*b405; - m406 = a406*b406; - m407 = a407*b407; - m408 = a408*b408; - m409 = a409*b409; - m410 = a410*b410; - m411 = a411*b411; - m412 = a412*b412; - m413 = a413*b413; - m414 = a414*b414; - m415 = a415*b415; - m416 = a416*b416; - m417 = a417*b417; - m418 = a418*b418; - m419 = a419*b419; - m420 = a420*b420; - m421 = a421*b421; - m422 = a422*b422; - m423 = a423*b423; - m424 = a424*b424; - m425 = a425*b425; - m426 = a426*b426; - m427 = a427*b427; - m428 = a428*b428; - m429 = a429*b429; - m430 = a430*b430; - m431 = a431*b431; - m432 = a432*b432; - m433 = a433*b433; - m434 = a434*b434; - m435 = a435*b435; - m436 = a436*b436; - m437 = a437*b437; - m438 = a438*b438; - m439 = a439*b439; - m440 = a440*b440; - m441 = a441*b441; - m442 = a442*b442; - m443 = a443*b443; - m444 = a444*b444; - m445 = a445*b445; - m446 = a446*b446; - m447 = a447*b447; - m448 = a448*b448; - m449 = a449*b449; - m450 = a450*b450; - m451 = a451*b451; - m452 = a452*b452; - m453 = a453*b453; - m454 = a454*b454; - m455 = a455*b455; - m456 = a456*b456; - m457 = a457*b457; - m458 = a458*b458; - m459 = a459*b459; - m460 = a460*b460; - m461 = a461*b461; - m462 = a462*b462; - m463 = a463*b463; - m464 = a464*b464; - m465 = a465*b465; - m466 = a466*b466; - m467 = a467*b467; - m468 = a468*b468; - m469 = a469*b469; - m470 = a470*b470; - m471 = a471*b471; - m472 = a472*b472; - m473 = a473*b473; - m474 = a474*b474; - m475 = a475*b475; - m476 = a476*b476; - m477 = a477*b477; - m478 = a478*b478; - m479 = a479*b479; - m480 = a480*b480; - m481 = a481*b481; - m482 = a482*b482; - m483 = a483*b483; - m484 = a484*b484; - m485 = a485*b485; - m486 = a486*b486; - m487 = a487*b487; - m488 = a488*b488; - m489 = a489*b489; - m490 = a490*b490; - m491 = a491*b491; - m492 = a492*b492; - m493 = a493*b493; - m494 = a494*b494; - m495 = a495*b495; - m496 = a496*b496; - m497 = a497*b497; - m498 = a498*b498; - m499 = a499*b499; - m500 = a500*b500; - m501 = a501*b501; - m502 = a502*b502; - m503 = a503*b503; - m504 = a504*b504; - m505 = a505*b505; - m506 = a506*b506; - m507 = a507*b507; - m508 = a508*b508; - m509 = a509*b509; - m510 = a510*b510; - m511 = a511*b511; - m512 = a512*b512; - add1 = m1+m2; - add2 = m3+m4; - add3 = m5+m6; - add4 = m7+m8; - add5 = m9+m10; - add6 = m11+m12; - add7 = m13+m14; - add8 = m15+m16; - add9 = m17+m18; - add10 = m19+m20; - add11 = m21+m22; - add12 = m23+m24; - add13 = m25+m26; - add14 = m27+m28; - add15 = m29+m30; - add16 = m31+m32; - add17 = m33+m34; - add18 = m35+m36; - add19 = m37+m38; - add20 = m39+m40; - add21 = m41+m42; - add22 = m43+m44; - add23 = m45+m46; - add24 = m47+m48; - add25 = m49+m50; - add26 = m51+m52; - add27 = m53+m54; - add28 = m55+m56; - add29 = m57+m58; - add30 = m59+m60; - add31 = m61+m62; - add32 = m63+m64; - add33 = m65+m66; - add34 = m67+m68; - add35 = m69+m70; - add36 = m71+m72; - add37 = m73+m74; - add38 = m75+m76; - add39 = m77+m78; - add40 = m79+m80; - add41 = m81+m82; - add42 = m83+m84; - add43 = m85+m86; - add44 = m87+m88; - add45 = m89+m90; - add46 = m91+m92; - add47 = m93+m94; - add48 = m95+m96; - add49 = m97+m98; - add50 = m99+m100; - add51 = m101+m102; - add52 = m103+m104; - add53 = m105+m106; - add54 = m107+m108; - add55 = m109+m110; - add56 = m111+m112; - add57 = m113+m114; - add58 = m115+m116; - add59 = m117+m118; - add60 = m119+m120; - add61 = m121+m122; - add62 = m123+m124; - add63 = m125+m126; - add64 = m127+m128; - add65 = m129+m130; - add66 = m131+m132; - add67 = m133+m134; - add68 = m135+m136; - add69 = m137+m138; - add70 = m139+m140; - add71 = m141+m142; - add72 = m143+m144; - add73 = m145+m146; - add74 = m147+m148; - add75 = m149+m150; - add76 = m151+m152; - add77 = m153+m154; - add78 = m155+m156; - add79 = m157+m158; - add80 = m159+m160; - add81 = m161+m162; - add82 = m163+m164; - add83 = m165+m166; - add84 = m167+m168; - add85 = m169+m170; - add86 = m171+m172; - add87 = m173+m174; - add88 = m175+m176; - add89 = m177+m178; - add90 = m179+m180; - add91 = m181+m182; - add92 = m183+m184; - add93 = m185+m186; - add94 = m187+m188; - add95 = m189+m190; - add96 = m191+m192; - add97 = m193+m194; - add98 = m195+m196; - add99 = m197+m198; - add100 = m199+m200; - add101 = m201+m202; - add102 = m203+m204; - add103 = m205+m206; - add104 = m207+m208; - add105 = m209+m210; - add106 = m211+m212; - add107 = m213+m214; - add108 = m215+m216; - add109 = m217+m218; - add110 = m219+m220; - add111 = m221+m222; - add112 = m223+m224; - add113 = m225+m226; - add114 = m227+m228; - add115 = m229+m230; - add116 = m231+m232; - add117 = m233+m234; - add118 = m235+m236; - add119 = m237+m238; - add120 = m239+m240; - add121 = m241+m242; - add122 = m243+m244; - add123 = m245+m246; - add124 = m247+m248; - add125 = m249+m250; - add126 = m251+m252; - add127 = m253+m254; - add128 = m255+m256; - add129 = m257+m258; - add130 = m259+m260; - add131 = m261+m262; - add132 = m263+m264; - add133 = m265+m266; - add134 = m267+m268; - add135 = m269+m270; - add136 = m271+m272; - add137 = m273+m274; - add138 = m275+m276; - add139 = m277+m278; - add140 = m279+m280; - add141 = m281+m282; - add142 = m283+m284; - add143 = m285+m286; - add144 = m287+m288; - add145 = m289+m290; - add146 = m291+m292; - add147 = m293+m294; - add148 = m295+m296; - add149 = m297+m298; - add150 = m299+m300; - add151 = m301+m302; - add152 = m303+m304; - add153 = m305+m306; - add154 = m307+m308; - add155 = m309+m310; - add156 = m311+m312; - add157 = m313+m314; - add158 = m315+m316; - add159 = m317+m318; - add160 = m319+m320; - add161 = m321+m322; - add162 = m323+m324; - add163 = m325+m326; - add164 = m327+m328; - add165 = m329+m330; - add166 = m331+m332; - add167 = m333+m334; - add168 = m335+m336; - add169 = m337+m338; - add170 = m339+m340; - add171 = m341+m342; - add172 = m343+m344; - add173 = m345+m346; - add174 = m347+m348; - add175 = m349+m350; - add176 = m351+m352; - add177 = m353+m354; - add178 = m355+m356; - add179 = m357+m358; - add180 = m359+m360; - add181 = m361+m362; - add182 = m363+m364; - add183 = m365+m366; - add184 = m367+m368; - add185 = m369+m370; - add186 = m371+m372; - add187 = m373+m374; - add188 = m375+m376; - add189 = m377+m378; - add190 = m379+m380; - add191 = m381+m382; - add192 = m383+m384; - add193 = m385+m386; - add194 = m387+m388; - add195 = m389+m390; - add196 = m391+m392; - add197 = m393+m394; - add198 = m395+m396; - add199 = m397+m398; - add200 = m399+m400; - add201 = m401+m402; - add202 = m403+m404; - add203 = m405+m406; - add204 = m407+m408; - add205 = m409+m410; - add206 = m411+m412; - add207 = m413+m414; - add208 = m415+m416; - add209 = m417+m418; - add210 = m419+m420; - add211 = m421+m422; - add212 = m423+m424; - add213 = m425+m426; - add214 = m427+m428; - add215 = m429+m430; - add216 = m431+m432; - add217 = m433+m434; - add218 = m435+m436; - add219 = m437+m438; - add220 = m439+m440; - add221 = m441+m442; - add222 = m443+m444; - add223 = m445+m446; - add224 = m447+m448; - add225 = m449+m450; - add226 = m451+m452; - add227 = m453+m454; - add228 = m455+m456; - add229 = m457+m458; - add230 = m459+m460; - add231 = m461+m462; - add232 = m463+m464; - add233 = m465+m466; - add234 = m467+m468; - add235 = m469+m470; - add236 = m471+m472; - add237 = m473+m474; - add238 = m475+m476; - add239 = m477+m478; - add240 = m479+m480; - add241 = m481+m482; - add242 = m483+m484; - add243 = m485+m486; - add244 = m487+m488; - add245 = m489+m490; - add246 = m491+m492; - add247 = m493+m494; - add248 = m495+m496; - add249 = m497+m498; - add250 = m499+m500; - add251 = m501+m502; - add252 = m503+m504; - add253 = m505+m506; - add254 = m507+m508; - add255 = m509+m510; - add256 = m511+m512; - add257 = add256+add255; - add258 = add254+add253; - add259 = add252+add251; - add260 = add250+add249; - add261 = add248+add247; - add262 = add246+add245; - add263 = add244+add243; - add264 = add242+add241; - add265 = add240+add239; - add266 = add238+add237; - add267 = add236+add235; - add268 = add234+add233; - add269 = add232+add231; - add270 = add230+add229; - add271 = add228+add227; - add272 = add226+add225; - add273 = add224+add223; - add274 = add222+add221; - add275 = add220+add219; - add276 = add218+add217; - add277 = add216+add215; - add278 = add214+add213; - add279 = add212+add211; - add280 = add210+add209; - add281 = add208+add207; - add282 = add206+add205; - add283 = add204+add203; - add284 = add202+add201; - add285 = add200+add199; - add286 = add198+add197; - add287 = add196+add195; - add288 = add194+add193; - add289 = add192+add191; - add290 = add190+add189; - add291 = add188+add187; - add292 = add186+add185; - add293 = add184+add183; - add294 = add182+add181; - add295 = add180+add179; - add296 = add178+add177; - add297 = add176+add175; - add298 = add174+add173; - add299 = add172+add171; - add300 = add170+add169; - add301 = add168+add167; - add302 = add166+add165; - add303 = add164+add163; - add304 = add162+add161; - add305 = add160+add159; - add306 = add158+add157; - add307 = add156+add155; - add308 = add154+add153; - add309 = add152+add151; - add310 = add150+add149; - add311 = add148+add147; - add312 = add146+add145; - add313 = add144+add143; - add314 = add142+add141; - add315 = add140+add139; - add316 = add138+add137; - add317 = add136+add135; - add318 = add134+add133; - add319 = add132+add131; - add320 = add130+add129; - add321 = add128+add127; - add322 = add126+add125; - add323 = add124+add123; - add324 = add122+add121; - add325 = add120+add119; - add326 = add118+add117; - add327 = add116+add115; - add328 = add114+add113; - add329 = add112+add111; - add330 = add110+add109; - add331 = add108+add107; - add332 = add106+add105; - add333 = add104+add103; - add334 = add102+add101; - add335 = add100+add99; - add336 = add98+add97; - add337 = add96+add95; - add338 = add94+add93; - add339 = add92+add91; - add340 = add90+add89; - add341 = add88+add87; - add342 = add86+add85; - add343 = add84+add83; - add344 = add82+add81; - add345 = add80+add79; - add346 = add78+add77; - add347 = add76+add75; - add348 = add74+add73; - add349 = add72+add71; - add350 = add70+add69; - add351 = add68+add67; - add352 = add66+add65; - add353 = add64+add63; - add354 = add62+add61; - add355 = add60+add59; - add356 = add58+add57; - add357 = add56+add55; - add358 = add54+add53; - add359 = add52+add51; - add360 = add50+add49; - add361 = add48+add47; - add362 = add46+add45; - add363 = add44+add43; - add364 = add42+add41; - add365 = add40+add39; - add366 = add38+add37; - add367 = add36+add35; - add368 = add34+add33; - add369 = add32+add31; - add370 = add30+add29; - add371 = add28+add27; - add372 = add26+add25; - add373 = add24+add23; - add374 = add22+add21; - add375 = add20+add19; - add376 = add18+add17; - add377 = add16+add15; - add378 = add14+add13; - add379 = add12+add11; - add380 = add10+add9; - add381 = add8+add7; - add382 = add6+add5; - add383 = add4+add3; - add384 = add2+add1; - add385 = add384+add383; - add386 = add382+add381; - add387 = add380+add379; - add388 = add378+add377; - add389 = add376+add375; - add390 = add374+add373; - add391 = add372+add371; - add392 = add370+add369; - add393 = add368+add367; - add394 = add366+add365; - add395 = add364+add363; - add396 = add362+add361; - add397 = add360+add359; - add398 = add358+add357; - add399 = add356+add355; - add400 = add354+add353; - add401 = add352+add351; - add402 = add350+add349; - add403 = add348+add347; - add404 = add346+add345; - add405 = add344+add343; - add406 = add342+add341; - add407 = add340+add339; - add408 = add338+add337; - add409 = add336+add335; - add410 = add334+add333; - add411 = add332+add331; - add412 = add330+add329; - add413 = add328+add327; - add414 = add326+add325; - add415 = add324+add323; - add416 = add322+add321; - add417 = add320+add319; - add418 = add318+add317; - add419 = add316+add315; - add420 = add314+add313; - add421 = add312+add311; - add422 = add310+add309; - add423 = add308+add307; - add424 = add306+add305; - add425 = add304+add303; - add426 = add302+add301; - add427 = add300+add299; - add428 = add298+add297; - add429 = add296+add295; - add430 = add294+add293; - add431 = add292+add291; - add432 = add290+add289; - add433 = add288+add287; - add434 = add286+add285; - add435 = add284+add283; - add436 = add282+add281; - add437 = add280+add279; - add438 = add278+add277; - add439 = add276+add275; - add440 = add274+add273; - add441 = add272+add271; - add442 = add270+add269; - add443 = add268+add267; - add444 = add266+add265; - add445 = add264+add263; - add446 = add262+add261; - add447 = add260+add259; - add448 = add258+add257; - add449 = add448+add447; - add450 = add446+add445; - add451 = add444+add443; - add452 = add442+add441; - add453 = add440+add439; - add454 = add438+add437; - add455 = add436+add435; - add456 = add434+add433; - add457 = add432+add431; - add458 = add430+add429; - add459 = add428+add427; - add460 = add426+add425; - add461 = add424+add423; - add462 = add422+add421; - add463 = add420+add419; - add464 = add418+add417; - add465 = add416+add415; - add466 = add414+add413; - add467 = add412+add411; - add468 = add410+add409; - add469 = add408+add407; - add470 = add406+add405; - add471 = add404+add403; - add472 = add402+add401; - add473 = add400+add399; - add474 = add398+add397; - add475 = add396+add395; - add476 = add394+add393; - add477 = add392+add391; - add478 = add390+add389; - add479 = add388+add387; - add480 = add386+add385; - add481 = add480+add479; - add482 = add478+add477; - add483 = add476+add475; - add484 = add474+add473; - add485 = add472+add471; - add486 = add470+add469; - add487 = add468+add467; - add488 = add466+add465; - add489 = add464+add463; - add490 = add462+add461; - add491 = add460+add459; - add492 = add458+add457; - add493 = add456+add455; - add494 = add454+add453; - add495 = add452+add451; - add496 = add450+add449; - add497 = add496+add495; - add498 = add494+add493; - add499 = add492+add491; - add500 = add490+add489; - add501 = add488+add487; - add502 = add486+add485; - add503 = add484+add483; - add504 = add482+add481; - add505 = add504+add503; - add506 = add502+add501; - add507 = add500+add499; - add508 = add498+add497; - add509 = add508+add507; - add510 = add506+add505; - return add509+add510; -} \ No newline at end of file diff --git a/nmtdecoder/ce.h b/nmtdecoder/ce.h deleted file mode 100644 index 62dda7b0a16a7d3ff6eaf5bf56eaaf2c415816b5..0000000000000000000000000000000000000000 --- a/nmtdecoder/ce.h +++ /dev/null @@ -1,1026 +0,0 @@ -#ifndef CE_H -#define CE_H -float compute_engine8( -float a1, float b1, -float a2, float b2, -float a3, float b3, -float a4, float b4, -float a5, float b5, -float a6, float b6, -float a7, float b7, -float a8, float b8); -float compute_engine16( -float a1, float b1, -float a2, float b2, -float a3, float b3, -float a4, float b4, -float a5, float b5, -float a6, float b6, -float a7, float b7, -float a8, float b8, -float a9, float b9, -float a10, float b10, -float a11, float b11, -float a12, float b12, -float a13, float b13, -float a14, float b14, -float a15, float b15, -float a16, float b16); -float compute_engine32( -float a1, float b1, -float a2, float b2, -float a3, float b3, -float a4, float b4, -float a5, float b5, -float a6, float b6, -float a7, float b7, -float a8, float b8, -float a9, float b9, -float a10, float b10, -float a11, float b11, -float a12, float b12, -float a13, float b13, -float a14, float b14, -float a15, float b15, -float a16, float b16, -float a17, float b17, -float a18, float b18, -float a19, float b19, -float a20, float b20, -float a21, float b21, -float a22, float b22, -float a23, float b23, -float a24, float b24, -float a25, float b25, -float a26, float b26, -float a27, float b27, -float a28, float b28, -float a29, float b29, -float a30, float b30, -float a31, float b31, -float a32, float b32); -float compute_engine64( -float a1, float b1, -float a2, float b2, -float a3, float b3, -float a4, float b4, -float a5, float b5, -float a6, float b6, -float a7, float b7, -float a8, float b8, -float a9, float b9, -float a10, float b10, -float a11, float b11, -float a12, float b12, -float a13, float b13, -float a14, float b14, -float a15, float b15, -float a16, float b16, -float a17, float b17, -float a18, float b18, -float a19, float b19, -float a20, float b20, -float a21, float b21, -float a22, float b22, -float a23, float b23, -float a24, float b24, -float a25, float b25, -float a26, float b26, -float a27, float b27, -float a28, float b28, -float a29, float b29, -float a30, float b30, -float a31, float b31, -float a32, float b32, -float a33, float b33, -float a34, float b34, -float a35, float b35, -float a36, float b36, -float a37, float b37, -float a38, float b38, -float a39, float b39, -float a40, float b40, -float a41, float b41, -float a42, float b42, -float a43, float b43, -float a44, float b44, -float a45, float b45, -float a46, float b46, -float a47, float b47, -float a48, float b48, -float a49, float b49, -float a50, float b50, -float a51, float b51, -float a52, float b52, -float a53, float b53, -float a54, float b54, -float a55, float b55, -float a56, float b56, -float a57, float b57, -float a58, float b58, -float a59, float b59, -float a60, float b60, -float a61, float b61, -float a62, float b62, -float a63, float b63, -float a64, float b64); -float compute_engine128( -float a1, float b1, -float a2, float b2, -float a3, float b3, -float a4, float b4, -float a5, float b5, -float a6, float b6, -float a7, float b7, -float a8, float b8, -float a9, float b9, -float a10, float b10, -float a11, float b11, -float a12, float b12, -float a13, float b13, -float a14, float b14, -float a15, float b15, -float a16, float b16, -float a17, float b17, -float a18, float b18, -float a19, float b19, -float a20, float b20, -float a21, float b21, -float a22, float b22, -float a23, float b23, -float a24, float b24, -float a25, float b25, -float a26, float b26, -float a27, float b27, -float a28, float b28, -float a29, float b29, -float a30, float b30, -float a31, float b31, -float a32, float b32, -float a33, float b33, -float a34, float b34, -float a35, float b35, -float a36, float b36, -float a37, float b37, -float a38, float b38, -float a39, float b39, -float a40, float b40, -float a41, float b41, -float a42, float b42, -float a43, float b43, -float a44, float b44, -float a45, float b45, -float a46, float b46, -float a47, float b47, -float a48, float b48, -float a49, float b49, -float a50, float b50, -float a51, float b51, -float a52, float b52, -float a53, float b53, -float a54, float b54, -float a55, float b55, -float a56, float b56, -float a57, float b57, -float a58, float b58, -float a59, float b59, -float a60, float b60, -float a61, float b61, -float a62, float b62, -float a63, float b63, -float a64, float b64, -float a65, float b65, -float a66, float b66, -float a67, float b67, -float a68, float b68, -float a69, float b69, -float a70, float b70, -float a71, float b71, -float a72, float b72, -float a73, float b73, -float a74, float b74, -float a75, float b75, -float a76, float b76, -float a77, float b77, -float a78, float b78, -float a79, float b79, -float a80, float b80, -float a81, float b81, -float a82, float b82, -float a83, float b83, -float a84, float b84, -float a85, float b85, -float a86, float b86, -float a87, float b87, -float a88, float b88, -float a89, float b89, -float a90, float b90, -float a91, float b91, -float a92, float b92, -float a93, float b93, -float a94, float b94, -float a95, float b95, -float a96, float b96, -float a97, float b97, -float a98, float b98, -float a99, float b99, -float a100, float b100, -float a101, float b101, -float a102, float b102, -float a103, float b103, -float a104, float b104, -float a105, float b105, -float a106, float b106, -float a107, float b107, -float a108, float b108, -float a109, float b109, -float a110, float b110, -float a111, float b111, -float a112, float b112, -float a113, float b113, -float a114, float b114, -float a115, float b115, -float a116, float b116, -float a117, float b117, -float a118, float b118, -float a119, float b119, -float a120, float b120, -float a121, float b121, -float a122, float b122, -float a123, float b123, -float a124, float b124, -float a125, float b125, -float a126, float b126, -float a127, float b127, -float a128, float b128); -float compute_engine256( -float a1, float b1, -float a2, float b2, -float a3, float b3, -float a4, float b4, -float a5, float b5, -float a6, float b6, -float a7, float b7, -float a8, float b8, -float a9, float b9, -float a10, float b10, -float a11, float b11, -float a12, float b12, -float a13, float b13, -float a14, float b14, -float a15, float b15, -float a16, float b16, -float a17, float b17, -float a18, float b18, -float a19, float b19, -float a20, float b20, -float a21, float b21, -float a22, float b22, -float a23, float b23, -float a24, float b24, -float a25, float b25, -float a26, float b26, -float a27, float b27, -float a28, float b28, -float a29, float b29, -float a30, float b30, -float a31, float b31, -float a32, float b32, -float a33, float b33, -float a34, float b34, -float a35, float b35, -float a36, float b36, -float a37, float b37, -float a38, float b38, -float a39, float b39, -float a40, float b40, -float a41, float b41, -float a42, float b42, -float a43, float b43, -float a44, float b44, -float a45, float b45, -float a46, float b46, -float a47, float b47, -float a48, float b48, -float a49, float b49, -float a50, float b50, -float a51, float b51, -float a52, float b52, -float a53, float b53, -float a54, float b54, -float a55, float b55, -float a56, float b56, -float a57, float b57, -float a58, float b58, -float a59, float b59, -float a60, float b60, -float a61, float b61, -float a62, float b62, -float a63, float b63, -float a64, float b64, -float a65, float b65, -float a66, float b66, -float a67, float b67, -float a68, float b68, -float a69, float b69, -float a70, float b70, -float a71, float b71, -float a72, float b72, -float a73, float b73, -float a74, float b74, -float a75, float b75, -float a76, float b76, -float a77, float b77, -float a78, float b78, -float a79, float b79, -float a80, float b80, -float a81, float b81, -float a82, float b82, -float a83, float b83, -float a84, float b84, -float a85, float b85, -float a86, float b86, -float a87, float b87, -float a88, float b88, -float a89, float b89, -float a90, float b90, -float a91, float b91, -float a92, float b92, -float a93, float b93, -float a94, float b94, -float a95, float b95, -float a96, float b96, -float a97, float b97, -float a98, float b98, -float a99, float b99, -float a100, float b100, -float a101, float b101, -float a102, float b102, -float a103, float b103, -float a104, float b104, -float a105, float b105, -float a106, float b106, -float a107, float b107, -float a108, float b108, -float a109, float b109, -float a110, float b110, -float a111, float b111, -float a112, float b112, -float a113, float b113, -float a114, float b114, -float a115, float b115, -float a116, float b116, -float a117, float b117, -float a118, float b118, -float a119, float b119, -float a120, float b120, -float a121, float b121, -float a122, float b122, -float a123, float b123, -float a124, float b124, -float a125, float b125, -float a126, float b126, -float a127, float b127, -float a128, float b128, -float a129, float b129, -float a130, float b130, -float a131, float b131, -float a132, float b132, -float a133, float b133, -float a134, float b134, -float a135, float b135, -float a136, float b136, -float a137, float b137, -float a138, float b138, -float a139, float b139, -float a140, float b140, -float a141, float b141, -float a142, float b142, -float a143, float b143, -float a144, float b144, -float a145, float b145, -float a146, float b146, -float a147, float b147, -float a148, float b148, -float a149, float b149, -float a150, float b150, -float a151, float b151, -float a152, float b152, -float a153, float b153, -float a154, float b154, -float a155, float b155, -float a156, float b156, -float a157, float b157, -float a158, float b158, -float a159, float b159, -float a160, float b160, -float a161, float b161, -float a162, float b162, -float a163, float b163, -float a164, float b164, -float a165, float b165, -float a166, float b166, -float a167, float b167, -float a168, float b168, -float a169, float b169, -float a170, float b170, -float a171, float b171, -float a172, float b172, -float a173, float b173, -float a174, float b174, -float a175, float b175, -float a176, float b176, -float a177, float b177, -float a178, float b178, -float a179, float b179, -float a180, float b180, -float a181, float b181, -float a182, float b182, -float a183, float b183, -float a184, float b184, -float a185, float b185, -float a186, float b186, -float a187, float b187, -float a188, float b188, -float a189, float b189, -float a190, float b190, -float a191, float b191, -float a192, float b192, -float a193, float b193, -float a194, float b194, -float a195, float b195, -float a196, float b196, -float a197, float b197, -float a198, float b198, -float a199, float b199, -float a200, float b200, -float a201, float b201, -float a202, float b202, -float a203, float b203, -float a204, float b204, -float a205, float b205, -float a206, float b206, -float a207, float b207, -float a208, float b208, -float a209, float b209, -float a210, float b210, -float a211, float b211, -float a212, float b212, -float a213, float b213, -float a214, float b214, -float a215, float b215, -float a216, float b216, -float a217, float b217, -float a218, float b218, -float a219, float b219, -float a220, float b220, -float a221, float b221, -float a222, float b222, -float a223, float b223, -float a224, float b224, -float a225, float b225, -float a226, float b226, -float a227, float b227, -float a228, float b228, -float a229, float b229, -float a230, float b230, -float a231, float b231, -float a232, float b232, -float a233, float b233, -float a234, float b234, -float a235, float b235, -float a236, float b236, -float a237, float b237, -float a238, float b238, -float a239, float b239, -float a240, float b240, -float a241, float b241, -float a242, float b242, -float a243, float b243, -float a244, float b244, -float a245, float b245, -float a246, float b246, -float a247, float b247, -float a248, float b248, -float a249, float b249, -float a250, float b250, -float a251, float b251, -float a252, float b252, -float a253, float b253, -float a254, float b254, -float a255, float b255, -float a256, float b256); -float compute_engine512( -float a1, float b1, -float a2, float b2, -float a3, float b3, -float a4, float b4, -float a5, float b5, -float a6, float b6, -float a7, float b7, -float a8, float b8, -float a9, float b9, -float a10, float b10, -float a11, float b11, -float a12, float b12, -float a13, float b13, -float a14, float b14, -float a15, float b15, -float a16, float b16, -float a17, float b17, -float a18, float b18, -float a19, float b19, -float a20, float b20, -float a21, float b21, -float a22, float b22, -float a23, float b23, -float a24, float b24, -float a25, float b25, -float a26, float b26, -float a27, float b27, -float a28, float b28, -float a29, float b29, -float a30, float b30, -float a31, float b31, -float a32, float b32, -float a33, float b33, -float a34, float b34, -float a35, float b35, -float a36, float b36, -float a37, float b37, -float a38, float b38, -float a39, float b39, -float a40, float b40, -float a41, float b41, -float a42, float b42, -float a43, float b43, -float a44, float b44, -float a45, float b45, -float a46, float b46, -float a47, float b47, -float a48, float b48, -float a49, float b49, -float a50, float b50, -float a51, float b51, -float a52, float b52, -float a53, float b53, -float a54, float b54, -float a55, float b55, -float a56, float b56, -float a57, float b57, -float a58, float b58, -float a59, float b59, -float a60, float b60, -float a61, float b61, -float a62, float b62, -float a63, float b63, -float a64, float b64, -float a65, float b65, -float a66, float b66, -float a67, float b67, -float a68, float b68, -float a69, float b69, -float a70, float b70, -float a71, float b71, -float a72, float b72, -float a73, float b73, -float a74, float b74, -float a75, float b75, -float a76, float b76, -float a77, float b77, -float a78, float b78, -float a79, float b79, -float a80, float b80, -float a81, float b81, -float a82, float b82, -float a83, float b83, -float a84, float b84, -float a85, float b85, -float a86, float b86, -float a87, float b87, -float a88, float b88, -float a89, float b89, -float a90, float b90, -float a91, float b91, -float a92, float b92, -float a93, float b93, -float a94, float b94, -float a95, float b95, -float a96, float b96, -float a97, float b97, -float a98, float b98, -float a99, float b99, -float a100, float b100, -float a101, float b101, -float a102, float b102, -float a103, float b103, -float a104, float b104, -float a105, float b105, -float a106, float b106, -float a107, float b107, -float a108, float b108, -float a109, float b109, -float a110, float b110, -float a111, float b111, -float a112, float b112, -float a113, float b113, -float a114, float b114, -float a115, float b115, -float a116, float b116, -float a117, float b117, -float a118, float b118, -float a119, float b119, -float a120, float b120, -float a121, float b121, -float a122, float b122, -float a123, float b123, -float a124, float b124, -float a125, float b125, -float a126, float b126, -float a127, float b127, -float a128, float b128, -float a129, float b129, -float a130, float b130, -float a131, float b131, -float a132, float b132, -float a133, float b133, -float a134, float b134, -float a135, float b135, -float a136, float b136, -float a137, float b137, -float a138, float b138, -float a139, float b139, -float a140, float b140, -float a141, float b141, -float a142, float b142, -float a143, float b143, -float a144, float b144, -float a145, float b145, -float a146, float b146, -float a147, float b147, -float a148, float b148, -float a149, float b149, -float a150, float b150, -float a151, float b151, -float a152, float b152, -float a153, float b153, -float a154, float b154, -float a155, float b155, -float a156, float b156, -float a157, float b157, -float a158, float b158, -float a159, float b159, -float a160, float b160, -float a161, float b161, -float a162, float b162, -float a163, float b163, -float a164, float b164, -float a165, float b165, -float a166, float b166, -float a167, float b167, -float a168, float b168, -float a169, float b169, -float a170, float b170, -float a171, float b171, -float a172, float b172, -float a173, float b173, -float a174, float b174, -float a175, float b175, -float a176, float b176, -float a177, float b177, -float a178, float b178, -float a179, float b179, -float a180, float b180, -float a181, float b181, -float a182, float b182, -float a183, float b183, -float a184, float b184, -float a185, float b185, -float a186, float b186, -float a187, float b187, -float a188, float b188, -float a189, float b189, -float a190, float b190, -float a191, float b191, -float a192, float b192, -float a193, float b193, -float a194, float b194, -float a195, float b195, -float a196, float b196, -float a197, float b197, -float a198, float b198, -float a199, float b199, -float a200, float b200, -float a201, float b201, -float a202, float b202, -float a203, float b203, -float a204, float b204, -float a205, float b205, -float a206, float b206, -float a207, float b207, -float a208, float b208, -float a209, float b209, -float a210, float b210, -float a211, float b211, -float a212, float b212, -float a213, float b213, -float a214, float b214, -float a215, float b215, -float a216, float b216, -float a217, float b217, -float a218, float b218, -float a219, float b219, -float a220, float b220, -float a221, float b221, -float a222, float b222, -float a223, float b223, -float a224, float b224, -float a225, float b225, -float a226, float b226, -float a227, float b227, -float a228, float b228, -float a229, float b229, -float a230, float b230, -float a231, float b231, -float a232, float b232, -float a233, float b233, -float a234, float b234, -float a235, float b235, -float a236, float b236, -float a237, float b237, -float a238, float b238, -float a239, float b239, -float a240, float b240, -float a241, float b241, -float a242, float b242, -float a243, float b243, -float a244, float b244, -float a245, float b245, -float a246, float b246, -float a247, float b247, -float a248, float b248, -float a249, float b249, -float a250, float b250, -float a251, float b251, -float a252, float b252, -float a253, float b253, -float a254, float b254, -float a255, float b255, -float a256, float b256, -float a257, float b257, -float a258, float b258, -float a259, float b259, -float a260, float b260, -float a261, float b261, -float a262, float b262, -float a263, float b263, -float a264, float b264, -float a265, float b265, -float a266, float b266, -float a267, float b267, -float a268, float b268, -float a269, float b269, -float a270, float b270, -float a271, float b271, -float a272, float b272, -float a273, float b273, -float a274, float b274, -float a275, float b275, -float a276, float b276, -float a277, float b277, -float a278, float b278, -float a279, float b279, -float a280, float b280, -float a281, float b281, -float a282, float b282, -float a283, float b283, -float a284, float b284, -float a285, float b285, -float a286, float b286, -float a287, float b287, -float a288, float b288, -float a289, float b289, -float a290, float b290, -float a291, float b291, -float a292, float b292, -float a293, float b293, -float a294, float b294, -float a295, float b295, -float a296, float b296, -float a297, float b297, -float a298, float b298, -float a299, float b299, -float a300, float b300, -float a301, float b301, -float a302, float b302, -float a303, float b303, -float a304, float b304, -float a305, float b305, -float a306, float b306, -float a307, float b307, -float a308, float b308, -float a309, float b309, -float a310, float b310, -float a311, float b311, -float a312, float b312, -float a313, float b313, -float a314, float b314, -float a315, float b315, -float a316, float b316, -float a317, float b317, -float a318, float b318, -float a319, float b319, -float a320, float b320, -float a321, float b321, -float a322, float b322, -float a323, float b323, -float a324, float b324, -float a325, float b325, -float a326, float b326, -float a327, float b327, -float a328, float b328, -float a329, float b329, -float a330, float b330, -float a331, float b331, -float a332, float b332, -float a333, float b333, -float a334, float b334, -float a335, float b335, -float a336, float b336, -float a337, float b337, -float a338, float b338, -float a339, float b339, -float a340, float b340, -float a341, float b341, -float a342, float b342, -float a343, float b343, -float a344, float b344, -float a345, float b345, -float a346, float b346, -float a347, float b347, -float a348, float b348, -float a349, float b349, -float a350, float b350, -float a351, float b351, -float a352, float b352, -float a353, float b353, -float a354, float b354, -float a355, float b355, -float a356, float b356, -float a357, float b357, -float a358, float b358, -float a359, float b359, -float a360, float b360, -float a361, float b361, -float a362, float b362, -float a363, float b363, -float a364, float b364, -float a365, float b365, -float a366, float b366, -float a367, float b367, -float a368, float b368, -float a369, float b369, -float a370, float b370, -float a371, float b371, -float a372, float b372, -float a373, float b373, -float a374, float b374, -float a375, float b375, -float a376, float b376, -float a377, float b377, -float a378, float b378, -float a379, float b379, -float a380, float b380, -float a381, float b381, -float a382, float b382, -float a383, float b383, -float a384, float b384, -float a385, float b385, -float a386, float b386, -float a387, float b387, -float a388, float b388, -float a389, float b389, -float a390, float b390, -float a391, float b391, -float a392, float b392, -float a393, float b393, -float a394, float b394, -float a395, float b395, -float a396, float b396, -float a397, float b397, -float a398, float b398, -float a399, float b399, -float a400, float b400, -float a401, float b401, -float a402, float b402, -float a403, float b403, -float a404, float b404, -float a405, float b405, -float a406, float b406, -float a407, float b407, -float a408, float b408, -float a409, float b409, -float a410, float b410, -float a411, float b411, -float a412, float b412, -float a413, float b413, -float a414, float b414, -float a415, float b415, -float a416, float b416, -float a417, float b417, -float a418, float b418, -float a419, float b419, -float a420, float b420, -float a421, float b421, -float a422, float b422, -float a423, float b423, -float a424, float b424, -float a425, float b425, -float a426, float b426, -float a427, float b427, -float a428, float b428, -float a429, float b429, -float a430, float b430, -float a431, float b431, -float a432, float b432, -float a433, float b433, -float a434, float b434, -float a435, float b435, -float a436, float b436, -float a437, float b437, -float a438, float b438, -float a439, float b439, -float a440, float b440, -float a441, float b441, -float a442, float b442, -float a443, float b443, -float a444, float b444, -float a445, float b445, -float a446, float b446, -float a447, float b447, -float a448, float b448, -float a449, float b449, -float a450, float b450, -float a451, float b451, -float a452, float b452, -float a453, float b453, -float a454, float b454, -float a455, float b455, -float a456, float b456, -float a457, float b457, -float a458, float b458, -float a459, float b459, -float a460, float b460, -float a461, float b461, -float a462, float b462, -float a463, float b463, -float a464, float b464, -float a465, float b465, -float a466, float b466, -float a467, float b467, -float a468, float b468, -float a469, float b469, -float a470, float b470, -float a471, float b471, -float a472, float b472, -float a473, float b473, -float a474, float b474, -float a475, float b475, -float a476, float b476, -float a477, float b477, -float a478, float b478, -float a479, float b479, -float a480, float b480, -float a481, float b481, -float a482, float b482, -float a483, float b483, -float a484, float b484, -float a485, float b485, -float a486, float b486, -float a487, float b487, -float a488, float b488, -float a489, float b489, -float a490, float b490, -float a491, float b491, -float a492, float b492, -float a493, float b493, -float a494, float b494, -float a495, float b495, -float a496, float b496, -float a497, float b497, -float a498, float b498, -float a499, float b499, -float a500, float b500, -float a501, float b501, -float a502, float b502, -float a503, float b503, -float a504, float b504, -float a505, float b505, -float a506, float b506, -float a507, float b507, -float a508, float b508, -float a509, float b509, -float a510, float b510, -float a511, float b511, -float a512, float b512); -#endif \ No newline at end of file diff --git a/nmtdecoder/layers.cc b/nmtdecoder/layers.cc deleted file mode 100644 index f2b1dd6e528b2a67461e52b7492892248cf84d1c..0000000000000000000000000000000000000000 --- a/nmtdecoder/layers.cc +++ /dev/null @@ -1,330 +0,0 @@ -#include "layers.h" -//#include <stdio.h> -#include <math.h> -float exp_hls(float in){ -/* -union -{ -unsigned int i; -float f; -}v; -v.i=(1<<23)*(1.4426950409*in+126.94201519f); -return v.f; -*/ -return expf(in); -} - -float sigmoid(float x) { - return 1. / (1. + exp_hls(-x)); -} - -float tanh_hls(float x){ - //return (exp_hls(2*x)-1)/(exp_hls(2*x)+1); - return tanhf(x); -} -float z[dim];//update gate - float r[dim];//reset gate - float h_[dim];//new memory - - -/*memory is for the last state*/ -void gru_layer_base(ap_int<512> *data,float input[dim_word], float output[dim],float memory[dim],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ - #pragma HLS INTERFACE m_axi port=data - int i,kk; - /*float z[dim];//update gate - float r[dim];//reset gate - float h_[dim];//new memory*/ - float bx[dim]; - int index=0; - - #pragma HLS ARRAY_PARTITION variable=bx cyclic dim=1 factor = 16 - for(i = 0;i < dim; i++){ - h_[i]=0.0; - } - - for(i = 0;i < dim; i+=16){ - for (kk=0; kk<16; kk++){ - r[i+kk]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(i = 0;i < dim; i+=16){ - for (kk=0; kk<16; kk++){ - z[i+kk]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - mvm512_1024_2out(data+128, input, r, z,kvector,kmatrix,kout); - //for(i = 0;i < dim; i++) - //printf("r %f\n",r[i]); - //for(i = 0;i < dim; i++) - //printf("memory %f\n",memory[i]); - mvm1024_1024_2out(data+65664, memory,r, z,kvector,kmatrix,kout); - //for(i = 0;i < dim; i++) - //printf("r %f\n",r[i]); - - for(i = 0;i < dim; i++){ - r[i]=sigmoid(r[i]); - z[i]=sigmoid(z[i]); - - } - - mvm1024_1024(data+196736, memory,h_,kvector,kmatrix,kout); - //for(i = 0;i < dim; i++) - //printf("h1 %f\n",h_[i]); - for(i = 0;i < dim; i++) - h_[i]=r[i]*h_[i]; - mvm512_1024(data+262272, input,h_,kvector,kmatrix,kout); - //for(i = 0;i < dim; i++) - //printf("h %f\n",h_[i]); - index=295040; - for(i = 0;i < dim; i+=16){ - for (kk=0; kk<16; kk++){ - bx[i+kk]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - - for(i = 0;i < dim; i++){ - h_[i] = tanh_hls(h_[i] + bx[i]); - output[i] =(1.0 - z[i])* h_[i] + z[i] * memory[i]; - } - - -} - - -void gru_layer_nl(ap_int<512> *data,float input[ctxdim], float output[dim],float memory[dim],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ - #pragma HLS INTERFACE m_axi port=data - int i,kk,index; - /*float z[dim];//update gate - float r[dim];//reset gate - float h_[dim];//new memory*/ - - index=0; - for(i = 0;i < dim; i+=16){ - for (kk=0; kk<16; kk++){ - r[i+kk]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(i = 0;i < dim; i+=16){ - for (kk=0; kk<16; kk++){ - z[i+kk]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(i = 0;i < dim; i+=16){ - for (kk=0; kk<16; kk++){ - h_[i+kk]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - mvm2048_1024_2out(data+192, input, r, z,kvector,kmatrix,kout); - mvm1024_1024_2out(data+262336, memory,r, z,kvector,kmatrix,kout); - for(i = 0;i < dim; i++){ - r[i]=sigmoid(r[i]); - z[i]=sigmoid(z[i]); - - } - - mvm1024_1024(data+393408, memory,h_,kvector,kmatrix,kout); - for(i = 0;i < dim; i++) - h_[i]=r[i]*(h_[i]); - mvm2048_1024(data+458944, input,h_,kvector,kmatrix,kout); - for(i = 0;i < dim; i++){ -//#pragma HLS unroll factor = 16 - h_[i] = tanh_hls(h_[i]); - output[i] =(1.0 - z[i])* h_[i] + z[i] * memory[i]; - } -} - -void attention(ap_int<512> *data,float input_h1[dim], float input_f[MAXSTEP][dim], float input_b[MAXSTEP][dim], int stepsize, float output[ctxdim],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -//#pragma HLS inline off -//#pragma HLS inline region recursive -//#pragma HLS ALLOCATION instances=compute_engine64 limit=1 function -#pragma HLS INTERFACE m_axi port=data - int i,j,k,kk,index; - index=0; - float s; - float alpha[MAXSTEP]; - float temp[ctxdim]; - float decoder_b_att[ctxdim]; - float decoder_U_att[ctxdim]; - float decoder_c_tt; - -#pragma HLS ARRAY_PARTITION variable=decoder_U_att cyclic dim=1 factor = 16 -#pragma HLS ARRAY_PARTITION variable=decoder_b_att cyclic dim=1 factor = 16 - for(j=0; j< ctxdim; j++){ - output[j]=0.0; // clear output - - } - - for(i = 0;i < ctxdim; i+=16){ - for (kk=0; kk<16; kk++){ - decoder_b_att[i+kk]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(i = 0;i < ctxdim; i+=16){ - for (kk=0; kk<16; kk++){ - decoder_U_att[i+kk]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - decoder_c_tt=trans_i(data[256].range(31,0)); - for(i=0; i< MAXSTEP; i++){ - if(i<stepsize){ - for(j=0; j< ctxdim; j++) - temp[j]=decoder_b_att[j]; - mvm1024_2048(data+257, input_h1,temp,kvector,kmatrix,kout); - mvm1024_2048_2in(data+131329,input_f[i],input_b[stepsize-i-1],temp,kvector,kmatrix,kout); - for(j=0; j< ctxdim; j++){ -//#pragma HLS unroll factor = 8 - temp[j]=tanh_hls(temp[j]); - } - alpha[i] = 0.0; - for(j=0; j< ctxdim; j+=64){ -#pragma HLS pipeline - alpha[i] += compute_engine64(temp[j+0],decoder_U_att[j+0], - temp[j+1],decoder_U_att[j+1], - temp[j+2],decoder_U_att[j+2], - temp[j+3],decoder_U_att[j+3], - temp[j+4],decoder_U_att[j+4], - temp[j+5],decoder_U_att[j+5], - temp[j+6],decoder_U_att[j+6], - temp[j+7],decoder_U_att[j+7], - temp[j+8],decoder_U_att[j+8], - temp[j+9],decoder_U_att[j+9], - temp[j+10],decoder_U_att[j+10], - temp[j+11],decoder_U_att[j+11], - temp[j+12],decoder_U_att[j+12], - temp[j+13],decoder_U_att[j+13], - temp[j+14],decoder_U_att[j+14], - temp[j+15],decoder_U_att[j+15], - temp[j+16],decoder_U_att[j+16], - temp[j+17],decoder_U_att[j+17], - temp[j+18],decoder_U_att[j+18], - temp[j+19],decoder_U_att[j+19], - temp[j+20],decoder_U_att[j+20], - temp[j+21],decoder_U_att[j+21], - temp[j+22],decoder_U_att[j+22], - temp[j+23],decoder_U_att[j+23], - temp[j+24],decoder_U_att[j+24], - temp[j+25],decoder_U_att[j+25], - temp[j+26],decoder_U_att[j+26], - temp[j+27],decoder_U_att[j+27], - temp[j+28],decoder_U_att[j+28], - temp[j+29],decoder_U_att[j+29], - temp[j+30],decoder_U_att[j+30], - temp[j+31],decoder_U_att[j+31], - temp[j+32],decoder_U_att[j+32], - temp[j+33],decoder_U_att[j+33], - temp[j+34],decoder_U_att[j+34], - temp[j+35],decoder_U_att[j+35], - temp[j+36],decoder_U_att[j+36], - temp[j+37],decoder_U_att[j+37], - temp[j+38],decoder_U_att[j+38], - temp[j+39],decoder_U_att[j+39], - temp[j+40],decoder_U_att[j+40], - temp[j+41],decoder_U_att[j+41], - temp[j+42],decoder_U_att[j+42], - temp[j+43],decoder_U_att[j+43], - temp[j+44],decoder_U_att[j+44], - temp[j+45],decoder_U_att[j+45], - temp[j+46],decoder_U_att[j+46], - temp[j+47],decoder_U_att[j+47], - temp[j+48],decoder_U_att[j+48], - temp[j+49],decoder_U_att[j+49], - temp[j+50],decoder_U_att[j+50], - temp[j+51],decoder_U_att[j+51], - temp[j+52],decoder_U_att[j+52], - temp[j+53],decoder_U_att[j+53], - temp[j+54],decoder_U_att[j+54], - temp[j+55],decoder_U_att[j+55], - temp[j+56],decoder_U_att[j+56], - temp[j+57],decoder_U_att[j+57], - temp[j+58],decoder_U_att[j+58], - temp[j+59],decoder_U_att[j+59], - temp[j+60],decoder_U_att[j+60], - temp[j+61],decoder_U_att[j+61], - temp[j+62],decoder_U_att[j+62], - temp[j+63],decoder_U_att[j+63]); - } - alpha[i]= exp_hls(alpha[i] + decoder_c_tt); //alpha = exp(alpha) - } - } - s = 0.0; - for(i=0; i< MAXSTEP; i++){ - if(i<stepsize){ - s += alpha[i]; - } - } - for(i=0; i< MAXSTEP; i++){ // ctx = sum(alpha * hi) - if(i<stepsize){ - alpha[i] = alpha[i]/s; - //printf("alpha: %f \n", alpha[i]); - for(j =0; j < dim; j++){ - output[j] += alpha[i]* input_f[i][j]; - output[j+dim] += alpha[i]* input_b[stepsize-i-1][j]; - } - } - } -} -void logit_lstm(ap_int<512> *data,float input[dim], float output[dim_word],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ - #pragma HLS INTERFACE m_axi port=data - int i,kk; - int index=0; - for(i = 0;i < dim_word; i+=16){ - for (kk=0; kk<16; kk++){ - output[i+kk]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - mvm1024_512(data+32, input,output,kvector,kmatrix,kout); - -} - -void logit_prev(ap_int<512> *data,float input[dim_word], float output[dim_word],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ - #pragma HLS INTERFACE m_axi port=data - int i,kk; - int index=0; - for(i = 0;i < dim_word; i+=16){ - for (kk=0; kk<16; kk++){ - output[i+kk]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - mvm512_512(data+32, input,output,kvector,kmatrix,kout); - -} - -void logit_ctx(ap_int<512> *data,float input[ctxdim], float output[dim_word],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ - #pragma HLS INTERFACE m_axi port=data - int i,kk; - int index=0; - for(i = 0;i < dim_word; i+=16){ - for (kk=0; kk<16; kk++){ - output[i+kk]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - mvm2048_512(data+32, input,output,kvector,kmatrix,kout); -} - -void logit(ap_int<512> *data,float input[dim_word], float output[n_words],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ - #pragma HLS INTERFACE m_axi port=data - int i,kk; - int index=0; - for(i = 0;i < n_words; i+=16){ - for (kk=0; kk<16; kk++){ - output[i+kk]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - mvm512_30000(data+1875, input,output,kvector,kmatrix,kout); -} - diff --git a/nmtdecoder/layers.h b/nmtdecoder/layers.h deleted file mode 100644 index 7e3b76d0bb60954fe4f8abd36d85c4ce7206c97a..0000000000000000000000000000000000000000 --- a/nmtdecoder/layers.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef LAYERS_H -#define LAYERS_H -#include "mvm.h" -#define dim_word 512 // word vector dimensionality -#define dim 1024 //the number of LSTM units -#define n_words_src 30000 // source vocabulary size -#define n_words 30000 // target vocabulary size -#define ctxdim 2*dim //context dimension - -#define MAXSTEP 50 //maximum encoder step -#define MAXSTEP_DE 50 // maximum decoder step - -#define MAX_WORD 1000 -#define MAX_SEN 100 - -#define K 5 //beam size - -float exp_hls(float in); - -/*sigmoid function*/ -float sigmoid(float x); -/*tanh function*/ -float tanh_hls(float x); - -/* -input: emb (n_timesteps, n_samples, dim_word) -output: h dim -*/ - -void gru_layer_base(ap_int<512> *data,float input[dim_word], float output[dim],float memory[dim],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -/* -input: ctxdim -output:dim -*/ -void gru_layer_nl(ap_int<512> *data,float input[ctxdim], float output[dim],float memory[dim],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -/* -input: all of the context -output:ctxdim -*/ -void attention(ap_int<512> *data,float input_h1[dim],float input_f[MAXSTEP][dim], float input_b[MAXSTEP][dim], int stepsize, float output[ctxdim],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); -/* -input:next_state - hidden states of the decoder gru -output:dim_word -*/ -void logit_lstm(ap_int<512> *data,float input[dim], float output[dim_word],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); -/* -input: emb -output:dim_word -*/ -void logit_prev(ap_int<512> *data,float input[dim_word], float output[dim_word],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); -/* -input:ctx weighted averages of context, generated by attention module -output:dim_word -*/ -void logit_ctx(ap_int<512> *data,float input[ctxdim], float output[dim_word],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); -/* -input:dim_word -output:n_words -*/ -void logit(ap_int<512> *data,float input[dim_word],float output[n_words],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -#endif diff --git a/nmtdecoder/mvm.cc b/nmtdecoder/mvm.cc deleted file mode 100644 index 97205970755def10c69a2c94f14d4f75d6c01ae7..0000000000000000000000000000000000000000 --- a/nmtdecoder/mvm.cc +++ /dev/null @@ -1,833 +0,0 @@ -#include "mvm.h" -#include <math.h> -//#include<stdio.h> - -float trans_i(ap_int<32> a){ - u temp; - temp.i=a; - return temp.f; - -} -void kernel(float w[K_SIZE][K_NUM], float x[K_SIZE],float out[K_NUM]){ -#pragma HLS ARRAY_PARTITION variable=w cyclic dim=1 factor = 64 -#pragma HLS ARRAY_PARTITION variable=w cyclic dim=2 factor = 2 -#pragma HLS ARRAY_PARTITION variable=x cyclic dim=1 factor = 64 -#pragma HLS ARRAY_PARTITION variable=out cyclic dim=1 factor = 2 -//#pragma HLS ALLOCATION instances=compute_engine64 limit=2 function - - out[0]=compute_engine64(x[0],w[0][0], - x[1],w[1][0], - x[2],w[2][0], - x[3],w[3][0], - x[4],w[4][0], - x[5],w[5][0], - x[6],w[6][0], - x[7],w[7][0], - x[8],w[8][0], - x[9],w[9][0], - x[10],w[10][0], - x[11],w[11][0], - x[12],w[12][0], - x[13],w[13][0], - x[14],w[14][0], - x[15],w[15][0], - x[16],w[16][0], - x[17],w[17][0], - x[18],w[18][0], - x[19],w[19][0], - x[20],w[20][0], - x[21],w[21][0], - x[22],w[22][0], - x[23],w[23][0], - x[24],w[24][0], - x[25],w[25][0], - x[26],w[26][0], - x[27],w[27][0], - x[28],w[28][0], - x[29],w[29][0], - x[30],w[30][0], - x[31],w[31][0], - x[32],w[32][0], - x[33],w[33][0], - x[34],w[34][0], - x[35],w[35][0], - x[36],w[36][0], - x[37],w[37][0], - x[38],w[38][0], - x[39],w[39][0], - x[40],w[40][0], - x[41],w[41][0], - x[42],w[42][0], - x[43],w[43][0], - x[44],w[44][0], - x[45],w[45][0], - x[46],w[46][0], - x[47],w[47][0], - x[48],w[48][0], - x[49],w[49][0], - x[50],w[50][0], - x[51],w[51][0], - x[52],w[52][0], - x[53],w[53][0], - x[54],w[54][0], - x[55],w[55][0], - x[56],w[56][0], - x[57],w[57][0], - x[58],w[58][0], - x[59],w[59][0], - x[60],w[60][0], - x[61],w[61][0], - x[62],w[62][0], - x[63],w[63][0]); - out[1]=compute_engine64(x[0],w[0][1], - x[1],w[1][1], - x[2],w[2][1], - x[3],w[3][1], - x[4],w[4][1], - x[5],w[5][1], - x[6],w[6][1], - x[7],w[7][1], - x[8],w[8][1], - x[9],w[9][1], - x[10],w[10][1], - x[11],w[11][1], - x[12],w[12][1], - x[13],w[13][1], - x[14],w[14][1], - x[15],w[15][1], - x[16],w[16][1], - x[17],w[17][1], - x[18],w[18][1], - x[19],w[19][1], - x[20],w[20][1], - x[21],w[21][1], - x[22],w[22][1], - x[23],w[23][1], - x[24],w[24][1], - x[25],w[25][1], - x[26],w[26][1], - x[27],w[27][1], - x[28],w[28][1], - x[29],w[29][1], - x[30],w[30][1], - x[31],w[31][1], - x[32],w[32][1], - x[33],w[33][1], - x[34],w[34][1], - x[35],w[35][1], - x[36],w[36][1], - x[37],w[37][1], - x[38],w[38][1], - x[39],w[39][1], - x[40],w[40][1], - x[41],w[41][1], - x[42],w[42][1], - x[43],w[43][1], - x[44],w[44][1], - x[45],w[45][1], - x[46],w[46][1], - x[47],w[47][1], - x[48],w[48][1], - x[49],w[49][1], - x[50],w[50][1], - x[51],w[51][1], - x[52],w[52][1], - x[53],w[53][1], - x[54],w[54][1], - x[55],w[55][1], - x[56],w[56][1], - x[57],w[57][1], - x[58],w[58][1], - x[59],w[59][1], - x[60],w[60][1], - x[61],w[61][1], - x[62],w[62][1], - x[63],w[63][1]); - - -} - -void mvm512_512(ap_int<512> *data, float x[512],float mout[512],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -#pragma HLS INTERFACE m_axi port=data -#pragma HLS ARRAY_PARTITION variable=x cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=mout cyclic dim=1 factor = 8 - int i,j,k,l,kk; - int index=0; - for(k=0;k<512;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - for(l=0;l<512;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - mout[l]+=kout[0]; - mout[l+1]+=kout[1]; - - } - } - -} - -void mvm512_1024(ap_int<512> *data, float x[512],float mout[1024],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -#pragma HLS INTERFACE m_axi port=data -#pragma HLS ARRAY_PARTITION variable=x cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=mout cyclic dim=1 factor = 8 - int i,j,k,l,kk; - int index=0; - for(k=0;k<512;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - for(l=0;l<1024;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - mout[l]+=kout[0]; - mout[l+1]+=kout[1]; - - } - } - -} - - - -void mvm512_1024_2out(ap_int<512> *data, float x[512],float out1[1024],float out2[1024],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -#pragma HLS INTERFACE m_axi port=data -#pragma HLS ARRAY_PARTITION variable=x cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=out1 cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=out2 cyclic dim=1 factor = 8 - int i,j,k,l,kk; - int index=0; - for(k=0;k<512;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - for(l=0;l<1024;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - out1[l]+=kout[0]; - out1[l+1]+=kout[1]; - - } - } - for(k=0;k<512;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - for(l=0;l<1024;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - out2[l]+=kout[0]; - out2[l+1]+=kout[1]; - - } - } - - -} - -void mvm512_30000(ap_int<512> *data, float x[512],float mout[30000],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -#pragma HLS INTERFACE m_axi port=data -#pragma HLS ARRAY_PARTITION variable=x cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=mout cyclic dim=1 factor = 8 - int i,j,k,l,kk; - int index=0; - for(k=0;k<512;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - for(l=0;l<30000;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - mout[l]+=kout[0]; - mout[l+1]+=kout[1]; - - } - } -/* - for(k=0;k<512;k+=K_SIZE){ - - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - - for(l=29952; l< 30000;l+=K_NUM){ -#pragma HLS pipeline - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32); - } - index++; - } - - kernel(kmatrix, kvector,kout); - mout[l]+=kout[0]; - mout[l+1]+=kout[1]; - - } - - - } - */ -} - -void mvm1024_512(ap_int<512> *data, float x[1024],float mout[512],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -#pragma HLS INTERFACE m_axi port=data -#pragma HLS ARRAY_PARTITION variable=x cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=mout cyclic dim=1 factor = 8 - int i,j,k,l,kk; - int index=0; - for(k=0;k<1024;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - for(l=0;l<512;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - mout[l]+=kout[0]; - mout[l+1]+=kout[1]; - - } - - } - -} - -void mvm1024_1024(ap_int<512> *data, float x[1024],float mout[1024],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -#pragma HLS INTERFACE m_axi port=data -#pragma HLS ARRAY_PARTITION variable=x cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=mout cyclic dim=1 factor = 8 - int i,j,k,l,kk; - int index=0; - for(k=0;k<1024;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - for(l=0;l<1024;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - mout[l]+=kout[0]; - mout[l+1]+=kout[1]; - - } - } - -} - -void mvm1024_1024_2out(ap_int<512> *data,float x[1024],float out1[1024], float out2[1024],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -#pragma HLS INTERFACE m_axi port=data -#pragma HLS ARRAY_PARTITION variable=x cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=out1 cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=out2 cyclic dim=1 factor = 8 - int i,j,k,l,kk; - int index=0; - for(k=0;k<1024;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - for(l=0;l<1024;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - out1[l]+=kout[0]; - out1[l+1]+=kout[1]; - - } - //printf("kvec %f \n",kvector[1]); - //printf("kmatrix %f \n",kmatrix[10][1]); - //printf("kout %f \n",kout[1]); - } - - for(k=0;k<1024;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - for(l=0;l<1024;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - out2[l]+=kout[0]; - out2[l+1]+=kout[1]; - - } - - } - -} - -void mvm1024_1024_2in(ap_int<512> *data, float x1[1024],float x2[1024], float mout[1024],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -#pragma HLS INTERFACE m_axi port=data -#pragma HLS ARRAY_PARTITION variable=x1 cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=x2 cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=mout cyclic dim=1 factor = 8 - int i,j,k,l,kk; - int index=0; - for(k=0;k<1024;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x1[i+k]; - } - for(l=0;l<1024;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - mout[l]+=kout[0]; - mout[l+1]+=kout[1]; - - } - - } - - for(k=0;k<1024;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x2[i+k]; - } - for(l=0;l<1024;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - mout[l]+=kout[0]; - mout[l+1]+=kout[1]; - - } - - } -} - -void mvm1024_2048(ap_int<512> *data, float x[1024],float mout[2048],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -#pragma HLS INTERFACE m_axi port=data -#pragma HLS ARRAY_PARTITION variable=x cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=mout cyclic dim=1 factor = 8 - int i,j,k,l,kk; - int index=0; - for(k=0;k<1024;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - for(l=0;l<2048;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - mout[l]+=kout[0]; - mout[l+1]+=kout[1]; - - } - } - -} - -void mvm1024_2048_2in(ap_int<512> *data, float x1[1024],float x2[1024], float mout[2048],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -#pragma HLS INTERFACE m_axi port=data -#pragma HLS ARRAY_PARTITION variable=x1 cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=x2 cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=mout cyclic dim=1 factor = 8 - int i,j,k,l,kk; - int index=0; - for(k=0;k<1024;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x1[i+k]; - } - for(l=0;l<2048;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - mout[l]+=kout[0]; - mout[l+1]+=kout[1]; - - } - } - - for(k=0;k<1024;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x2[i+k]; - } - for(l=0;l<2048;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - mout[l]+=kout[0]; - mout[l+1]+=kout[1]; - - } - } -} -void mvm2048_512(ap_int<512> *data, float x[2048],float mout[512],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -#pragma HLS INTERFACE m_axi port=data -#pragma HLS ARRAY_PARTITION variable=x cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=mout cyclic dim=1 factor = 8 - int i,j,k,l,kk; - int index=0; - for(k=0;k<2048;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - for(l=0;l<512;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - mout[l]+=kout[0]; - mout[l+1]+=kout[1]; - - } - } - -} - -void mvm2048_1024(ap_int<512> *data, float x[2048],float mout[1024],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -#pragma HLS INTERFACE m_axi port=data -#pragma HLS ARRAY_PARTITION variable=x cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=mout cyclic dim=1 factor = 8 - int i,j,k,l,kk; - int index=0; - for(k=0;k<2048;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - for(l=0;l<1024;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - mout[l]+=kout[0]; - mout[l+1]+=kout[1]; - - } - } - -} - -void mvm2048_1024_2out(ap_int<512> *data, float x[2048],float out1[1024],float out2[1024],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -#pragma HLS INTERFACE m_axi port=data -#pragma HLS ARRAY_PARTITION variable=x cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=out1 cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=out2 cyclic dim=1 factor = 8 - int i,j,k,l,kk; - int index=0; - for(k=0;k<2048;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - for(l=0;l<1024;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - out1[l]+=kout[0]; - out1[l+1]+=kout[1]; - - } - } - - for(k=0;k<2048;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - for(l=0;l<1024;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - out2[l]+=kout[0]; - out2[l+1]+=kout[1]; - - } - } -} - -void mvm2048_2048(ap_int<512> *data, float x[2048],float mout[2048],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -#pragma HLS INTERFACE m_axi port=data -#pragma HLS ARRAY_PARTITION variable=x cyclic dim=1 factor = 8 -#pragma HLS ARRAY_PARTITION variable=mout cyclic dim=1 factor = 8 - int i,j,k,l,kk; - int index=0; - for(k=0;k<2048;k+=K_SIZE){ - /*loading*/ - for(i=0; i< K_SIZE;i++){ - #pragma HLS UNROLL factor=8 - kvector[i]=x[i+k]; - } - for(l=0;l<2048;l+=K_NUM){ -#pragma HLS pipeline ii=8 - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][0]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - for(j=0;j< K_SIZE; j+=16){ - for (kk=0; kk<16; kk++){ - kmatrix[j+kk][1]=trans_i(data[index].range(kk*32+31,kk*32)); - } - index++; - } - - kernel(kmatrix, kvector,kout); - mout[l]+=kout[0]; - mout[l+1]+=kout[1]; - - } - } - -} diff --git a/nmtdecoder/mvm.h b/nmtdecoder/mvm.h deleted file mode 100644 index fc97d4dae37e6b9e4d725d689f3a7311806ae4c0..0000000000000000000000000000000000000000 --- a/nmtdecoder/mvm.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef MVM_H -#define MVM_H -#include "ce.h" -#define K_SIZE 64 -#define K_NUM 2 -#include "ap_fixed.h" -typedef union { - unsigned int i; - float f; - } u; - -float trans_i(ap_int<32> a); -void kernel(float w[K_SIZE][K_NUM], float x[K_SIZE],float out[K_NUM]); - -void mvm512_512(ap_int<512> *data, float x[512],float mout[512],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -void mvm512_1024(ap_int<512> *data, float x[512],float mout[1024],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -void mvm512_1024_2out(ap_int<512> *data, float x[512],float out1[1024],float out2[1024],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -void mvm512_30000(ap_int<512> *data, float x[512],float mout[30000],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -void mvm1024_512(ap_int<512> *data, float x[1024],float mout[512],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -void mvm1024_1024(ap_int<512> *data, float x[1024],float mout[1024],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -void mvm1024_1024_2out(ap_int<512> *data, float x[1024],float out1[1024], float out2[1024],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -void mvm1024_1024_2in(ap_int<512> *data, float x1[1024],float x2[1024], float mout[1024],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -void mvm1024_2048(ap_int<512> *data, float x[1024],float mout[2048],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -void mvm1024_2048_2in(ap_int<512> *data, float x1[1024],float x2[1024], float mout[2048],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -void mvm2048_512(ap_int<512> *data, float x[2048],float mout[512],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -void mvm2048_1024(ap_int<512> *data, float x[2048],float mout[1024],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -void mvm2048_1024_2out(ap_int<512> *data, float x[2048],float out1[1024],float out2[1024],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -void mvm2048_2048(ap_int<512> *data, float x[2048],float mout[2048],float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); - -#endif diff --git a/nmtdecoder/nmtdecoder.cc b/nmtdecoder/nmtdecoder.cc deleted file mode 100644 index 3ff608ec9c819937ea8139eb0f62865004706416..0000000000000000000000000000000000000000 --- a/nmtdecoder/nmtdecoder.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include "nmtdecoder.h" -//#include <stdio.h> - -void ff_group( - ap_int<512> *data, - float input1[dim], float output1[dim_word], - float input2[dim_word], float output2[dim_word], - float input3[ctxdim], float output3[dim_word], - float input[dim_word], float output[n_words], - float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]){ -#pragma HLS inline off -#pragma HLS inline region recursive -//#pragma HLS ALLOCATION instances=compute_engine64 limit=1 function - #pragma HLS INTERFACE m_axi port=data - int i ; - logit_lstm(data,input1, output1,kvector,kmatrix,kout); - - //printf("\nlstm %f\n",output1[10]); - logit_prev(data+32800,input2, output2,kvector,kmatrix,kout); - //printf("prev %f\n",output2[10]); - - logit_ctx(data+49216,input3, output3,kvector,kmatrix,kout); - //printf("ctx %f\n",output3[10]); - - for(i =0 ; i< dim_word; i++){ -//#pragma HLS unroll factor =16 - input[i] = tanh_hls((float)(output1[i]+ output2[i] +output3[i])); - } - - logit(data+114784, input, output,kvector,kmatrix,kout); - //printf("logit %f\n",output[10]); - }; - - -void nmtdecoder( - ap_int<512> *data, - int stepsize, - float encoder_output[MAXSTEP][dim], - float encoder_r_output[MAXSTEP][dim], - float memory[dim], - float de_input[dim_word], - float decoder_output[dim], - float logit_output[n_words], - float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM] - ){ - -#pragma HLS inline off -#pragma HLS inline region recursive -//#pragma HLS ALLOCATION instances=compute_engine64 limit=1 function - #pragma HLS INTERFACE m_axi port=data - float attention_output[ctxdim]; - float lstm_output[dim_word]; - float prev_output[dim_word]; - float ctx_output[dim_word]; - float decoder_output1[dim]; - float logit_input[dim_word]; - int i=0; - gru_layer_base(data,de_input,decoder_output1,memory,kvector,kmatrix,kout); - //printf("\ndecoder output 1 %f %f %f %f %f %f\n",decoder_output1[0],decoder_output1[50],decoder_output1[100],decoder_output1[500],decoder_output1[800],decoder_output1[1000]); - //for(i =0;i<1024;i++) - //printf("decoder output 1 %f ",decoder_output1[i]); - attention(data+295104,decoder_output1,encoder_output,encoder_r_output,stepsize,attention_output,kvector,kmatrix,kout); - //for(i =0;i<1024;i++) - //printf("Attention output %f ",attention_output[i]); - //printf("Attention output %f %f %f %f %f %f\n",attention_output[0],attention_output[100],attention_output[200],attention_output[1000],attention_output[1500],attention_output[2000]); - gru_layer_nl(data+688577,attention_output,decoder_output,decoder_output1,kvector,kmatrix,kout); - //printf("decoder output 2 %f %f %f %f %f %f\n",decoder_output[0],decoder_output[100],decoder_output[102],decoder_output[200],decoder_output[601],decoder_output[1023]); - ff_group(data+1278593,decoder_output, lstm_output,de_input, prev_output, - attention_output, ctx_output, - logit_input,logit_output,kvector,kmatrix,kout); - //printf("logit output %f %f %f %f %f %f\n",logit_output[0],logit_output[511],logit_output[10000],logit_output[15000],logit_output[20000],logit_output[29000]); - -} - - - diff --git a/nmtdecoder/nmtdecoder.h b/nmtdecoder/nmtdecoder.h deleted file mode 100644 index 24694d42203035437d491719c2f9c53062d90c93..0000000000000000000000000000000000000000 --- a/nmtdecoder/nmtdecoder.h +++ /dev/null @@ -1,24 +0,0 @@ - -#ifndef NMTDECODER_H -#define NMTDECODER_H -#include "layers.h" -void ff_group( - ap_int<512> *data, - float input1[dim], float output1[dim_word], - float input2[dim_word], float output2[dim_word], - float input3[ctxdim], float output3[dim_word], - float input[dim_word], float output[n_words], - float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM]); -void nmtdecoder( - ap_int<512> *data, - int stepsize, - float encoder_output[MAXSTEP][dim], - float encoder_r_output[MAXSTEP][dim], - float memory[dim], - float de_input[dim_word], - float decoder_output[dim], - float logit_output[n_words], - float kvector[K_SIZE],float kmatrix[K_SIZE][K_NUM],float kout[K_NUM] - ); - -#endif diff --git a/DRED_codegen/q_learning.py b/q_learning.py similarity index 100% rename from DRED_codegen/q_learning.py rename to q_learning.py diff --git a/DRED_codegen/q_learning_v2.py b/q_learning_v2.py similarity index 100% rename from DRED_codegen/q_learning_v2.py rename to q_learning_v2.py diff --git a/DRED_codegen/saved_networks_q_learning/-dqn-8340.data-00000-of-00001 b/saved_networks_q_learning/-dqn-8340.data-00000-of-00001 similarity index 100% rename from DRED_codegen/saved_networks_q_learning/-dqn-8340.data-00000-of-00001 rename to saved_networks_q_learning/-dqn-8340.data-00000-of-00001 diff --git a/DRED_codegen/saved_networks_q_learning/-dqn-8340.index b/saved_networks_q_learning/-dqn-8340.index similarity index 100% rename from DRED_codegen/saved_networks_q_learning/-dqn-8340.index rename to saved_networks_q_learning/-dqn-8340.index diff --git a/DRED_codegen/saved_networks_q_learning/-dqn-8340.meta b/saved_networks_q_learning/-dqn-8340.meta similarity index 100% rename from DRED_codegen/saved_networks_q_learning/-dqn-8340.meta rename to saved_networks_q_learning/-dqn-8340.meta diff --git a/DRED_codegen/saved_networks_q_learning/-dqn-8350.data-00000-of-00001 b/saved_networks_q_learning/-dqn-8350.data-00000-of-00001 similarity index 100% rename from DRED_codegen/saved_networks_q_learning/-dqn-8350.data-00000-of-00001 rename to saved_networks_q_learning/-dqn-8350.data-00000-of-00001 diff --git a/DRED_codegen/saved_networks_q_learning/-dqn-8350.index b/saved_networks_q_learning/-dqn-8350.index similarity index 100% rename from DRED_codegen/saved_networks_q_learning/-dqn-8350.index rename to saved_networks_q_learning/-dqn-8350.index diff --git a/DRED_codegen/saved_networks_q_learning/-dqn-8350.meta b/saved_networks_q_learning/-dqn-8350.meta similarity index 100% rename from DRED_codegen/saved_networks_q_learning/-dqn-8350.meta rename to saved_networks_q_learning/-dqn-8350.meta diff --git a/DRED_codegen/saved_networks_q_learning/-dqn-8360.data-00000-of-00001 b/saved_networks_q_learning/-dqn-8360.data-00000-of-00001 similarity index 100% rename from DRED_codegen/saved_networks_q_learning/-dqn-8360.data-00000-of-00001 rename to saved_networks_q_learning/-dqn-8360.data-00000-of-00001 diff --git a/DRED_codegen/saved_networks_q_learning/-dqn-8360.index b/saved_networks_q_learning/-dqn-8360.index similarity index 100% rename from DRED_codegen/saved_networks_q_learning/-dqn-8360.index rename to saved_networks_q_learning/-dqn-8360.index diff --git a/DRED_codegen/saved_networks_q_learning/-dqn-8360.meta b/saved_networks_q_learning/-dqn-8360.meta similarity index 100% rename from DRED_codegen/saved_networks_q_learning/-dqn-8360.meta rename to saved_networks_q_learning/-dqn-8360.meta diff --git a/DRED_codegen/saved_networks_q_learning/-dqn-8370.data-00000-of-00001 b/saved_networks_q_learning/-dqn-8370.data-00000-of-00001 similarity index 100% rename from DRED_codegen/saved_networks_q_learning/-dqn-8370.data-00000-of-00001 rename to saved_networks_q_learning/-dqn-8370.data-00000-of-00001 diff --git a/DRED_codegen/saved_networks_q_learning/-dqn-8370.index b/saved_networks_q_learning/-dqn-8370.index similarity index 100% rename from DRED_codegen/saved_networks_q_learning/-dqn-8370.index rename to saved_networks_q_learning/-dqn-8370.index diff --git a/DRED_codegen/saved_networks_q_learning/-dqn-8370.meta b/saved_networks_q_learning/-dqn-8370.meta similarity index 100% rename from DRED_codegen/saved_networks_q_learning/-dqn-8370.meta rename to saved_networks_q_learning/-dqn-8370.meta diff --git a/DRED_codegen/saved_networks_q_learning/-dqn-8380.data-00000-of-00001 b/saved_networks_q_learning/-dqn-8380.data-00000-of-00001 similarity index 100% rename from DRED_codegen/saved_networks_q_learning/-dqn-8380.data-00000-of-00001 rename to saved_networks_q_learning/-dqn-8380.data-00000-of-00001 diff --git a/DRED_codegen/saved_networks_q_learning/-dqn-8380.index b/saved_networks_q_learning/-dqn-8380.index similarity index 100% rename from DRED_codegen/saved_networks_q_learning/-dqn-8380.index rename to saved_networks_q_learning/-dqn-8380.index diff --git a/DRED_codegen/saved_networks_q_learning/-dqn-8380.meta b/saved_networks_q_learning/-dqn-8380.meta similarity index 100% rename from DRED_codegen/saved_networks_q_learning/-dqn-8380.meta rename to saved_networks_q_learning/-dqn-8380.meta diff --git a/DRED_codegen/saved_networks_q_learning/checkpoint b/saved_networks_q_learning/checkpoint similarity index 100% rename from DRED_codegen/saved_networks_q_learning/checkpoint rename to saved_networks_q_learning/checkpoint diff --git a/DRED_codegen/script.tcl b/script.tcl similarity index 100% rename from DRED_codegen/script.tcl rename to script.tcl diff --git a/DRED_codegen/synth.log b/synth.log similarity index 100% rename from DRED_codegen/synth.log rename to synth.log diff --git a/DRED_codegen/tempate.py b/tempate.py similarity index 100% rename from DRED_codegen/tempate.py rename to tempate.py diff --git a/DRED_codegen/templates/nmt.jinja b/templates/nmt.jinja similarity index 100% rename from DRED_codegen/templates/nmt.jinja rename to templates/nmt.jinja diff --git a/DRED_codegen/templates/tcl_script.jinja b/templates/tcl_script.jinja similarity index 100% rename from DRED_codegen/templates/tcl_script.jinja rename to templates/tcl_script.jinja diff --git a/DRED_codegen/vivado_hls.log b/vivado_hls.log similarity index 100% rename from DRED_codegen/vivado_hls.log rename to vivado_hls.log