diff --git a/paper/Abstract.tex b/paper/Abstract.tex deleted file mode 100644 index a7dfbccd8655747c38913a8f29ede45e646bc970..0000000000000000000000000000000000000000 --- a/paper/Abstract.tex +++ /dev/null @@ -1,35 +0,0 @@ -\begin{abstract} -% -Heterogeneous computing is widely used in the System-on-chip (SoC) processors -that power modern mobile devices in order to -reduce power consumption through specialization. -However, programming such systems can be extremely complex as a single -SoC combines multiple different -parallelism models, instruction sets, and memory hierarchies, and different -SoCs use \emph{different combinations} of these features. -We propose \NAME{}, a new Virtual Instruction Set Architecture (ISA) that aims to -address both functional portability and performance portability across -mobile heterogeneous SoCs by capturing the wide range of different -parallelism models expected to be available on future SoCs. -Our virtual ISA design uses only two parallelism models to achieve this goal: -\emph{a hierarchical dataflow graph with side effects} and -\emph{parametric vector instructions}. -\NAME{} is more general than existing ones that focus heavily on GPUs, -such as PTX, HSAIL and SPIR, e.g., it can capture both streaming pipelined -parallelism and general dataflow parallelism found in many custom and -semi-custom (programmable) accelerators. -We present a compilation strategy to generate code for a diverse range -of target hardware components from the common virtual ISA. -As a first prototype, we have implemented backends for -GPUs that use nVidia's PTX, -vector hardware using Intel's AVX, and -host code running on X86 processors. -Experimental results show that code generated for vectors and GPUs -from a single virtual ISA representation achieves -performance that is within about a factor of 2x of separately hand-tuned code, -and much closer in most cases. 
-We further demonstrate qualitatively using a realistic example -that our virtual ISA abstractions are also suited for capturing pipelining and -streaming parallelism. -% -\end{abstract} diff --git a/paper/Code/laplacian.ll b/paper/Code/laplacian.ll deleted file mode 100644 index e6265609649e1df45b1e226fa9d022c486d1c877..0000000000000000000000000000000000000000 --- a/paper/Code/laplacian.ll +++ /dev/null @@ -1,29 +0,0 @@ -define {float*, i64} @laplacian(float* in %I, i64 %sizeI, float* in %B, i64 %sizeB, i32 %dimX, i32 %dimY) { - ; Create dataflow nodes in child graph - %erode_node = call i8* @llvm.visc.createNode(@erode) - %dilate_node = call i8* @llvm.visc.createNode(@dilate) - %lincomb_node = call i8* @llvm.visc.createNode2D(@lincomb, i32 %dimX, i32 %dimY) - ; Bind inputs of parent node Laplacian with child nodes Dilate, Erode and lincomb - call void @llvm.visc.bind.input(i8* %dilate_node, i32 0, i32 0) - call void @llvm.visc.bind.input(i8* %dilate_node, i32 1, i32 1) - call void @llvm.visc.bind.input(i8* %dilate_node, i32 2, i32 2) - call void @llvm.visc.bind.input(i8* %dilate_node, i32 3, i32 3) - call void @llvm.visc.bind.input(i8* %erode_node, i32 0, i32 0) - call void @llvm.visc.bind.input(i8* %erode_node, i32 1, i32 1) - call void @llvm.visc.bind.input(i8* %erode_node, i32 2, i32 2) - call void @llvm.visc.bind.input(i8* %erode_node, i32 3, i32 3) - call void @llvm.visc.bind.input(i8* %lincomb_node, i32 0, i32 0) - call void @llvm.visc.bind.input(i8* %lincomb_node, i32 1, i32 1) - call void @llvm.visc.bind.input(i8* %lincomb_node, i32 2, i32 6) - call void @llvm.visc.bind.input(i8* %lincomb_node, i32 3, i32 7) - ; Create edges between child nodes for sending output of Erode and Dilate to lincomb node - call void @llvm.visc.createEdge(i8* %dilate_node, i8* %lincomb_node, i32 0, i32 2) - call void @llvm.visc.createEdge(i8* %dilate_node, i8* %lincomb_node, i32 1, i32 3) - call void @llvm.visc.createEdge(i8* %erode_node, i8* %lincomb_node, i32 0, i32 4) - call 
void @llvm.visc.createEdge(i8* %erode_node, i8* %lincomb_node, i32 1, i32 5) - ; Bind output of lincomb node with output of parent node Laplacian - call void @llvm.visc.bind.output(i8* %lincomb_node, i32 0, i32 0) - call void @llvm.visc.bind.output(i8* %lincomb_node, i32 1, i32 1) - ret {float*, i64} zeroinitializer -} - diff --git a/paper/Code/lincomb.ll b/paper/Code/lincomb.ll deleted file mode 100644 index 732245776af04c13194ca98dad0fe3e5aee1586f..0000000000000000000000000000000000000000 --- a/paper/Code/lincomb.ll +++ /dev/null @@ -1,21 +0,0 @@ -define void @lincomb(double* I, double* Ie, double* Id) { - %N = call i8* @llvm.visc.getNode() - %nidx = call i32 @llvm.visc.getNodeInstanceID.x(i8* %N) - %nidy = call i32 @llvm.visc.getNodeInstanceID.y(i8* %N) - %vl = call i32 @llvm.visc.getVectorLength(i32 8) - ;Index and base address calculation using %nidx, %nidy - ;for I, Id, Ie, L (not shown) - %pixel_I = load <%vl x double>* I_base - %pixel_Id = load <%vl x double>* Id_base - %pixel_Ie = load <%vl x double>* Ie_base - %tmp = insertelement <%vl x double> undef, - double 2.0, i32 0 - %vec2 = shufflevector <%vl x double> %tmp, - <%vl x double> undef, - <%vl x i32> zeroinitializer - %mul = mul <%vl x double> %vec2, %pixel_I - %add = add <%vl x double> %pixel_Id, %pixel_Ie - %res = sub <%vl x double> %add, %mul - store <%vl x double> %res, <%vl x double>* L_base - ret void -} diff --git a/paper/Code/main.cc b/paper/Code/main.cc deleted file mode 100644 index 23d2950c2af088ec3397cbe19f01ef22ab04a8eb..0000000000000000000000000000000000000000 --- a/paper/Code/main.cc +++ /dev/null @@ -1,212 +0,0 @@ -/*************************************************************************** - *cr - *cr (C) Copyright 2010 The Board of Trustees of the - *cr University of Illinois - *cr All Rights Reserved - *cr - ***************************************************************************/ - -/* - * Main entry of dense matrix-matrix multiplication kernel - */ - -#include <stdio.h> 
-#include <math.h> -#include <stdlib.h> -#include <string.h> -#include <sys/time.h> -#include <malloc.h> -#include <vector> -#include <iostream> -#include <cassert> -#include <CL/cl.h> -#include <parboil.h> - -// I/O routines -extern bool readColMajorMatrixFile(const char *fn, int &nr_row, int &nr_col, std::vector<float>&v); -extern bool writeColMajorMatrixFile(const char *fn, int, int, std::vector<float>&); -extern char* readFile(const char*); - -// Parameters of tile sizes -#define TILE_SZ 16 - -#define CHECK_ERROR(errorMessage) \ - if(clStatus != CL_SUCCESS) \ - { \ - std::cout<< errorMessage <<": "<< clStatus <<" Error!\n"; \ - std::cout<<"Line: "<<__LINE__<<"\n"; \ - exit(1); \ - } - -void basicSgemm( int m, int n, cl_mem A, cl_mem B, cl_mem C, cl_kernel clKernel, cl_command_queue clCommandQueue ) -{ - // In this code we assume the matrix sizes are multiple of tile size - if ((m%TILE_SZ) || (n%TILE_SZ)) { - std::cerr << "unsupported size of matrix. m should be multiple of " << TILE_SZ - << "; n should be multiple of " << TILE_SZ << std::endl; - } - -//#ifdef ROWM - //size_t db = m; - //size_t dg = (m*n); -//#else - //size_t db = n; - //size_t dg = (m*n); -//#endif -#ifdef ROWM - size_t dg = m; -#else - size_t dg = n; -#endif - cl_int clStatus; - //std::cout << "Block dim = " << db << ", Group dim = " << dg/db << "\n"; - clStatus = clSetKernelArg(clKernel,0,sizeof(cl_mem),(void*)&A); - clStatus = clSetKernelArg(clKernel,1,sizeof(cl_mem),(void*)&B); - clStatus = clSetKernelArg(clKernel,2,sizeof(cl_mem),(void*)&C); - clStatus = clSetKernelArg(clKernel,3,sizeof(int),(void*)&m); - clStatus = clSetKernelArg(clKernel,4,sizeof(int),(void*)&n); - CHECK_ERROR("clSetKernelArg") - - //clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,1,NULL,&dg,&db,0,NULL,NULL); - clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,1,NULL,&dg,NULL,0,NULL,NULL); - CHECK_ERROR("clEnqueueNDRangeKernel") - - clStatus = clFinish(clCommandQueue); - CHECK_ERROR("clFinish") -} - 
-int main (int argc, char *argv[]) { - - struct pb_Parameters *params; - struct pb_TimerSet timers; - - size_t A_sz, B_sz, C_sz; - int matArow, matAcol; - int matBrow, matBcol; - std::vector<float> matA, matB; - - - /* Read command line. Expect 3 inputs: A, B and B^T - in column-major layout*/ - params = pb_ReadParameters(&argc, argv); - if ((params->inpFiles[0] == NULL) - || (params->inpFiles[1] == NULL) - || (params->inpFiles[2] != NULL)) - { - fprintf(stderr, "Expecting three input filenames\n"); - exit(-1); - } - - /* Read in data */ - // load A - readColMajorMatrixFile(params->inpFiles[0], - matArow, matAcol, matA); - // load B^T - readColMajorMatrixFile(params->inpFiles[1], - matBrow, matBcol, matB); - - assert(matArow == matBrow && matAcol == matBcol && "Dimensions of two input matrices should match"); - pb_InitializeTimerSet(&timers); - - pb_SwitchToTimer(&timers, visc_TimerID_SETUP); - cl_int clStatus; - cl_platform_id clPlatform; - clStatus = clGetPlatformIDs(1,&clPlatform,NULL); - CHECK_ERROR("clGetPlatformIDs") - - cl_context_properties clCps[3] = {CL_CONTEXT_PLATFORM,(cl_context_properties)clPlatform,0}; - cl_context clContext = clCreateContextFromType(clCps,CL_DEVICE_TYPE_GPU,NULL,NULL,&clStatus); - CHECK_ERROR("clCreateContextFromType") - - cl_device_id clDevice; - clStatus = clGetDeviceIDs(clPlatform,CL_DEVICE_TYPE_GPU,1,&clDevice,NULL); - CHECK_ERROR("clGetDeviceIDs") - - cl_command_queue clCommandQueue = clCreateCommandQueue(clContext,clDevice,CL_QUEUE_PROFILING_ENABLE,&clStatus); - CHECK_ERROR("clCreateCommandQueue") - - pb_SetOpenCL(&clContext, &clCommandQueue); - - // const char* clSource[] = {readFile("src/opencl_base/kernel_offline.nvptx.s")}; - // cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus); - cl_kernel clKernel; - cl_program clProgram; - pb_CreateAndBuildKernelFromBinary("build/opencl_default/kernel_offline.nvptx.s", "matAdd", &clContext, &clDevice, &clProgram, &clKernel); - //cl_program clProgram = 
clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus); - //CHECK_ERROR("clCreateProgramWithSource") - - //char clOptions[50]; - //sprintf(clOptions,""); - - //clStatus = clBuildProgram(clProgram,1,&clDevice,clOptions,NULL,NULL); - //CHECK_ERROR("clBuildProgram") - - //cl_kernel clKernel = clCreateKernel(clProgram,"mysgemmNT",&clStatus); - //CHECK_ERROR("clCreateKernel") - - pb_SwitchToTimer( &timers, pb_TimerID_COMPUTE ); - // copy A to device memory - A_sz = matArow*matAcol*sizeof(float); - B_sz = matBrow*matBcol*sizeof(float); - - // allocate space for C - C_sz = matArow*matBcol*sizeof(float); - - // OpenCL memory allocation - std::vector<float> matC(matArow*matBcol); - - pb_SwitchToTimer( &timers, pb_TimerID_COPY ); - cl_mem dA = clCreateBuffer(clContext,CL_MEM_READ_ONLY,A_sz,NULL,&clStatus); - CHECK_ERROR("clCreateBuffer") - cl_mem dB = clCreateBuffer(clContext,CL_MEM_READ_ONLY,B_sz,NULL,&clStatus); - CHECK_ERROR("clCreateBuffer") - cl_mem dC = clCreateBuffer(clContext,CL_MEM_WRITE_ONLY,C_sz,NULL,&clStatus); - CHECK_ERROR("clCreateBuffer") - - // Copy A and B^T into device memory - clStatus = clEnqueueWriteBuffer(clCommandQueue,dA,CL_FALSE,0,A_sz,&matA.front(),0,NULL,NULL); - CHECK_ERROR("clEnqueueWriteBuffer") - clStatus = clEnqueueWriteBuffer(clCommandQueue,dB,CL_FALSE,0,B_sz,&matB.front(),0,NULL,NULL); - CHECK_ERROR("clEnqueueWriteBuffer") - - clStatus = clEnqueueWriteBuffer(clCommandQueue,dC,CL_TRUE,0,C_sz,&matC.front(),0,NULL,NULL); - CHECK_ERROR("clEnqueueWriteBuffer") - - pb_SwitchToTimer( &timers, pb_TimerID_KERNEL ); - - // Use standard sgemm interface - basicSgemm(matArow, matAcol, dA, dB, dC, clKernel, clCommandQueue); - - pb_SwitchToTimer( &timers, pb_TimerID_COPY ); - clEnqueueReadBuffer(clCommandQueue,dC,CL_TRUE,0,C_sz,&matC.front(),0,NULL,NULL); - - pb_SwitchToTimer( &timers, visc_TimerID_SETUP); - clStatus = clReleaseKernel(clKernel); - clStatus = clReleaseProgram(clProgram); - clStatus = clReleaseMemObject(dA); - clStatus = 
clReleaseMemObject(dB); - clStatus = clReleaseMemObject(dC); - clStatus = clReleaseCommandQueue(clCommandQueue); - clStatus = clReleaseContext(clContext); - - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - pb_PrintTimerSet(&timers); - - if (params->outFile) { - - /* Write C to file */ - //pb_SwitchToTimer(&timers, pb_TimerID_IO); - writeColMajorMatrixFile(params->outFile, - matArow, matBcol, matC); - } - - - double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); - std::cout<< "GFLOPs = " << 2.* matArow * matBcol * matAcol/GPUtime/1e9 << std::endl; - pb_FreeParameters(params); - - //free((void*)clSource[0]); - - - return 0; -} diff --git a/paper/Code/vecAddC.cl b/paper/Code/vecAddC.cl deleted file mode 100644 index e25a02240f123dce71b1f395f25c6049b0a5d787..0000000000000000000000000000000000000000 --- a/paper/Code/vecAddC.cl +++ /dev/null @@ -1,20 +0,0 @@ -__kernel void matAdd( __global float *A, __global float *B, __global float* C, int m, int n ) { - int j = get_global_id(0); - for(int i=0; i < m; i++) - C[i*n+j] = A[i*n+j] + B[i*n+j]; -} - -void basicSgemm( int m, int n, cl_mem A, cl_mem B, cl_mem C, cl_kernel clKernel, cl_command_queue clCommandQueue ) -{ - size_t global_work_group = n; - - clSetKernelArg(clKernel,0,sizeof(cl_mem),(void*)&A); - clSetKernelArg(clKernel,1,sizeof(cl_mem),(void*)&B); - clSetKernelArg(clKernel,2,sizeof(cl_mem),(void*)&C); - clSetKernelArg(clKernel,3,sizeof(int),(void*)&m); - clSetKernelArg(clKernel,4,sizeof(int),(void*)&n); - - clEnqueueNDRangeKernel(clCommandQueue, clKernel, 1, NULL, &global_work_group, NULL, 0, NULL, NULL); - - clFinish(clCommandQueue); -} diff --git a/paper/Code/vecAddR.cl b/paper/Code/vecAddR.cl deleted file mode 100644 index dbf1629da4f9e3bcc65431c212f71b23402da50e..0000000000000000000000000000000000000000 --- a/paper/Code/vecAddR.cl +++ /dev/null @@ -1,21 +0,0 @@ -__kernel void matAdd( __global float *A, __global float *B, __global float* C, int m, int n ) { - int i = get_global_id(0); - for(int 
j=0; j < n; j++) - C[i*n+j] = A[i*n+j] + B[i*n+j]; -} - -void basicSgemm( int m, int n, cl_mem A, cl_mem B, cl_mem C, cl_kernel clKernel, cl_command_queue clCommandQueue ) -{ - size_t global_work_group = m; - - clSetKernelArg(clKernel,0,sizeof(cl_mem),(void*)&A); - clSetKernelArg(clKernel,1,sizeof(cl_mem),(void*)&B); - clSetKernelArg(clKernel,2,sizeof(cl_mem),(void*)&C); - clSetKernelArg(clKernel,3,sizeof(int),(void*)&m); - clSetKernelArg(clKernel,4,sizeof(int),(void*)&n); - - clEnqueueNDRangeKernel(clCommandQueue, clKernel, 1, NULL, - &global_work_group, NULL, 0, NULL, NULL); - - clFinish(clCommandQueue); -} diff --git a/paper/Compilation.tex b/paper/Compilation.tex deleted file mode 100644 index 4d09e5d317711a9ab095b258f2443d21aaecfa89..0000000000000000000000000000000000000000 --- a/paper/Compilation.tex +++ /dev/null @@ -1,342 +0,0 @@ -%------------------------------------------------------------------------------ -\section{Compilation Strategy} -\label{sec:compiler} -%------------------------------------------------------------------------------ - -The goal of our compilation strategy is to generate native code from a single -virtual ISA format, allowing parts of an application to map flexibly to -different compute units. Our goal, in this paper, is not to develop new -optimization techniques on this virtual ISA; we are developing -those techniques in our ongoing research. -In this paper, we show how the virtual ISA design lends itself to -be compiled piecewise to different hardware compute units. - -%\begin{center} -%\begin{figure}[hbt] -%\centering -%%\hspace*{4ex} -%\includegraphics[height=4cm]{Figures/compilation-short.png} -%\caption{\footnotesize{Compilation Flow of virtual ISA program}} -%\label{fig:compilation} -%\end{figure} -%\end{center} - -We use simple annotations on the node functions to specify to which compute -unit a given graph node should be translated, e.g., the annotation may specify -one or more of \{{\tt GPU, Vector, None}\}. 
-% -Typically, the annotations would be chosen by a language front-end, -a programmer, or (in future) a run-time scheduler that decided when a -new version of native code was needed for a given subgraph. -% -If an entire hierarchical graph will be compiled as a single kernel mapped -to a single compute unit, then only the parent node of that graph needs to -be annotated. -% -The compiler will generate code for each such graph using the compilation -flow described below. - -Device-specific ``translators'' use this information to generate native -code for a particular compute unit. Once mapping -of nodes to different hardware components is done, the code generation for -transfer of data between corresponding hardware components is generated. -% -In future, virtual ISA compilers can allow more flexible mapping -by generating native code for multiple targets for the same subgraph, and -relying on the runtime and scheduler to perform data transfers when mapping of -source and destination nodes of a dataflow edge are known at runtime. - -Our current compilation strategy does not support cycles in a dataflow graph, -although loops within leaf nodes present no problems. -% -Outer-level cycles must be expressed in the host code outside the dataflow -graphs, as we do for iterative algorithms (like {\tt stencil}) and streaming -computations (like the image processing pipeline described in Section~\ref{sec:evaluation:streaming}). - -%------------------------------------------------------------------------------ -\subsection{Compilation Flow} -\label{sec:compiler:flow} -%------------------------------------------------------------------------------ - -The compilation flow for a virtual ISA program can be divided into three -phases: -% -(1) Mapping and code generation of distinct subgraphs to hardware -accelerators, specifically, compute code for the annotated nodes. -% -(2) Calls to the run-time library for data movement for the DFG edges. 
-% -(3) Generating sequential code for the remaining unmapped parts of the graph. -% -%What about {\tt launch/wait} intrinsic code generation. -% -The latter phase -- sequential code -- is straightforward and is only -briefly described in Section~\ref{sec:compiler:impl}. -% -The other two phases are described below. - -The translation to native code is carried out for one annotated node -at a time. -The compilation requires traversal of the -dataflow graph to find the annotated nodes and to translate each of them into -native code for the selected compute unit. -% -We use Algorithm~\ref{algo:traversal} to traverse the hierarchical graph -at find the annotated nodes. -% -This algorithm is a simple depth-first traversal of the graph, translating -each annotated node as it is encountered, as described below. -% -The edges in the hierarchical graph -between nodes belonging to the same child graph express dataflow edges that -require run-time support for the data transfers. - -\begin{algorithm} - \caption{Hierarchical Dataflow Graph Traversal} - \label{algo:traversal} - \begin{algorithmic}[1.] - \Procedure{Visit}{Node $N$} - \If{$N$ was visited before} - return - \EndIf - \If{$N$ is an annotated node} - \State \textit{NN} = CollapseToLeaf($N$) - \State Translate(\textit{NN}) - \Else \Comment{$N$ is an internal node} - \State $G \gets$ child graph of node $N$ - \State $L \gets$ list of all nodes of $G$ in topological order - \While {$L$ is non-empty} - \State remove a node $n$ from $L$ - \State $\textsc{Visit}(n)$ - \EndWhile - \EndIf - \EndProcedure - \end{algorithmic} -\end{algorithm} - -%------------------------------------------------------------------------------ -\subsubsection{Mapping Subgraphs to Accelerators} -\label{sec:compiler:mapping} -%------------------------------------------------------------------------------ - -The annotations described earlier identify distinct subgraphs that should be -mapped to specific compute units. 
-For example, the subgraph containing -{\tt Laplacian} node in Figure~\ref{fig:designexample} expresses parallelism -well suited for a GPU, and assuming it is marked as such, -the GPU translator would translate it for execution on an available GPU. -% -It would first collapse the hierarchical graph at the node, $N$, into a single -leaf node, \textit{NN}, and then translate node \textit{NN} to the specified -compute unit. -% -Collapsing a graph into a single node is conceptually straightforward, though -it involves many steps, and the details are omitted here. -% -To translate the leaf node, the translator isolates the functions -associated with the node into a separate LLVM module and generates native -code for it. -% -The specific details of the translation are implementation specific, and are -described below in Section~\ref{sec:compiler:impl}. -% -The final result of this phase is a new -graph where all leaf nodes have been translated for execution on -individual compute units. - -%------------------------------------------------------------------------------ -\subsubsection{Data Movement and Internal Nodes' Code Generation} -\label{sec:compiler:datamovement} -%------------------------------------------------------------------------------ - -The input to this phase is a graph where all leaf nodes have been mapped to -hardware accelerators and contain target specific code. The compiler performs -code generation of all the internal nodes of this graph, and for dataflow edges -between nodes. The child graph of any internal node is traversed in -topological order and function calls are inserted to the corresponding leaf -node. -For CPU code (e.g., targeting vector hardware), -loops are inserted around a function call if a static child node maps to -multiple instances in the dynamic dataflow graph. 
- -For data flow edges where the -source and destination node execute on the same compute unit, or if -they execute on two different compute units that share memory, -passing a pointer between the nodes would be enough. -Such pointer passing is safe even with copy semantics because -a dataflow edge implies that the source node must have -\emph{completed} execution before the sink node can begin, so the -source code will not overwrite the data once the sink node begins execution. -% -However, several -accelerators today have separate memory hierarchy and data needs to be -explicitly brought into the accelerator memory before starting the execution. In -such cases explicit data copy instructions are generated using calls to the -accelerator API. For example, we use OpenCL API calls to move data to -and from the GPU. - -%------------------------------------------------------------------------------ -\subsection{Implementation} -\label{sec:compiler:impl} -%------------------------------------------------------------------------------ - -Our current compiler has functional translators -for compiling the \NAME{} virtual ISA to PTX, -AVX and host code for x86-64 (host code should also work for other -architectures for which an LLVM backend and the OpenCL run-time are available). -To reduce implementation effort -for our prototype, we leverage existing backends in the mainline LLVM -infrastructure for PTX (the open source NVPTX back end) and for AVX -(the LLVM-to-SPIR back-end with Intel's OpenCL SPIR-to-AVX translator). -Our implementation then mainly has to translate our -virtual ISA to the input code expected by each of these back-ends. 
- -%------------------------------------------------------------------------------ -\subsubsection{Translators} -\label{sec:compiler:translators} -%------------------------------------------------------------------------------ - -Our PTX translator takes the subgraph -where an internal node has a single leaf node in its child graph, which is -replicated into several dynamic instances. The PTX translator generates NVVM -IR~\cite{NVVM:URL} for the leaf node. -NVVM IR is a subset of the LLVM IR, together with a set of intrinsic functions, -which the open source NVPTX backend can translate -into PTX~\cite{PTX2.3Manual:URL} assembly. -For the internal node, our translator generates code -to load and run the PTX assembly of the leaf node on -the target nVidia GPU using the -nVidia OpenCL runtime to execute the internal node. - -In a similar fashion, our -AVX translator generates SPIR~\cite{SPIRKhronosSpec} code for the leaf node and -uses the Intel OpenCL~\cite{IntelOpenCL:URL} runtime to execute it on -multicore CPUs supporting AVX extensions. -The Intel SPIR translator to AVX has significant autovectorization capabilities -that take advantage of the independence of SPIR kernel instances to produce -vector code. -Note that it is reasonable for us to reuse Intel's vectorizor instead of -writing our own because our goal is \emph{not} to invent new vectorization and -vector code generation technology: rather, our goal is to -show that the \NAME{} virtual ISA is a suitable -input code representation for enabling effective vectorization, which we can -accomplish by feeding Intel's SPIR translator from our virtual ISA. - -OpenCL does not allow dynamic memory allocation inside the kernel. As a result, -dataflow nodes which perform dynamic memory allocation cannot be compiled for -GPUs. For nodes generating a data array as output, pointers to pre-allocated -arrays are passed as inputs to a node. 
Thus, pointer arguments to a node can be -pointers to both input or output data array. The general idiom we use to pass -arrays is to provide a pointer to the array and the array size as arguments. - -To differentiate between pointers to -input/output data arrays, we add attributes {\tt in}, {\tt out}, and {\tt inout} -to node arguments as shown for input pointer $I$ in Listing~\ref{lst:laplacian}. -These attributes enable us to avoid extra memory copies, when executing on GPUs. -For example, in the iterative \texttt{stencil} benchmark, the main kernel is -executed a fairly large number of times, and only one of the two arrays it -operates needs to be copied back to the host every time and the other one is -then copied back from host to the GPU. -By marking one of the array arguments as {\tt in} and the other one as -{\tt out}, we avoid the extra copy in each direction. - -%------------------------------------------------------------------------------ -\subsubsection{{Launch/Wait} Intrinsic Code Generation} -\label{sec:compiler:flow} -%------------------------------------------------------------------------------ - -The {\tt launch} intrinsic is used to asynchronously start a dataflow -graph execution from host code. The {\tt wait} intrinsic blocks until the -dataflow graph execution is complete. The compiler replaces the {\tt launch} -intrinsic with a runtime API call to start the dataflow graph execution in a -new thread, using the Posix pthreads library. -The {\tt wait} intrinsic is implemented using {\tt pthread\_join}. - -%------------------------------------------------------------------------------ -\subsubsection{\NAME{} Runtime} -\label{sec:compiler:flow} -%------------------------------------------------------------------------------ - -Previous subsections describe the static code generation of key features of the -virtual ISA. Two specific features, however, require runtime support. 
- -First, the virtual ISA design allows a leaf node to query node instance and dimension -queries to any ancestor. When such a query can be addressed by hardware -registers, the query intrinsic is replaced by the corresponding accelerator API -call. However, when it is not supported, the runtime maintains a stack to keep -track of the instance ID, and dimension limit of the dynamic instance of the -ancestors and responds when a query arrives. - -Second, the dataflow graph semantics of the virtual ISA assumes a globally -addressable memory model. However, in the present form, many -accelerators present in a SoC do not support this model. For example, many of -today's GPUs cannot address CPU memory directly (although this capability is -emerging and may be more common in future). -In such a scenario, the data has to be -explicitly transferred to the accelerator memory before one initiates -computation on the accelerator. To perform these data transfers, the -translator inserts static API -calls to the accelerator runtime in the generated native binary. These -data copies are expensive and critical to application performance. It may -happen that such a copy is unnecessary because the data is already present on -the device. This would happen because the data was brought in the device memory -by a prior node executing on the device. Thus, as an optimization, the \NAME{} -runtime incorporates a feature we call the ``memory tracker,'' -which keeps track of the latest copy of data arrays to avoid unnecessary -copies to and from the accelerator. - - -%\label{sec:compiler:impl} We implement the compilation strategy as a -%series of LLVM passes. We describe a few key passes in the current compiler. - -%\todo[inline]{Optimizations??} -%The compilation process preserves some key invariants: - -%\begin{enumerate} -%\item Every kernel is generated from a distinct subgraph in the dataflow graph hierarchy. 
-%\item After Codegen: -%\begin{itemize} - %\item Every leaf node in the hierarchy is represented by one or more kernels - %\item An internal node never becomes a kernel; instead, it is evaluated at runtime to - %instantiate the child subgraphs. -%\end{itemize} -%\end{enumerate} - -%The \NAME{} compilation flow can be broken down into the following steps - - -%\begin{enumerate} -%\item The front end would parse the source files and generate the hierarchical -%dataflow graph (called DFG, represented through intrinsics in LLVM IR) along -%with LLVM IR for the leaf nodes. -%\item The Graph Builder Pass would construct the internal representation for the -%static dataflow flow graph by parsing -%the DFG intrinsics. -%\item Other passes operate on and optimize the LLVM IR. -%\item The code generator uses the static dataflow graph to -%\begin{itemize} - %\item identify subgraphs which can be mapped to one or more available compute units - %(GPUs, DSPs, vectors) efficiently. - %\item generate code to assist the scheduler in dispatching these kernels to the - %appropriate compute units. One subgraph may be dispatched to multiple compute units. - %\item The backends generate the target specific code for the appropriate compute units. - %\item Finally, DFG runtime calls perform runtime - %scheduling and execute the target-specific kernel codes. -%\end{itemize} -%\end{enumerate} - -%Currenlty, we use modified Clang to generate vitual ISA binary from OpenCL -%source code. We have implemented compiler backends for translation of virtual -%ISA code to native code for selected target backends. - -%\begin{enumerate} -%\item DFG2LLVM\_NVPTX backend pass: This backend pass identifies the subgraph suitable -%for computation on an nVidia GPU and produces NVVM IR, which the LLVM NVPTX backend -%can translate to PTX assembly. -%\item DFG2LLVM\_SPIR backend pass: This pass is similar to the DFG2LLVM\_NVPTX pass, -%except that instead of the NVVM IR, it produces the SPIR binary. 
It generates -%code to invoke the Intel OpenCL runtime run the SPIR binary using Intel AVX SIMD -%instructions. -%\item DFG2LLVM\_X86 backend pass: This pass can generate single threaded x86 -%source code for any remaining node in the dataflow graph and also generates code -%for launching the dataflow graph from host. -%\end{enumerate} diff --git a/paper/Conclusion.tex b/paper/Conclusion.tex deleted file mode 100644 index 26df10b9639e3a12d5edd76717c7dae2d65a5900..0000000000000000000000000000000000000000 --- a/paper/Conclusion.tex +++ /dev/null @@ -1,22 +0,0 @@ -\section{Conclusion} -\label{sec:conclusion} - -We present \NAME{}, a new Virtual ISA that aims to address the functional and -performance portability challenges arising in today's SoC's. \NAME{} is designed -as a hierarchical dataflow graph with side effects and parametric vector -instructions. We argue that these two models of parallelism exposed by \NAME{} -successfully capture the diverse parallelism models exposed by a wide range of -parallel hardware. We also presented a compilation strategy -that uses a single object code to target a wide range of parallel hardware, and -implemented backend translators for nVidia's GPUs targeting PTX, vector -hardware using Intel's AVX, and host code for X86 processors. - -We evaluate our design by (a) using a single \NAME{} representation of four -applications from the Parboil Benchmark Suite to generate code for both nVidia's -GPUs and vector hardware, and comparing with baselines that are each seperately -tuned for their respective target device. The achieved performance is within a -factor of 2x at the worst case, demonstrating the achieved performance -portability from a single \NAME{} representation, and (b) demonstrating that -\NAME{} can naturally capture streaming parallelism due to its dataflow -representation. 
- diff --git a/paper/DesignGoals.tex b/paper/DesignGoals.tex deleted file mode 100644 index 4d1286d9851d03e2e823f27b7e7b340221c12115..0000000000000000000000000000000000000000 --- a/paper/DesignGoals.tex +++ /dev/null @@ -1,91 +0,0 @@ -%------------------------------------------------------------------------------ -\section{Virtual ISA Design Goals} -\label{section:goals} -%------------------------------------------------------------------------------ - -Previous work~\cite{PTX2.3Manual:URL,VectorLLVA:VEE06} has shown that the approach of a -virtual ISA can achieve both high performance and be commercially viable. -In this work, we aim to design a virtual ISA for the wide range of parallel -hardware configurations found in current and future mobile SoCs. -We briefly summarize the primary design goals of our virtual ISA: -% -\begin{description} -% -\item{\bf Object code portability with as good performance as possible:} -% -The key goal of our virtual ISA design is to enable the \emph{virtual object -code} to be portable across a wide range of different configurations of -heterogeneous parallel SoCs, while obtaining as good performance as possible -on each compute unit. -% -We emphasize that we do not necessarily aim to match manually tuned -code for individual compute units because such tuning usually comes at the -cost of portability, or at the cost of hurting performance on other compute -units. -% -Object code portability is an absolute requirement for modern -applications running on mobile hardware. -% -Applications that absolutely require hand-tuned performance can already use -conditionally compiled code or \emph{fat binaries} or both to achieve such -performance, at the cost of significantly greater programming and maintenance -effort. - -\item{\bf Language independence:} The virtual ISA should be able to support a -wide range of parallel programming languages, such as OpenCL, Renderscript, -and OpenMP 4.0 accelerator features. 
-In particular, the virtual ISA is \emph{not} intended as a source-level -programming language, but the parallelism abstractions must be easy to -reason about by programmers. - -\item{\bf Machine independence:} The virtual ISA should be able to support a -wide range of hardware instruction sets, application binary interfaces (ABIs). - -\item{\bf As few abstractions of parallelism as possible:} The virtual ISA must -use as few parallelism models as possible to capture the wide range of parallel -hardware on a modern SoC. This is important so that programmers can design -and tune algorithms without having to become experts in a wide range of -different kinds of parallelism. -% -These few abstractions must be able to map down effectively to today's -parallel hardware, such as multicore CPUs, GPUs and vectors, and also to -emerging parallel hardware, especially semi-custom, programmable accelerators. -% -(Custom, fixed-function accelerators may have high degrees of internal -parallelism but are usually programmed via fairly straightforward library -interfaces, which do not require exposing the detailed internal parallelism -features.) - -\item{\bf Coarse-grain parallelism across compute units:} The virtual ISA -must capture relatively large-grain parallelism mapped to different compute -units, while compiling down to use as efficient data transfer mechanisms -as possible between those compute units. - -\item{\bf Coarse- and fine-grain parallelism within compute units:} The -virtual ISA must also capture both coarse- and fine-grain parallelism that -can be mapped to a single compute unit, in order to achieve the highest -possible parallel performance for each compute kernel on a wide range of -compute units. 
- -\item{\bf Representation of both explicit and implicit communication:} It must -be possible to represent both explicit data copies between compute units, e.g., -between a CPU and an accelerator, and implicit data transfers through shared -memory, e.g., for a shared-memory multicore system or for emerging GPU -hardware that allows direct sharing between CPU and GPU. Both kinds of memory -transfers need to be under careful control of the programmer because memory -accesses are often the primary determining factor in program performance. - -\item{\bf Flexible scheduling support across compute units:} It must be -possible to compile kernels represented in the virtual ISA to multiple -different compute units, so that a run-time scheduler can flexibly map a given -kernel to different compute units, depending on availability constraints -and battery conservation goals. - -\item{\bf Offline compilation model:} To minimize energy consumption and -perceived application startup time, it should be possible to compile the -virtual ISA ahead-of-time (AOT) to native machine code. For example, this -was one major change from Android's Dalvik virtual machine, which uses -just-in-time (JIT) compilation every time an application is loaded, to the -ART system, which uses AOT compilation once at install time. - -\end{description} diff --git a/paper/Evaluation.tex b/paper/Evaluation.tex deleted file mode 100644 index 18e5284fc2453dcf9109d421e7de49caac3718a1..0000000000000000000000000000000000000000 --- a/paper/Evaluation.tex +++ /dev/null @@ -1,259 +0,0 @@ -%------------------------------------------------------------------------------ -\section{Evaluation} -\label{sec:evaluation} -%------------------------------------------------------------------------------ - -In our experiments, we evaluate the suitability of the virtual ISA design -in two ways. (1) The virtual ISA design should be portable. 
For this, we use -the same virtual ISA binary of an application to compile to different compute -units. -(2) When compared to current heterogeneous programming technologies such as -OpenCL, CUDA, and others, the virtual ISA design should be able to capture the -parallelism expressed using these languages, and thus achieve reasonable -performance when compiled to target architectures for these source-level -languages. - -%------------------------------------------------------------------------------ -\subsection{Experimental Setup and Benchmarks} -\label{sec:evaluation:setup} -%------------------------------------------------------------------------------ - -We modified the OpenCL front-end in the Clang compiler to generate the -virtual ISA for OpenCL applications. -% -We use annotations as hints to identify the subgraphs in the virtual ISA -that are suitable for accelerators. -% -We then used the compilation strategy described in Section~\ref{sec:compiler} -to translate the virtual ISA to two different target units: -the AVX instruction set in an Intel Xeon E5 core i7 and -a discrete nVidia GeForce GTX 680 GPU card with 2GB of memory. -The Intel Xeon also served as the host processor, running -at 3.6 GHz, with 16 GB RAM. - -For our experimental evaluation, we used four applications from the -Parboil~\cite{Parboil} benchmark suite: -Sparse Matrix Vector Multiple (spmv), -Single-precision Matrix Multiply (sgemm), -Stencil PDE solver (stencil), and -a Lattice-Boltzmann solver (lbm). - -In the GPU experiments, our baseline for comparison is the best available -OpenCL implementation -in Parboil that does not use local memory (since our virtual ISA does not -yet support local memory). For spvm and lbm, that is the Parboil version -labeled {\tt opencl\_nvidia}, which -has been hand-tuned for the Tesla NVidia GPUs~\cite{Liwen:Personal}. For -sgemm, the hand tuned version was utilizing local memory, thus preventing us -from using it. 
Instead, using that version as a starting point, we implemented -a version that is similar in every way except that the accesses to local -memory were replaced by accesses to global GPU memory instead, and that we -tuned the work group sizes to achieve the -best performance. Finally, for stencil, we use the basic version since following -the same practice did not improve the execution time. -All the applications are compiled using nVidia's proprietary OpenCL -compiler. - -In the vector experiments, our baseline is the same OpenCL implementations that -we chose as GPU baselines, -but compiled using the Intel OpenCL compiler, as we found -that these versions achieved the best performance compared to the other -available OpenCL versions on vector hardware as well. -The \NAME{} binaries were also generated using the same versions of OpenCL. - -We use two input -sizes for each benchmark, labeled `Small' and `Large' below. -Each data point we report is an average of ten runs for -the small test cases and an average of five runs for the large test cases; -we repeated the experiments multiple times to verify their stability. - -%------------------------------------------------------------------------------ -\subsection{Experimental Results} -\label{sec:evaluation:results} -%------------------------------------------------------------------------------ - -Figures~\ref{fig:gpusmall} and~\ref{fig:gpularge} show the normalized execution -time of these applications against GPU baseline for each of the two sizes. -Similarly, figures~\ref{fig:cpusmall} and~\ref{fig:cpularge} compare the -performance of \NAME{} programs with the vector baseline. The execution times are -broken down to segments corresponding to time spent in the compute kernel of the -application (kernel), copying data (copy) and remaining time spent on the host -side. The total execution time for the baseline is depicted on the -corresponding bar to give an indication of the actual numbers. 
- -When comparing \NAME{} code with the GPU baseline, \NAME{} achieves near -hand-tuned OpenCL performance for almost all of these benchmarks, except spmv on -`Small' dataset, where it is within a factor of $1.2$. This is because of the -small total execution time of $0.076s$ for spmv on `Small' dataset. For the `Large' -dataset, the \NAME{} code performance is on par with OpenCL implementation, -where due to the fact that the total running time is larger, the effect of -constant overhead to the total execution time is minimal. - -In the vector case, we see that the performance of \NAME{} is within 25\% in the -worst case. We observe that the kernel execution time in lbm is 25\% higher for -\NAME{} implementation than OpenCL. This is because the Intel OpenCL runtime -which is used by the \NAME{} runtime keeps one thread idle when it observes an -extra thread has been created by an application. We have to create this thread -to execute the \NAME{} dataflow graph asynchronously. We expect this overhead to -go away with improved OpenCL runtime implementation. - -%Comparing \NAME{} code with the GPU baseline, the performance is within about -%25\% of the baseline in most cases and within a factor of -%$1.8$ in the worst case. -%We see that the \NAME{} -%application spends more time in the kernel execution relative to the GPU -%baseline. However, inspection of the generated PTX files generated by nVidia -%OpenCL compiler for OpenCL applications and \NAME{} compiler for \NAME{} applications -%has shown that they are almost identical, with the only difference being a minor -%number of instructions being reordered. Also, we notice increased, sometimes to -%a significant factor, data copy times, despite the fact the data copied in both -%applications are similar and that the \NAME{} runtime makes use of a memory -%tracking mechanism to avoid unnecessary data copies. 
We are working on getting -%a -%clear picture of the overheads that the \NAME{} representation or compilation may -%be imposing on the program execution. - -%In the vector case, we see that the performance of \NAME{} is within about -%30\% in all cases, and within a factor of 1.6x in the worst case. -%We again -%observe the same inefficiencies in kernel and copy time, albeit less pronounced -%due to the fact that the total running times are generally larger, which -%minimizes the effect of constant overheads to the total execution time. - -Finally, we note that none of our benchmarks made use of vector code at the leaf -dataflow nodes. This choice was made after comparing the performance of two \NAME{} -versions: (a) the \NAME{} object code as generated from the modified Clang -frontend, and (b) the \NAME{} code after altering the number of dynamic instances -of the leaf nodes as well as their code, in order to perform a bigger amount of -computation so that vectorization can be achieved. This transformation may have -improved the performance in some cases for one of the two targets, but it never -achieved reasonable performance on both. This is due to the competing -representation required to achieve good performance for GPUs and vector units. -In the GPU case, code executing by a thread should perform carefully strided -memory accesses in order to achieve coalescing of the memory requests performed -by multiple threads, and vector instructions get serialized at the hardware thus -no performance gain occurs from their use. In the vector case, a thread aims to -access consecutive locations so as to perform vectorized memory operations and -computations. Thus, a simple code where all threads perform independent -operations and access consecutive locations has the potential to achieve good -performance on both targets, by allowing memory coalescing on the GPU side and -vectorization across work items in the vector case. 
To conclude, for simple -benchmarks where vectorization across work items can be achieved automatically, -our experiment shows that the presence of vector instructions does not improve -performance on both targets. We expect the vector instructions to lead to -performance gains for more complicated kernels where automatic vectorization -will not be effective. - - -\begin{figure*}[hbt] -\begin{minipage}{0.48\textwidth} -\begin{center} - \includegraphics[height=4cm]{Figures/gpusmall.png} - \caption{\footnotesize{GPU Experiments - Small Test Normalized Execution - Time}} - \label{fig:gpusmall} -\end{center} -\end{minipage}~~~~\begin{minipage}{0.48\textwidth} -\begin{center} - \centering - %\hspace*{4ex} - \includegraphics[height=4cm]{Figures/gpularge.png} - \caption{\footnotesize{GPU Experiments - Large Test Normalized Execution - Time}} - \label{fig:gpularge} -\end{center} -\end{minipage} -\end{figure*} - -\begin{figure*}[hbt] -\begin{minipage}{0.48\textwidth} -\begin{center} - \centering - %\hspace*{4ex} - \includegraphics[height=4cm]{Figures/cpusmall.png} - \caption{\footnotesize{Vector Experiments - Small Test Normalized Execution - Time}} - \label{fig:cpusmall} -\end{center} -\end{minipage}~~~~\begin{minipage}{0.48\textwidth} -\begin{center} - \centering - %\hspace*{4ex} - \includegraphics[height=4cm]{Figures/cpularge.png} - \caption{\footnotesize{Vector Experiments - Large Test Normalized Execution - Time}} - \label{fig:cpularge} -\end{center} -\end{minipage} -\end{figure*} - -%------------------------------------------------------------------------------ -\subsection{Expressing parallelism beyond GPUs} -\label{sec:evaluation:streaming} -%------------------------------------------------------------------------------ - -\NAME~is aimed to be extensible beyond the devices that are most commonly found -in today's accelerators and represent parallelism models in a broad class of -available hardware. 
Apart from data parallelism, many accelerators expose a -streaming paallelism model and would benefit greatly by a representation that -can capture this feature. \NAME~presents the unique advantages of representing a -program as a dataflow graph, which is a natural way of representing the -communication between producers and consumers, as well as describing the -repeated transfer of multiple data items via streaming edges. This section uses -an image processing pipeline to demonstrate the benefits of expressing a -streaming application in \NAME. - -\begin{center} - \begin{figure*}[hbt] - \centering - %\hspace*{4ex} - \includegraphics[height=6cm]{Figures/pipeline.png} - \caption{Edge Detection in gray scale images in \NAME{}} - \label{fig:pipeline} - \end{figure*} -\vspace*{-1.5\baselineskip} -\end{center} - -Figure~\ref{fig:pipeline} presents an application for Edge Detection in -gray scale images in \NAME. At a high level, this application is a dataflow node -that acceps a greyscale image $I$ and a binary structuring element $B$ and -computes a binary image $E$ that represents the edges of $I$. The application -begins by computing an estimate of the Laplacian $L$ of $I$, as depicted in -figure~\ref{fig:pipeline}, and proceeds by computing its zerocrossings, -i.e. points of sign change in $L$. A different dataflow node computes the -gradient $G$ of $I$, operation that can proceed in parallel with the remaining -computations. The final dataflow node uses the output of the Gradient and the -ZeroCrossings to perform a thresholding operation that will allow it to reject -small variations in the brightness of the image and only detect more significant -variations that actually constitute edges. - -We implemented this pipeline using OpenCV computer vision library. -We used C++ thread library to create threads for each top level node in this -example, and implemented fixed size -circular buffers for each streaming edge between these nodes to pass data -between them. 
The pipeline, streaming and dataflow parallelism expressed in this -example is easy to capture in \NAME{}. The streaming edges, dataflow nodes -simply map to key features of \NAME{}. Our current implementation of \NAME{} is -only missing the implementation of circular buffers for streaming edges, and -thus we do not have a working \NAME{} version of this example. - -However, mapping pipeline and streaming parallelism model to SPIR, HSAIL -parallelism models of one kernel replicated across several cores, is -non-intuitive and difficult to achieve. OpenCL supports concurrent execution of -kernels running in two different streams, Expressing concurrency across kernels -working on different image sections would require complex synchronization and an -iimplementation of programmer managed scheduling of nodes. This is a tedious and -error-prone task, which is unlikely to scale to bigger and more complex -pipelines. - -Expressing this example in \NAME{}, would have the added advantage of flexibly -mapping computationally heavy parts of the pipeline to accelerators. The Laplacian node is the pipeline -bottleneck. Mapping Laplacian to GPU, achieved 2x speedup, as it balances the -two branches of the pipeline. However, mapping both Laplacian and Gradient to -GPU achieves a modest 1.1x speedup. This further shows the advantage of flexible -mapping, which allows the programmer or auto-tuner to easily tune an application. 
- -%\begin{center} -%\lstinputlisting[float=*, language=llvm]{Code/lincomb.ll} -%\end{center} - diff --git a/paper/Figures/Results.pdf b/paper/Figures/Results.pdf deleted file mode 100644 index fdb72073cc558821e9976583f90e9f927eab8618..0000000000000000000000000000000000000000 Binary files a/paper/Figures/Results.pdf and /dev/null differ diff --git a/paper/Figures/compilation-short.png b/paper/Figures/compilation-short.png deleted file mode 100644 index e6a5ab0f4589877bc9a706b0f76636e65be9628b..0000000000000000000000000000000000000000 Binary files a/paper/Figures/compilation-short.png and /dev/null differ diff --git a/paper/Figures/compilation.png b/paper/Figures/compilation.png deleted file mode 100644 index 9b8791e1daf864fe1fcf96c1f3beda626f3d48b6..0000000000000000000000000000000000000000 Binary files a/paper/Figures/compilation.png and /dev/null differ diff --git a/paper/Figures/cpularge.png b/paper/Figures/cpularge.png deleted file mode 100644 index f9463ec0d9fb525ee0806cf07e6f8274ef0ae4da..0000000000000000000000000000000000000000 Binary files a/paper/Figures/cpularge.png and /dev/null differ diff --git a/paper/Figures/cpusmall.png b/paper/Figures/cpusmall.png deleted file mode 100644 index 3b96c6c16ce9158948a85c7f58fd2800622b42c8..0000000000000000000000000000000000000000 Binary files a/paper/Figures/cpusmall.png and /dev/null differ diff --git a/paper/Figures/designexample.png b/paper/Figures/designexample.png deleted file mode 100644 index 8f65dc4af98d966e0f132c185240a5b4849c3826..0000000000000000000000000000000000000000 Binary files a/paper/Figures/designexample.png and /dev/null differ diff --git a/paper/Figures/gpularge.png b/paper/Figures/gpularge.png deleted file mode 100644 index eb09aeed6e2d90325ca89b23a7fb2813fe0b7d2a..0000000000000000000000000000000000000000 Binary files a/paper/Figures/gpularge.png and /dev/null differ diff --git a/paper/Figures/gpusmall.png b/paper/Figures/gpusmall.png deleted file mode 100644 index 
8186414ca10490b84a15f2f72aa7cff5b314497e..0000000000000000000000000000000000000000 Binary files a/paper/Figures/gpusmall.png and /dev/null differ diff --git a/paper/Figures/lincomb.png b/paper/Figures/lincomb.png deleted file mode 100644 index e8731b198bf46d2dab271256f19c3383edb593c9..0000000000000000000000000000000000000000 Binary files a/paper/Figures/lincomb.png and /dev/null differ diff --git a/paper/Figures/pipeline.png b/paper/Figures/pipeline.png deleted file mode 100644 index e61f4a5e05ad8aad3204a0609f9b5f4edacd16cf..0000000000000000000000000000000000000000 Binary files a/paper/Figures/pipeline.png and /dev/null differ diff --git a/paper/Figures/visc.pdf b/paper/Figures/visc.pdf deleted file mode 100644 index ced522511e6e689e344267c1ab776440a820c1dd..0000000000000000000000000000000000000000 Binary files a/paper/Figures/visc.pdf and /dev/null differ diff --git a/paper/Introduction.tex b/paper/Introduction.tex deleted file mode 100644 index cd2a3cb2dc96403b3721578b47bf77f27bc26515..0000000000000000000000000000000000000000 --- a/paper/Introduction.tex +++ /dev/null @@ -1,200 +0,0 @@ -%------------------------------------------------------------------------------ -\section{Introduction} -\label{sec:intro} -%------------------------------------------------------------------------------ - -In computing contexts where energy is an important consideration, such as in -mobile devices like smartphones, tablets, and e-book readers, or where power and -heat dissipation are important, such as in data centers, traditional homogeneous -multicore processors can be quite inefficient. These contexts are increasingly -seeing the advent of heterogeneous computing systems, which use specialized -computing elements that can deliver much greater efficiency in -performance-per-Joule or performance-per-Watt. 
For example, the ``application -processor'' on a modern smartphone or tablet is a heterogeneous System-on-chip -(SoC) that often includes not just a multicore host CPU, but also a GPU, a DSP, -and several more specialized processors for tasks such as audio and video -decoding, image processing, digital photography, and speech recognition. - -Programming applications for hardware that uses such diverse combinations of -computing elements is extremely challenging. The challenges include developing -portable algorithms, writing efficient yet portable source-level programs, -producing portable object code, and tuning the programs. At a more fundamental -level, these challenges arise from three root causes: (1) diverse parallelism -models; (2) diverse memory architectures; and (3) diverse hardware instruction -sets. To make use of the full range of available hardware to maximize -performance and energy efficiency, the programming environment needs to provide -common abstractions for all the available hardware compute units in -heterogeneous systems. Not only are these abstractions required at the level of -source-code, but also at object-code level to make the object-code portable -across the same and different manufacturer's devices, thus allowing -application vendors to be able to ship a single software version across a broad -range of devices. 
- -%\begin{center} -%\begin{figure}[hbt] -%\centering\hspace*{4ex}\includegraphics[height=6.5cm]{Figures/visc.pdf} -%\caption{\footnotesize{System Organization for Virtual Instruction Set Computing -%in a Heterogeneous System}} -%\label{fig:visc} -%\end{figure} -%\end{center} - -We believe that these issues are best addressed using a -virtual instruction set layer that abstracts away most of the low-level -details of different hardware components, but -provides a small number of abstractions of parallelism that can be mapped -down (or ``translated'') effectively to all the different kinds of parallel -hardware on a wide range of SoCs. -The (virtual) object code is translated down to specific hardware components -available on a particular device, at install time, load time or run-time. -This general approach, which we call Virtual Instruction Set Computing (VISC), -has been used very successfully for GPGPU computing, e.g., through the -PTX virtual ISA for several generations of nVidia GPUs, and more recently -HSAIL~\cite{HSAIL} and SPIR~\cite{SPIRKhronosSpec} for other classes of hardware. -Although HSAIL and SPIR can be mapped down to non-GPU hardware, their design -has been heavily influenced by the SIMT parallelism model of GPUs, which -supports both GPU and vector hardware well but limits their effectiveness -for other kinds of parallelism. -This is discussed in more detail in Sections~\ref{sec:evaluation:streaming} -and~\ref{sec:related}. -% -%The key point is that the only software components that can "see" the hardware -%details are the translators (i.e., compiler back ends), system-level and -%application-level schedulers, a minimal set of other low-level OS components and -%some device drivers. The rest of the software stack, including source-level -%language implementations, application libraries, and middleware, lives above the -%virtual ISA and is portable across different heterogeneous system -%configurations. 
Unlike previous VISC systems, our virtual instruction set design -%abstracts away and unifies the diverse forms of parallelism in hardware (using a -%combination of only two models of parallelism). It also provides abstractions -%for memory and communication, allowing back-end translators to generate code for -%efficient data movement across compute units. These abstractions enable -%programmers to write efficient software applications that are portable across a -%diverse range of hardware configurations. Moreover, we are exploiting the -%flexible translator-hardware communication in VISC systems to enable novel -%memory system designs that are more energy-efficient and higher performance than -%current designs. - -In this paper, we propose a virtual ISA design that abstracts away the -wide range of parallelism models and the disparate instruction sets used -within and across SoCs. -(In this work, we do not consider the different memory hierarchy architectures -used across compute units or devices, but it is a subject of our ongoing -work.) -In fact, we can represent these different parallelism models using only -\emph{two abstractions of parallelism}: -\begin{itemize} -\item Hierarchical dataflow graphs with side effects, and -\item Short-vector SIMD (Single Instruction Multiple Data) instructions. -\end{itemize} -% -Dataflow graphs are a very general model of data parallelism and, when -extended to allow shared memory accesses (side effects), can capture -many forms of parallel computing over data elements, including -vector SIMD parallelism, -the SIMT (Single Instruction Multiple Threads) parallelism model used in -general-purpose GPUs, -streaming or pipelined-dataflow parallelism, and -fine-grained data parallelism, which may be synchronous or asynchronous. 
-% -Although dataflow graphs can capture vector parallelism too, vector -instructions, when applicable, provide a representation that is far -more compact, efficient, and much easier to reason about and transform; -for this reason, we include explicit vector instructions in our model. - -We make the dataflow graphs hierarchical to express multiple -granularities of parallelism in a natural manner, e.g., -coarse-grain parallelism across different compute units vs. fine-grain -parallelism within a single compute unit. -% -In particular, a dataflow graph node is either an \emph{internal node} or a -\emph{leaf node}. -% -An internal node itself contains another dataflow graph within it. -% -A leaf node contains executable code that is some mixture of scalar and -vector instructions. -% -Each leaf node in a dataflow graph includes a parameter value, $N$, which -specifies that the node should be \emph{replicated} $N$ times for -independent parallel execution; the value of $N$ may be computed at -run-time. -% -This allows the graph to capture fine-grain parallelism, and is -similar to how a GPU kernel in CUDA, OpenCL or PTX is replicated across -the threads of a GPU device. - -One final feature of our representation is that a dataflow graph edge may -be either an ordinary edge or a ``streaming'' edge. -% -An ordinary edge represents a one-time data transfer from a producer node -to a consumer node; implicitly the two nodes connected by the edge are -executed only once. -% -A streaming edge specifies that the producer and consumer edges execute -repeatedly, transferring data items continuously with the semantics of -a bounded FIFO buffer. - -This code representation can be mapped down and executed effectively on -the full range of parallel hardware on a modern SoC, including GPUs, -vector hardware, multicore host processors, digital signal processors (DSPs), -and semi-custom hardware accelerators. 
-% -In this work, we describe a first prototype system that translates a single -virtual object code program to nVidia GPUs (using PTX), Intel's AVX vector -instructions, and X86 host processors. -% -We present preliminary experimental results comparing the performance of the -generated code for a set of benchmarks to hand-tuned code written using -OpenCL for the GPU and hand-vectorized for AVX. -% -Our results show that the code generated by \NAME{} is close in performance -to the hand-tuned code in many cases, and within about 2x in all cases. -% -These results were obtained with relatively little compiler optimization -for either GPU or vector hardware, which gives us confidence that \NAME{} can -provide object code portability with relatively low performance cost. - -We also present a detailed description of a pipelined streaming benchmark -and how it is represented in \NAME{}. -% -Representing this benchmark in PTX, HSAIL or SPIR would be extremely awkward: -it would require manually written tiling and buffering, with complicated -synchronization to achieve concurrent execution of different pipeline -stages. -% -Although we have not yet implemented the buffered message passing required -for streaming parallelism, the example shows that \NAME{} can naturally -express a broader class of parallelism than can be expressed with the -existing virtual ISAs. -% -We also briefly discuss an example class of programmable, custom accelerators -for machine learning algorithms, which can be naturally targeted using the -parallelism models in \NAME{}, although capturing all the details of the -hardware is a subject of future work. - -%*********MENTIONED THIS BRIEFLY EARLIER INSTEAD OF HERE.********* -%% -%One key limitation of our current work is that we do \emph{not} yet provide -%portable abstractions of the varying memory hierarchies used in different -%hardware components. 
-%% -%Although we have a preliminary design for such abstractions, implementing the -%design fully and evaluating it are beyond the scope of this work. - -The next section describes the high-level design goals of \NAME{}. -% -Section~\ref{sec:design} then presents the detailed design of the -\NAME{} virtual ISA, and its implementation as an extension of the LLVM -instruction set~\cite{LLVM:CGO04}. -% -Section~\ref{sec:compiler} describes our general compilation strategy, -and our prototype translators for PTX, AVX, and X86. -% -Section~\ref{sec:evaluation} presents our experimental results and -our qualitative discussion of the pipelined benchmark and our future work -on the machine learning accelerator. -% -Section~\ref{sec:related} compares our work with the state of the art, -and Section~\ref{sec:conclusion} concludes. - diff --git a/paper/Makefile b/paper/Makefile deleted file mode 100755 index 0ee1fc4c06f9218bb3768a4cb1236dc532e46a23..0000000000000000000000000000000000000000 --- a/paper/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -PAPER = paper -TEX = $(wildcard *.tex) -BIB = $(wildcard *.bib) -FIGS = $(wildcard Figures/*.pdf Figures/*.png Graphs/*.pdf Graphs/*.png) -CODE = $(wildcard Code/*.cl Code/*.c Code/*.cpp Code/*.ll) - -.PHONY: all clean - -$(PAPER).pdf: $(TEX) $(BIB) $(FIGS) $(CODE) sigplanconf.cls - echo $(FIGS) - pdflatex $(PAPER) - bibtex $(PAPER) - pdflatex $(PAPER) - pdflatex $(PAPER) - @/bin/echo "" - @/bin/echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" - @/bin/echo " ++++ ANY UNDEFINED REFERENCES ++++" - -@grep -i undef $(PAPER).log || echo "No undefined references." - @/bin/echo " ++++ ANY EMPTY REFERENCES ++++" - -@egrep -i -n -e 'cite{ *}' -e 'ref{ *}' $(TEX) $(FIGS) $(CODE) || echo "No empty references." 
- @/bin/echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" - @/bin/echo "" - -clean: - rm -f *.aux *.bbl *.blg *.log *.out *.tdo $(PAPER).pdf - diff --git a/paper/Outline.rtf b/paper/Outline.rtf deleted file mode 100644 index 08212fa62c720618fc380db3a957e90c17369a3d..0000000000000000000000000000000000000000 --- a/paper/Outline.rtf +++ /dev/null @@ -1,124 +0,0 @@ -{\rtf1\ansi\ansicpg1252\cocoartf1348\cocoasubrtf170 -{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -{\*\listtable{\list\listtemplateid1\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}.}{\leveltext\leveltemplateid1\'02\'00.;}{\levelnumbers\'01;}\fi-360\li720\lin720 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}.}{\leveltext\leveltemplateid2\'02\'01.;}{\levelnumbers\'01;}\fi-360\li1440\lin1440 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}.}{\leveltext\leveltemplateid3\'02\'02.;}{\levelnumbers\'01;}\fi-360\li2160\lin2160 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}.}{\leveltext\leveltemplateid4\'02\'03.;}{\levelnumbers\'01;}\fi-360\li2880\lin2880 }{\listname ;}\listid1}} -{\*\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}} -\margl1440\margr1440\vieww25400\viewh13220\viewkind0 -\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural - -\f0\fs24 \cf0 VISC PPoPP Outline\ -\ -\pard\tx220\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\li720\fi-720\pardirnatural -\ls1\ilvl0\cf0 {\listtext 1. 
}\expnd0\expndtw0\kerning0 -Abstract (1/4 page)\ -\pard\tx220\tx720\pardeftab720\li720\fi-720 -\ls1\ilvl0\cf0 \kerning1\expnd0\expndtw0 {\listtext 2. }\expnd0\expndtw0\kerning0 -Introduction (1+ 3/4 page) \ -\pard\tx940\tx1440\pardeftab720\li1440\fi-1440 -\ls1\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext 1. }\expnd0\expndtw0\kerning0 -Heterogeneous computing and its programmability issues\ -\ls1\ilvl1\kerning1\expnd0\expndtw0 {\listtext 2. }\expnd0\expndtw0\kerning0 -Virtual Instruction Set Computing\ -\pard\tx1660\tx2160\pardeftab720\li2160\fi-2160 -\ls1\ilvl2\cf0 \kerning1\expnd0\expndtw0 {\listtext 1. }What is the current state of art and what are its limitations (1 paragraph)\ -\pard\tx2380\tx2880\pardeftab720\li2880\fi-2880 -\ls1\ilvl3\cf0 {\listtext 1. }PTX, SPIR, HSAIL\expnd0\expndtw0\kerning0 -\ -\pard\tx940\tx1440\pardeftab720\li1440\fi-1440 -\ls1\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext 3. }\expnd0\expndtw0\kerning0 -Our Contributions\ -\pard\tx1660\tx2160\pardeftab720\li2160\fi-2160 -\ls1\ilvl2\cf0 \kerning1\expnd0\expndtw0 {\listtext 1. }\expnd0\expndtw0\kerning0 -new Virtual ISA design by adding -\b \expnd0\expndtw0\kerning0 -novel -\b0 \expnd0\expndtw0\kerning0 - parallelism abstractions to widely used LLVM IR\ -\ls1\ilvl2\kerning1\expnd0\expndtw0 {\listtext 2. }\expnd0\expndtw0\kerning0 -Compilation strategy to target AVX/SPIR, PTX, and X86 backends\ -\ls1\ilvl2\kerning1\expnd0\expndtw0 {\listtext 3. }\expnd0\expndtw0\kerning0 -Evaluation to show new virtual ISA design compatible with current heterogeneous programming techniques/programming models such as OpenCL, SPIR\ -\ls1\ilvl2\kerning1\expnd0\expndtw0 {\listtext 4. }\expnd0\expndtw0\kerning0 -Demonstrate potential of new virtual ISA design in capturing streaming applications\ -\pard\tx220\tx720\pardeftab720\li720\fi-720 -\ls1\ilvl0\cf0 \kerning1\expnd0\expndtw0 {\listtext 3. 
}\expnd0\expndtw0\kerning0 -Virtual ISA Design Goals (1/2 page)\ -\pard\tx940\tx1440\pardeftab720\li1440\fi-1440 -\ls1\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext 1. }\expnd0\expndtw0\kerning0 -Importance of Object-level portability\ -\ls1\ilvl1\kerning1\expnd0\expndtw0 {\listtext 2. }\expnd0\expndtw0\kerning0 -Low-level enough to easily translate to different architectures\ -\ls1\ilvl1\kerning1\expnd0\expndtw0 {\listtext 3. }\expnd0\expndtw0\kerning0 -High-level enough to capture different forms of parallelism\\\ -\ls1\ilvl1\kerning1\expnd0\expndtw0 {\listtext 4. }\expnd0\expndtw0\kerning0 -[Other goals \'85?]\ -\pard\tx220\tx720\pardeftab720\li720\fi-720 -\ls1\ilvl0\cf0 \kerning1\expnd0\expndtw0 {\listtext 4. }\expnd0\expndtw0\kerning0 -Virtual ISA Design (2.5 pages)\ -\pard\tx940\tx1440\pardeftab720\li1440\fi-1440 -\ls1\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext 1. }Why we need \'85 \expnd0\expndtw0\kerning0 -\ -\ls1\ilvl1\kerning1\expnd0\expndtw0 {\listtext 2. }\expnd0\expndtw0\kerning0 -Hierarchical DFG \ -\pard\tx1660\tx2160\pardeftab720\li2160\fi-2160 -\ls1\ilvl2\cf0 \kerning1\expnd0\expndtw0 {\listtext 1. }Nodes, edges\ -{\listtext 2. }Hierarchy\expnd0\expndtw0\kerning0 -\ -\pard\tx940\tx1440\tx2160\pardeftab720\li1440\fi-1440 -\ls1\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext 3. }\expnd0\expndtw0\kerning0 -Vector Instructions [We can think about if we necessarily need to include it in this paper or just say that we are open to include it]\ -\pard\tx940\tx1440\pardeftab720\li1440\fi-1440 -\ls1\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext 4. }\expnd0\expndtw0\kerning0 -Implementation\ -\pard\tx1660\tx2160\pardeftab720\li2160\fi-2160 -\ls1\ilvl2\cf0 \kerning1\expnd0\expndtw0 {\listtext 1. }\expnd0\expndtw0\kerning0 -VISC Intrinsics including launch\ -\ls1\ilvl2\kerning1\expnd0\expndtw0 {\listtext 2. 
}\expnd0\expndtw0\kerning0 -VISC Example (use throughout the section)\ -\pard\tx220\tx720\pardeftab720\li720\fi-720 -\ls1\ilvl0\cf0 \kerning1\expnd0\expndtw0 {\listtext 5. }\expnd0\expndtw0\kerning0 -Compilation Strategy (1 page)\ -\pard\tx940\tx1440\pardeftab720\li1440\fi-1440 -\ls1\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext 1. }Have VISC, single object code \expnd0\expndtw0\kerning0 -\ -\ls1\ilvl1\kerning1\expnd0\expndtw0 {\listtext 2. }\expnd0\expndtw0\kerning0 -Compilation flow\ -\pard\tx1660\tx2160\pardeftab720\li2160\fi-2160 -\ls1\ilvl2\cf0 \kerning1\expnd0\expndtw0 {\listtext 1. }general description of how to get working code\expnd0\expndtw0\kerning0 -\ -\pard\tx940\tx1440\pardeftab720\li1440\fi-1440 -\ls1\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext 3. }\expnd0\expndtw0\kerning0 -Implementation\ -\pard\tx1660\tx2160\pardeftab720\li2160\fi-2160 -\ls1\ilvl2\cf0 \kerning1\expnd0\expndtw0 {\listtext 1. }\expnd0\expndtw0\kerning0 -Backends AVX, PTX, SPIR, X86\ -\ls1\ilvl2\kerning1\expnd0\expndtw0 {\listtext 2. }VISC Runtime [somewhere it has to be there] - launching graph, data copy on edges (here or in compilation)\expnd0\expndtw0\kerning0 -\ -\pard\tx220\tx720\tx1440\pardeftab720\li720\fi-720 -\ls1\ilvl0\cf0 \kerning1\expnd0\expndtw0 {\listtext 7. }\expnd0\expndtw0\kerning0 -Evaluation (2 page for PTX and AVX results + 1 page for streaming and programmable accelerator discussion)\ -\pard\tx940\tx1440\pardeftab720\li1440\fi-1440 -\ls1\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext 1. }\expnd0\expndtw0\kerning0 -Platform Info\ -\ls1\ilvl1\kerning1\expnd0\expndtw0 {\listtext 2. }\expnd0\expndtw0\kerning0 -Apps Info\ -\ls1\ilvl1\kerning1\expnd0\expndtw0 {\listtext 3. }\expnd0\expndtw0\kerning0 -PTX backend results compared to parboil OpenCL code\ -\ls1\ilvl1\kerning1\expnd0\expndtw0 {\listtext 4. }\expnd0\expndtw0\kerning0 -AVX/SPIR results compared to parboil OpenCL CPU version\ -\ls1\ilvl1\kerning1\expnd0\expndtw0 {\listtext 5. 
}\expnd0\expndtw0\kerning0 -Discussion on benefits of Virtual ISA (1 page) [Is it ok for it to be here or should it be before or after evaulation?]\ -\pard\tx1660\tx2160\pardeftab720\li2160\fi-2160 -\ls1\ilvl2\cf0 \kerning1\expnd0\expndtw0 {\listtext 1. }NOT PART OF EVALUATION: \expnd0\expndtw0\kerning0 -\ -\ls1\ilvl2\kerning1\expnd0\expndtw0 {\listtext 2. }\expnd0\expndtw0\kerning0 -Streaming example\ -\pard\tx2380\tx2880\pardeftab720\li2880\fi-2880 -\ls1\ilvl3\cf0 \kerning1\expnd0\expndtw0 {\listtext 1. }\expnd0\expndtw0\kerning0 -Edge Detection\ -\pard\tx220\tx720\tx1440\tx2160\tx2880\pardeftab720\li720\fi-720 -\ls1\ilvl0\cf0 \kerning1\expnd0\expndtw0 {\listtext 8. }\expnd0\expndtw0\kerning0 -Related Work (3/4 page)\ -\ls1\ilvl0\kerning1\expnd0\expndtw0 {\listtext 9. }\expnd0\expndtw0\kerning0 -Conclusion and Future Work (1/4 page)\ -} \ No newline at end of file diff --git a/paper/PPoPPSubmission.pdf b/paper/PPoPPSubmission.pdf deleted file mode 100644 index e0e1cac9b2b227c0feb38a4d91b9a85c18969837..0000000000000000000000000000000000000000 Binary files a/paper/PPoPPSubmission.pdf and /dev/null differ diff --git a/paper/RelatedWork.tex b/paper/RelatedWork.tex deleted file mode 100644 index b6cfba500ca1f43406fa009aa70ebd6c44dbc85b..0000000000000000000000000000000000000000 --- a/paper/RelatedWork.tex +++ /dev/null @@ -1,64 +0,0 @@ -\section{Related Work} -\label{sec:related} -\textbf{Virtual ISAs}: -PTX virtual ISA was deleveloped by nVidia to provide portability across GPUs of -different sizes and across multiple GPU generations. It is however designed to -target nVidia GPUs specifically and does not aim to support other hardware. -There are currently a few projects with the goal to develop a portable object -code distribution format for heterogeneous systems. HSAIL~\cite{HSAIL} and -SPIR~\cite{SPIRKhronosSpec} are two such standards which map well to GPUs and multicore -CPUs. 
However, they support only a restrictive throughput-oriented SIMT -parallelism model, which is not general enough to capture other models of -parallelism like pipeline or streaming parallelism (as explained in -Section~\ref{sec:evaluation:streaming}), whereas these are captured naturally -in our dataflow graph model. - -\textbf{Source Languages}: -Source-level programming languages for heterogeneous -systems such as OpenCL~\cite{OpenCL} and CUDA~\cite{CUDA}, and the -accelerator extensions in OpenACC~\cite{OpenACC} and in recent versions of -OpenMP~\cite{OpenMPAcceleratorModel:IWOMP14}, -all support a -common programming model where a single-threaded kernel function is replicated -across a large number of cores, -usually with explicit copying of data between host and device. -Intel ISPC~\cite{ispc} is a set of language extensions to -C, and an optimizing LLVM-based compiler, that -effectively uses the SPMD programming model to deliver performance using both -multiprocessor and SIMD vector units. -Like PTX and SPIR, all these languages map well to GPUs and vector parallelism. -None of them address object -code portability. Moreover, they all have the same limitations of being -unable to express more general models of data parallelism, like streaming -parallelism. - -RenderScript~\cite{Renderscript} aims to -provide performance and portability across heterogeneous SoC architectures for -Android devices. Like SPIR, it uses LLVM bitcode as its on-device portable -object code format. This format, however, does not have well-defined -parallelism abstractions, instead using some ad hoc -combination of LLVM (scalar and vector) code and run-time operations. -%The language has heavily focused on support for application domains like -%image processing and computer vision. 
- -Domain Specific Languages -(DSLs) such as Delite~\cite{Delite} and -Halide~\cite{Halide} can potentially target different -architectures efficiently using tuning based on domain specific knowledge, but -the techniques are largely limited to the intended domain. - -\textbf{Compiler and Autotuning Approaches}: -Besides virtual ISAs and source level languages, a number of autotuning -frameworks explore interesting methods to distribute computation between -compute units in a heterogeneous system. Petabricks~\cite{PetaBricks} -explores the search space of different algorithm choices and how -they map to CPU and GPU processors. Similarly, in Tangram~\cite{Tangram} a -program is written in interchangeable, composable building blocks, which enables -architecture-specific algorithm and implementation selection. Exploring -algorithm choices is orthogonal to, and can be combined with, our approach. -Moreover, these techniques, -though effective, put a huge burden on the compiler and runtime system to explore a -potentially large search space to find the correct tuning parameters, and it -is not clear how such search strategies will scale up to more realistic -applications. - diff --git a/paper/VirtualISA.tex b/paper/VirtualISA.tex deleted file mode 100644 index d90f8384f1009635cf657c35bc9a740014482003..0000000000000000000000000000000000000000 --- a/paper/VirtualISA.tex +++ /dev/null @@ -1,431 +0,0 @@ -%------------------------------------------------------------------------------ -\section{Virtual ISA Design} -\label{sec:design} -%------------------------------------------------------------------------------ - -This section presents \NAME{}, a virtual ISA design that abstracts away -differences between parallelism models in hardware by exposing only two models -of parallelism: -hierarchical dataflow graphs with side effects -and vector parallelism.
- -Figure~\ref{fig:designexample} shows an example of using \NAME{} -for an image processing filter, -specifically, a non-linear estimate of the Laplacian of a greyscale image. The -estimate is computed by applying a dilation filter and an erosion filter to the -input image and then computing a linear combination of the initial, the dilated -and the eroded image. This example is used throughout the section to demonstrate -the features of \NAME{}. - -\NAME{} is implemented as an extension of the LLVM virtual instruction -set~\cite{LLVMOnlineRef}, -and the code fragments in our examples therefore use LLVM -syntax~\cite{LLVM:CGO04}. - -\begin{center} - \begin{figure*}[hbt] - \centering - %\hspace*{4ex} - \includegraphics[height=7cm]{Figures/designexample.png} - \caption{Non-linear Laplacian computation in \NAME{}} - \label{fig:designexample} - \end{figure*} -\vspace*{-1.5\baselineskip} -\end{center} - -%------------------------------------------------------------------------------ -\subsection{Dataflow Graph} -\label{sec:design:dfg} -%------------------------------------------------------------------------------ - -In \NAME{}, a program is represented as a hierarchical dataflow -graph with side effects, where -nodes represent units of execution, and -dataflow edges describe the explicit data transfer -requirements between these units of execution. -If a pair of nodes (source and destination) is connected by a dataflow edge, -the destination node logically must receive data from the source node before -beginning execution. - -The dataflow graph is a static representation. -However, in order to express data parallelism we may have to represent a -statically unknown number of node instances and/or edge instances, depending -possibly on the size of the input.
To that end, we allow a single static -dataflow node to represent multiple dynamic instances of the node, i.e., a -static node can be replicated at runtime and the resulting dynamic -nodes can be executed independently of each other, -subject only to the dependencies imposed by -the dataflow edges. -As described in Section~\ref{sec:design:impl:intrinsics}, nodes may be replicated -to form an n-dimensional grid; our current implementation allows up to -three dimensions. -Similarly, a static dataflow edge between two static -dataflow nodes may represent multiple dynamic dataflow edges between dynamic -instances of the two dataflow nodes. - -For example, for an iterative four-point nearest-neighbor Jacobi solver -that computes -\begin{eqnarray*} -A_{new}[i,j] = 0.25 * ( A_{old}[i-1,j] + A_{old}[i+1,j] + \\ - A_{old}[i,j-1] + A_{old}[i,j+1] ) -\end{eqnarray*} -on $N \times N$ matrices in each iteration, -the static graph node could represent a single element-wise evaluation of -the above equation and would -be replicated to create $N \times N$ independent dynamic instances. - -Figure~\ref{fig:designexample} demonstrates the components of the non-linear -Laplacian estimate as separate dataflow nodes: \texttt{DilationFilter}, -\texttt{ErosionFilter} and \texttt{LinearCombination}. - -%------------------------------------------------------------------------------ -\subsubsection{Dataflow Node Hierarchy} -\label{sec:design:hierarchy} -%------------------------------------------------------------------------------ - -To allow for modularity and to capture multiple granularities of parallelism, -the dataflow graph is hierarchical, i.e. each dataflow node can either be a -\emph{leaf node} or an \emph{internal node}. -A leaf node contains plain LLVM IR, expressing actual computations, which may -be a mixture of scalar and vector operations. -Vector parallelism is the only form of parallelism available in leaf nodes.
- -An internal node contains a complete dataflow graph, called a -\emph{child graph} of the current graph, and the child graph itself can have -internal nodes and leaf nodes. This design allows for the programmer to -represent logically connected operations performed in several dataflow nodes as -a single dataflow node. This enhances the effectiveness of potential analyses by -providing hints about closely related operations, and allows for the scheduler -to efficiently orchestrate the execution of the dataflow graph by grouping -together appropriate sets of dataflow nodes. -For example, the run-time scheduler may choose to map a single top-level -internal node to a GPU or to each core of a multicore CPU, instead of having -to manage potentially large numbers of finer-grain nodes. - -Leaf nodes may contain instructions to query about the structure of the -underlying dataflow graph, as explained in more detail in -Section~\ref{sec:design:impl:query}. -Also, they may contain side effects, i.e., load and -store instructions accessing global shared memory, -which express implicit data movement through a memory hierarchy. -%Designing the memory hierarchy is beyond the scope of this paper. -Because of these side effects, \NAME{} is not a -``pure dataflow'' model. - -In Figure~\ref{fig:designexample}, the nodes comprising the Laplacian -computation are children, in the hierarchy, of a top level node, {\texttt -LaplacianEstimate}. -{\texttt DilationFilter}, {\texttt ErosionFilter}, and {\texttt -LinearCombination} are leaf dataflow nodes. The dilation and erosion filters -compute the maximum and minimum, respectively, brightness in an area of a pixel -defined by the binary structuring element $B$. The {\texttt LinearCombination} -dataflow node performs the final computation. Figure~\ref{fig:designexample} -shows the LLVM instructions for this node, demonstrating the use of side effects -and instructions querying the structure of the dataflow graph. 
- -Note that the \texttt{LaplacianEstimate} dataflow node, although it is a top -level node in this computation, may itself become a child of a higher level -dataflow node performing an image processing computation that requires the -operation of a Laplacian. This highlights the importance of hierarchy for -providing modularity and code reuse. - -%------------------------------------------------------------------------------ -\subsubsection{Dataflow Edges and Bindings} -\label{sec:design:edges} -%------------------------------------------------------------------------------ - -Explicit data movement between compute nodes -is expressed with dataflow edges. A dataflow edge has the semantics of -copying the corresponding data from the source to the destination dataflow -node. Depending on where the execution of the source and destination is -scheduled, the dataflow edge may be translated down to an explicit copy -between compute units, or communication through shared memory. - -As with dataflow nodes, static dataflow -edges also represent multiple dynamic instances of dataflow edges between -the dynamic -instances of the source and the destination dataflow nodes. A dataflow edge -between two static dataflow nodes can be instantiated at runtime using two -different replication mechanisms: ``all-to-all'', where all dynamic instances of -the source node are connected with all the dynamic instances of the destination -node, thus expressing a barrier between the two groups of nodes, or -``one-to-one'' where a single dynamic instance of the source dataflow node is -connected with the corresponding instance of the destination node. -One-to-one replication requires that the grid structure (number of -dimensions and the extents in each dimension) of the -dynamic instances of the source and destination nodes is identical.
-One-to-one replication -enables various optimizations at the dataflow graph level by expressing the -exact dependency between the instances of the source and destination dataflow -nodes. For example, a graph transformation pass could choose to merge two -consecutive dataflow nodes, since the ``one-to-one'' replication denotes that a dynamic -instance of the second node depends only on data generated from the -corresponding instance of the first node. -%Such transformations would be useful when targeting hardware that benefits -%from VLIW parallelism and thus from longer instruction sequences. - -Figure~\ref{fig:designexample} shows the dataflow edges describing the data -movement of input image $I$, dilated image $I_d$, eroded image -$I_e$, and matrix $B$ between dataflow nodes. - -When an internal (``parent'') graph node contains an inner graph, the incoming -edges of the parent node may provide the inputs to one or more nodes of the -child graph, and conversely with the outgoing edges. -For example, in Figure~\ref{fig:designexample}, -the inputs labeled $I$ and $B$ to node \texttt{Laplacian Estimate} provide -inputs to the nodes \texttt{Dilation Filter}, \texttt{Erosion Filter} and -\texttt{Linear Combination} of the child graph. -Similarly, the output labeled $L$ of node \texttt{Linear Combination} provides -the output of the parent node. -Semantically, these are \emph{not} dataflow edges because no explicit data -movement is implied: rather, these simply represent a \emph{binding} -between the input of a dataflow node and the input of a node within it, -and the same for the outputs. -We show these bindings as undirected edges in our diagrams, as in the figure. -Dataflow edges always connect two nodes within the \emph{same} graph, -representing data transfer between the two nodes. -Bindings always connect inputs or outputs of a parent node with those of -the nodes in a child graph, and they represent a local assignment or renaming -of input and output data.
- -%------------------------------------------------------------------------------ -\subsubsection{Streaming Edges} -\label{sec:design:streaming} -%------------------------------------------------------------------------------ - -Additionally, \NAME{} defines a special type of dataflow edge which we -call a streaming edge, shown as dashed arrows instead of solid ones. -Instead of a one-time data transfer that is expressed using -ordinary dataflow edges, a streaming edge denotes that data items will be -repeatedly transferred through this edge, and thus will need to be processed by -the destination dataflow node. This allows the dataflow graph to express -pipelining, as all nodes with incoming streaming edges will continue executing -until the stream of data is finished. -The stream processing is initiated and terminated by the code that sets up -and initiates execution of the dataflow graph. - -In Figure~\ref{fig:designexample}, the node \texttt{Laplacian Estimate} is a -stage in an image processing pipeline that operates on a stream of -incoming images. -The edge $I$ represents this stream. -Correspondingly, $I_d$, $I_e$ and the Laplacian estimate -$L$ are all streaming edges: they carry the intermediate results and -outputs for the Laplacian for each input image. - -If a node has both streaming and ordinary input dataflow edges -(e.g., $I$ and $B$ to node \texttt{Laplacian Estimate}), -the ordinary edges repeatedly transfer the same data for each node execution, -which in practice can be treated as a constant across node executions. -This optimization allows unnecessary data transfers to be avoided. - -%------------------------------------------------------------------------------ -\subsection{Vector Instructions} -\label{sec:design:vector} -%------------------------------------------------------------------------------ - -The leaf nodes of a dataflow graph express the single-threaded -parts of the computation.
-They contain ordinary LLVM IR, which includes both scalar and vector -instructions. -The LLVM virtual instruction set can be translated down for execution on -a wide range of hardware, which provides a high degree of retargetability -for \NAME{}. - -We extend the LLVM vector instruction set with parametric vector -lengths to enable better performance portability, i.e., more -efficient execution of the same code on various vector hardware. -Evaluating the effect of -parametric vector length on performance is out of the scope of this paper, as -for now we only support one vector target. - -The LLVM IR provided for the {\texttt LinearComputation} in -Figure~\ref{fig:designexample} contains vector instructions, showing vector -parallelism at the leaf level. -The vector lengths are parametric, and are computed from the hardware vector -length returned by \%llvm.visc.getVectorLength(i32 sz), which is a translation-time -constant for a given hardware compute unit. - -%------------------------------------------------------------------------------ -\subsection{Integration with Host Code} -\label{sec:design:host} -%------------------------------------------------------------------------------ - -\NAME{} is aimed to represent operations whose execution would benefit -from executing on data-parallel hardware such as GPUs, vectors, and other -accelerators. -It is not intended for code that performs operations that are -typically executed as host code. The host code contains ordinary LLVM IR for -performing operations that cannot or should not be executed in accelerators such -as file I/O, operations or calls to external libraries that may contain these -operations, as well as initialization, memory allocation, or high level control -flow decisions. - -To integrate \NAME{}, the host code creates one or more Root dataflow -nodes, each with a single dynamic instance, each containing a dataflow graph. 
-Instantiating a root node at runtime translates to launching the execution of -the contained dataflow graph. The result of this operation is the result of the -dataflow graph execution, and can be accessed by the host code. The launch -operation is asynchronous, allowing the host code to continue executing -concurrently with the dataflow graph. The host code can also wait on the result -of a dataflow graph execution at any point after launching the execution of that -graph, ensuring that the computation is complete before accessing the result. - -%Moreover, a kernel represented by a dataflow graph can itself be executed on -%the host, as well as on other compute units such as a GPU. -%For example, a program may consist of a set of data-parallel compute kernels -%and some (possibly parallel) host code, and the kernels may exchange data -%explicitly with each other and with the host. -%Such a program could be represented as a two-level dataflow graph, where nodes -%in the top level graph represent the individual kernels, -%and each node for a data-parallel kernel is (hierarchically) itself a dataflow -%graph representing the parallelism within the kernel. -%The nodes of the latter graph would be vector LLVM instructions, typically -%with side effects due to load and store instructions. - - -%------------------------------------------------------------------------------ -\subsection{Implementation} -\label{sec:design:impl} -%------------------------------------------------------------------------------ - -\lstinputlisting[float=*, language=llvm, caption=\NAME{} code for Laplacian -node in Figure~\ref{fig:designexample}, label={lst:laplacian}]{Code/laplacian.ll} - -We have implemented \NAME{} as an extension of the -LLVM virtual instruction set. -We define new instructions for manipulating and querying the -structure of the data flow graph, as well as initiating execution of a dataflow -graph. 
-To minimize interference with existing LLVM compiler passes, we express the -new instructions as function calls to intrinsic functions, a standard -LLVM mechanism to extend the instruction set and communicate back end-specific -information to a particular back end. A call to an -intrinsic function appears to existing LLVM passes as a function call to an -external function, i.e., it can only have side effects on externally visible -global variables and on memory reachable through pointer arguments. -This mechanism ensures that they do not -perform any transformations that interfere with these instructions. - -The functionality of each dataflow node is described by an explicit LLVM -function. Functions describing internal nodes may only contain -calls to \NAME{} intrinsics. -Functions describing leaf nodes contain LLVM code with scalar and vector -instructions and may -also contain \NAME{} intrinsics used to query -information about the structure of the dataflow graph; -in particular, leaf nodes cannot use the intrinsics to define new graphs. - -The LLVM dataflow intrinsics must refer to graph nodes and edges, -in order to manipulate or query information about them. -We represent dataflow nodes with opaque handles (pointers) and input and -output edges of a node as integer indices. This allows the -backend translator to define the structure and runtime representation of the -nodes and edges. The LLVM type i8* is used for the opaque node handles. -The \NAME{} intrinsics, divided according to their functionality, are described -briefly in the following subsections. 
- -%------------------------------------------------------------------------------ -\subsubsection{\NAME{} Graph Intrinsics} -\label{sec:design:impl:intrinsics} -%------------------------------------------------------------------------------ - -\NAME{} intrinsics manipulating the structure of the dataflow graph: -\begin{itemize} -%% -% \item \textbf{i8* llvm.visc.createNode (Function* F)}: Create a dataflow node -% associated with the function {\texttt F}. -% - \item \textbf{i8* llvm.visc.createNode1D(Function* F, int n)}: Create a - dataflow node with n dynamic instances, all associated with the function - \texttt{F}. Returns the opaque handle for the node. - There are also 2D and 3D versions of this intrinsic, - which take two and three integer arguments instead of one. -% - \item \textbf{void llvm.visc.createEdge (i8* Src, i8* Dst, i32 sp, i32 dp, i1 - ReplType)}: Create a dataflow edge - from node \texttt{Src} to node \texttt{Dst} in the static dataflow graph. - The \texttt{ReplType} argument specifies the pattern of replication for - the static edge: {\tt OneToOne} or {\tt AllToAll}. - $sp$ and $dp$ specify the indices of the output of node {\tt Src} and the - input of node {\tt Dst} that are connected by the edge; these - connections are the same for all dynamic instances of the nodes, in either - pattern of replication. -% - \item \textbf{void llvm.visc.createStreamingEdge (i8* Src, i8* Dst, i32 sp, - i32 dp, i1 ReplType)}: Similarly, but create a streaming dataflow - edge. -% - \item \textbf{void llvm.visc.bind.input (i8* N, i32 ip, i32 ic)}: Map input - \texttt{ip} of current dataflow node to input \texttt{ic} of child node - \texttt{N}. -% - \item \textbf{void llvm.visc.bind.output (i8* N, i32 ic, i32 ip)}: Map output - \texttt{ic} of - child node \texttt{N} to output \texttt{ip} of current dataflow node.
-% -\end{itemize} - -%------------------------------------------------------------------------------ -\subsubsection{\NAME{} Query Intrinsics} -\label{sec:design:impl:query} -%------------------------------------------------------------------------------ - -\NAME{} intrinsics querying the structure of the dataflow graph: -\begin{itemize} -% - \item \textbf{ i8* llvm.visc.getNode()}: Return a handle to the dataflow graph - node associated with the calling function, i.e. the current node. -% - \item \textbf{i8* llvm.visc.getParentNode (i8* N)}: Return a handle to the - hierarchical parent of dataflow graph node \texttt{N}. -%\item \textbf{i32 llvm.visc.getNumDims (i8* N)}: Return the num of dimensions -%of a dynamic node instance of dataflow node N with respect to its -%parent node. -% - \item \textbf{i32 llvm.visc.getNodeInstanceID.[xyz] (i8* N)}: Return the - index of the dynamic node instance of dataflow node N with respect to its - parent node in dimension \texttt{x}, \texttt{y} or \texttt{z}. - ({\tt z} is only valid if node $N$ is replicated in 3D, and {\tt y} in - 2D or 3D.) -% - \item \textbf{i32 llvm.visc.getNumNodeInstances.[xyz] (i8* N)}: Return the - number of dynamic instances of dataflow node N in dimension \texttt{x}, - \texttt{y} or \texttt{z}. -% - \item \textbf{i32 llvm.visc.getVectorLength(i32 typeSz)}: Return a symbolic - constant representing the vector register length in the underlying - architecture for a type of size \texttt{typeSz}.
-% -\end{itemize} - -%------------------------------------------------------------------------------ -\subsubsection{\NAME{} Launch Intrinsics} -\label{sec:design:impl:launch} -%------------------------------------------------------------------------------ - -\NAME{} intrinsics integrating a dataflow graph into the host code: - -\begin{itemize} -% - \item \textbf{i8* llvm.visc.launch(Function* F, argList, struct OutType* out)}: - This is a - variation of \texttt{i8* llvm.visc.createNode (Function* F)} designed to - allow for host variables to be passed to graph node inputs and results to be - returned (unlike - dataflow edges, which pass node outputs to other node inputs). - It creates a single dynamic instance of a Root - dataflow node and associates it with the function \texttt{F}, using - \texttt{argList} as arguments. - The struct {\tt out} is allocated by the caller and is used to - return results from the execution of - the Root node; its type {\tt OutType} must match the return type of - {\tt F}. - The new node is marked as ready for asynchronous execution and control is - returned to the host. - Returns an opaque handle for the node. -% - \item \textbf{void llvm.visc.wait(i8* N)}: Block until execution of dataflow - node \texttt{N} is complete. 
-% -\end{itemize} diff --git a/paper/hetero.bib b/paper/hetero.bib deleted file mode 100644 index 62fb8e91fbcebd699e76be8ff341926f6f56d1dc..0000000000000000000000000000000000000000 --- a/paper/hetero.bib +++ /dev/null @@ -1,1428 +0,0 @@ -@inproceedings{Ohshima:VECPAR06, - author = {Ohshima, Satoshi and Kise, Kenji and Katagiri, Takahiro and Yuba, Toshitsugu}, - title = {Parallel processing of matrix multiplication in a CPU and GPU heterogeneous environment}, - booktitle = {Proceedings of the 7th international conference on High performance computing for computational science}, - series = {VECPAR'06}, - year = {2007}, - isbn = {978-3-540-71350-0}, - location = {Rio de Janeiro, Brazil}, - pages = {305--318}, - numpages = {14}, - url = {http://dl.acm.org/citation.cfm?id=1761728.1761755}, - acmid = {1761755}, - publisher = {Springer-Verlag}, - address = {Berlin, Heidelberg}, -} - -@inproceedings{Lee:ISCA10, - author = {Lee, Victor W. and Kim, Changkyu and Chhugani, Jatin and Deisher, Michael and Kim, Daehyun and Nguyen, Anthony D. and Satish, Nadathur and Smelyanskiy, Mikhail and Chennupaty, Srinivas and Hammarlund, Per and Singhal, Ronak and Dubey, Pradeep}, - title = {Debunking the 100X GPU vs. CPU myth: an evaluation of throughput computing on CPU and GPU}, - booktitle = {Proceedings of the 37th annual international symposium on Computer architecture}, - series = {ISCA '10}, - year = {2010}, - isbn = {978-1-4503-0053-7}, - location = {Saint-Malo, France}, - pages = {451--460}, - numpages = {10}, - url = {http://doi.acm.org/10.1145/1815961.1816021}, - doi = {http://doi.acm.org/10.1145/1815961.1816021}, - acmid = {1816021}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {cpu architecture, gpu architecture, performance analysis, performance measurement, software optimization, throughput computing}, -} - -@inproceedings{Wu:ICS05, - author = {Wu, Peng and Eichenberger, Alexandre E. 
and Wang, Amy and Zhao, Peng}, - title = {An integrated simdization framework using virtual vectors}, - booktitle = {Proceedings of the 19th annual international conference on Supercomputing}, - series = {ICS '05}, - year = {2005}, - isbn = {1-59593-167-8}, - location = {Cambridge, Massachusetts}, - pages = {169--178}, - numpages = {10}, - url = {http://doi.acm.org/10.1145/1088149.1088172}, - doi = {http://doi.acm.org/10.1145/1088149.1088172}, - acmid = {1088172}, - publisher = {ACM}, - address = {New York, NY, USA}, -} - -@inproceedings{VaporSIMD:CGO11, - author = { Nuzman, D. and Dyshel, S. and Rohou, E. and Rosen, I. and - Williams, K. and Yuste, D. and Cohen, A. and Zaks, A.}, - title = {Vapor SIMD: Auto-vectorize once, run everywhere}, - booktitle = {9th Annual IEEE/ACM International Symposium on Code Generation - and Optimization (CGO)}, - year = 2011, - month = apr, - pages = {151--160}, - address={Chamonix, France} -} - -@techreport{MLCReport:TR10, -author = { K. Nahrstedt and L. Angrave and M. Caccamo and R. Campbell and B. Godfrey and I. Gupta and K. Karahalios and R. Kravets and S. Kamin and S. Poole and W. Sanders }, -title = "Mobile Learning Communities --- Are We There Yet?", -institution = "Information Trust Institute, University of Illinois at Urbana-Champaign", -month = {Nov}, -year = {2010} -} - -@article{CogniServe:Micro11, - title = {CogniServe: Heterogeneous Server Architecture for Large-Scale Recognition}, - author = {Ravi Iyer and Sadagopan Srinivasan and Omesh Tickoo and Zhen Fang and Ramesh Illikkal and Steven Zhang and Vineet Chadha and Paul M. 
Stillwell and Seung Eun Lee}, - year = {2011}, - doi = {http://dx.doi.org/10.1109/MM.2011.37}, - tags = {architecture}, - researchr = {http://researchr.org/publication/IyerSTFIZCSL11}, - cites = {0}, - citedby = {0}, - journal = {IEEE Micro}, - volume = {31}, - number = {3}, - pages = {20-31}, -} - - -@techreport{SARNeeds:TR04, -author = { James Wong and Cassandra Robinson }, -title = "Urban Search and Rescue Technology Needs: Identification of Needs", -number = 207771, -institution = "Savannah River National Laboratory", -month = {Nov}, -year = {2004}, -note = "Available at https://www.ncjrs.gov/pdffiles1/nij/grants/207771.pdf" -} - -@ARTICLE{CDSC:IEEEDTC11, -author={Cong, J. and Reinman, G. and Bui, A. and Sarkar, V.}, -journal={Design Test of Computers, IEEE}, -title={Customizable Domain-Specific Computing}, -year={2011}, -month={march-april }, -volume={28}, -number={2}, -pages={6 -15}, -keywords={computing industry;customizable domain specific computing;heat dissipation;parallel general purpose computing system;parallelization era;power density limitation;power performance efficiency;general purpose computers;parallel processing;}, -doi={10.1109/MDT.2010.141}, -ISSN={0740-7475},} - -@BOOK{KFBOOK, -TITLE = {Probabilistic Graphical Models: Principles and Techniques}, -AUTHOR = { Daphne Koller and Nir Friedman}, -PUBLISHER = {The MIT Press}, -EDITION = {1st}, -YEAR = {2009}, -} - -@BOOK{MLBOOK, -TITLE = {Machine Learning}, -AUTHOR = {Tom M. Mitchel}, -PUBLISHER = {McGraw-Hill}, -EDITION = {1st}, -YEAR = {1997}, -} - -@ARTICLE{ICCVTUT, -author={A. Blake and P. Kohli and M.P. Kumar and C. Rother}, -journal={tutorial at IEEE International Conference on Computer Vision}, -title={Introduction to MAP Inference in Discrete Models}, -year={2009}, -url={http://research.microsoft.com/en-us/um/cambridge/projects/tutorial/}, -} - -@INPROCEEDINGS{CUDACUTS, -author={Vineet, V. and Narayanan, P.J.}, -booktitle={Computer Vision and Pattern Recognition Workshops, 2008. CVPRW '08. 
IEEE Computer Society Conference on}, -title={CUDA cuts: Fast graph cuts on the GPU}, -year={2008}, -month={june}, -volume={}, -number={}, -pages={1 -8}, -keywords={CUDA cuts;GPU;Nvidia 8800 GTX;graph cuts;graphics processor unit;image restoration;image segmentation;maxflow algorithm;mincut algorithm;push-relabel algorithm;stereo vision;computer graphic equipment;computer graphics;image restoration;image segmentation;stereo image processing;}, -doi={10.1109/CVPRW.2008.4563095}, -ISSN={},} - -@ARTICLE{LBPVLSI, -author={Chia-Kai Liang and Chao-Chung Cheng and Yen-Chieh Lai and Liang-Gee Chen and Chen, H.H.}, -journal={Circuits and Systems for Video Technology, IEEE Transactions on}, -title={Hardware-Efficient Belief Propagation}, -year={2011}, -month={may }, -volume={21}, -number={5}, -pages={525 -537}, -keywords={Markov random field;graphical model;graphics processing unit;hardware implementation;hardware-efficient belief propagation;loopy belief propagation;message passing;robust functions;stereo matching;tile-wise processing;very large-scale integration circuit;Markov processes;VLSI;belief networks;graphical user interfaces;message passing;}, -doi={10.1109/TCSVT.2011.2125570}, -ISSN={1051-8215},} - -@techreport{CHOI, -author = {Jaesik Choi}, -title={Hardware implementation of MRF MAP estimation on FPGA platform, Ph.D. qualifying examination report, unpublished mss}, -institution={Dept of Electrical and Computer Engineering, University of Illinois at Urbana-Champaign}, -month = {Nov}, -year = {2011} -} - -@ARTICLE{TRWS, -author={Kolmogorov, V.}, -journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, -title={Convergent Tree-Reweighted Message Passing for Energy Minimization}, -year={2006}, -month={oct. 
}, -volume={28}, -number={10}, -pages={1568 -1583}, -keywords={computer vision;convergent tree-reweighted message passing;discrete energy minimization;max-product message passing;computer vision;message passing;trees (mathematics);}, -doi={10.1109/TPAMI.2006.200}, -ISSN={0162-8828},} - -@misc{CONVEY, - author = {}, - title = {{Convey HC-1 Family}}, - howpublished = {http:// www.conveycomputer.com} -} - -@article{AutoPilot, - address = {Dordrecht}, - author = {Zhang, Zhiru and Fan, Yiping and Jiang, Wei and Han, Guoling and Yang, Changqi and Cong, Jason}, - booktitle = {High-Level Synthesis}, - chapter = {6}, - doi = {10.1007/978-1-4020-8588-8\_6}, - editor = {Coussy, Philippe and Morawiec, Adam}, - isbn = {978-1-4020-8587-1}, - keywords = {autoesl, hls, springer-book}, - pages = {99--112}, - posted-at = {2011-10-03 14:09:35}, - priority = {2}, - publisher = {Springer Netherlands}, - title = {{AutoPilot}: A {Platform-Based} {ESL} Synthesis System}, - url = {http://dx.doi.org/10.1007/978-1-4020-8588-8\_6}, - year = {2008} -} - -@misc{Impulse, - author = {}, - title = {{Impulse Accelerated Technologies, Impulse CoDeveloper}}, - howpublished = {http://www.impulseaccelerated.com/}, - year = {2010}, -} - -@misc{Altera, - author = {}, - title = {{Altera, ONIOS II C2H Compiler User Guide}}, - howpublished = {http://www.altera.com/literature/ug/ug_nios2_c2h_compiler.pdf}, - year = {2009}, -} - -@misc{MentorGraphics, - author = {}, - title = {{Mentor Graphics, Catapult C Synthesis}}, - howpublished = {http://www.mentor.com/products/esl/high_level_synthesis/catapult_synthesis/}, - year = {2010}, -} - -@misc{RenderScript, - author = {}, - title = {{RenderScript}}, - howpublished = {\url {http://developer.android.com/guide/topics/renderscript/compute.html}}, -} - -@misc{OpenACC, - author = {}, - title = {{OpenACC-Standard}}, - howpublished = {\url {http://www.openacc-standard.org/}}, -} - -@misc{HSAIL, - author = {}, - title = {{HSAIL}}, - howpublished = {\url 
{http://www.hsafoundation.com/standards/}}, -} - -@INPROCEEDINGS{GAUT, -author={P. Coussy and G. Lhairech-Lebreton}, -booktitle={DATE}, -title={GAUT: An Open Source High-Level Synthesis Tool}, -year={2009}, -month={}, -volume={}, -number={}, -ISSN={},} - -@INPROCEEDINGS{NISC, -author={B. Gorjiara and D. Gajski}, -booktitle={Workshop on Embedded Systems for Real-time Multimedia (ESTIMEDIA)}, -title={Design Space Exploration of C Programs Using NISC: A Case-Study on DCT algorithm}, -year={2005}, -month={}, -volume={}, -number={}, -ISSN={},} - -@article{Diniz2005, -title = "Automatic mapping of C to FPGAs with the DEFACTO compilation and synthesis system", -journal = "Microprocessors and Microsystems", -volume = "29", -number = "2-3", -pages = "51 - 62", -year = "2005", -note = "<ce:title>Special Issue on FPGA Tools and Techniques</ce:title>", -issn = "0141-9331", -doi = "10.1016/j.micpro.2004.06.007", -url = "http://www.sciencedirect.com/science/article/pii/S0141933104000869", -author = "Pedro Diniz and Mary Hall and Joonseok Park and Byoungro So and Heidi Ziegler", -keywords = "Design automation", -keywords = "Parallelizing compiler technology and data dependence analysis", -keywords = "Behavioral synthesis and estimation", -keywords = "Reconfigurable computing", -keywords = "Field-programmable-gate-arrays (FPGAs)" -} - -@BOOK{SPARK, -TITLE = {SPARK: a parallelizing approach to the high-level synthesis of digital circuits}, -AUTHOR = {Sumit Gupta and Rajesh Gupta and Nikil D. Dutt}, -PUBLISHER = {Springer}, -EDITION = {}, -month = {June}, -YEAR = {2009}, -} - -@article{PAPA2009, -author = {Alexandros Papakonstantinou and Karthik Gururaj and John A. Stratton and Deming Chen and Jason Cong and Wen-Mei W. 
Hwu}, -title = {FCUDA: Enabling efficient compilation of CUDA kernels onto FPGAs}, -journal ={Application Specific Processors, Symposium on}, -volume = {0}, -isbn = {978-1-4244-4939-2}, -year = {2009}, -pages = {35-42}, -doi = {http://doi.ieeecomputersociety.org/10.1109/SASP.2009.5226333}, -publisher = {IEEE Computer Society}, -address = {Los Alamitos, CA, USA}, -} - -@INPROCEEDINGS{PAPA2011, - author={Papakonstantinou, A. and Yun Liang and Stratton, J.A. and Gururaj, K. and Deming Chen and Hwu, W.-M.W. and Cong, J.}, - booktitle={2011 IEEE 19th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)}, - title={Multilevel Granularity Parallelism Synthesis on FPGAs}, - year={2011}, - month={may}, - volume={}, - number={}, - pages={178 -185}, - keywords={CUDA kernel mapping;FPGA programming;FPGA-based accelerator;abstraction level;coarse grained parallelism;design layout information;design space search heuristic;hardware spatial parallelism;high-level synthesis technique;lengthy logic synthesis;multigranularity parallelism extraction;multilevel granularity parallelism synthesis;performance evaluation;physical design flow;reconfigurable computing;field programmable gate arrays;integrated circuit layout;logic design;}, - doi={10.1109/FCCM.2011.29}, - ISSN={}, -} - -@INPROCEEDINGS{vu-iShare-globecomm2010, - author={Long Vu and Nahrstedt, K. and Rimac, I. and Hilt, V. 
and Hofmann, M.}, - booktitle={2010 IEEE GLOBECOM Workshops (GC Wkshps)}, - title={iShare: Exploiting opportunistic ad hoc connections for improving data download of cellular users}, - year={2010}, - month={December}, - volume={}, - number={}, - pages={1475 -1480}, - keywords={ad hoc communication;ad hoc mesh network;cellular broadcast channel;cellular link user;cellular unicast channel;data download;iShare;mobile device;sharing protocol;tit-for-tat incentive mechanism;tree-based protocol;broadcast channels;cellular radio;mobile ad hoc networks;protocols;wireless mesh networks;}, - doi={10.1109/GLOCOMW.2010.5700183}, - ISSN={}, -} - -@ARTICLE{yuan-grace1-ieeeMobComp, - author={Wanghong Yuan and Nahrstedt, K. and Adve, S.V. and Jones, D.L. and Kravets, R.H.}, - journal={IEEE Transactions on Mobile Computing}, - title={GRACE-1: cross-layer adaptation for multimedia quality and battery energy}, - year={2006}, - month={July}, - volume={5}, - number={7}, - pages={ 799 - 815}, - keywords={ GRACE-1; battery energy; cross-layer adaptation; mobile devices; multimedia quality; multiple system layers; video quality; mobile computing; multimedia communication; quality of service;}, - doi={10.1109/TMC.2006.98}, - ISSN={1536-1233}, -} - -@inproceedings{yuan-rtMobileSched-sosp03, - author = {Yuan, Wanghong and Nahrstedt, Klara}, - title = {Energy-efficient soft real-time CPU scheduling for mobile multimedia systems}, - booktitle = {Proceedings of the Nineteenth ACM symposium on Operating Systems Principles}, - series = {SOSP '03}, - year = {2003}, - isbn = {1-58113-757-5}, - location = {Bolton Landing, NY, USA}, - pages = {149--163}, - numpages = {15}, - url = {http://doi.acm.org/10.1145/945445.945460}, - doi = {http://doi.acm.org/10.1145/945445.945460}, - acmid = {945460}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {mobile computing, multimedia, power management}, -} - -@article{yuan-multimediaSched-acmTOCS06, - author = {Yuan, Wanghong and Nahrstedt, 
Klara}, - title = {Energy-efficient CPU scheduling for multimedia applications}, - journal = {ACM Trans. Comput. Syst.}, - volume = {24}, - issue = {3}, - month = {August}, - year = {2006}, - issn = {0734-2071}, - pages = {292--331}, - numpages = {40}, - url = {http://doi.acm.org/10.1145/1151690.1151693}, - doi = {http://doi.acm.org/10.1145/1151690.1151693}, - acmid = {1151693}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {Power management, mobile computing, multimedia, soft real-time}, -} - -@INPROCEEDINGS{yuan-recalendar-percom03, - author={Wanghong Yuan and Nahrstedt, K.}, - booktitle={Proceedings of the First IEEE International Conference on Pervasive Computing and Communications, 2003 (PerCom 2003)}, - title={ReCalendar: calendaring and scheduling applications with CPU and energy resource guarantees for mobile devices}, - year={2003}, - month={march}, - volume={}, - number={}, - pages={ 425 - 432}, - keywords={ CPU advance reservation; CPU reservations; CPU resource guarantees; CPUfrequency/voltage adaptation; ReCalendar; admitted reservations; advance reservation scheme; calendaring applications; energy reservations; energy resource guarantees; mobile devices; resource manager; scheduling applications; soft real-time applications; computer power supplies; mobile communication; personal computing; real-time systems; resource allocation; scheduling; telecommunication computing; telecommunication congestion control;}, - doi={10.1109/PERCOM.2003.1192767}, - ISSN={}, -} - -@inproceedings{vu-btWiFi-mswim10, - author = {Vu, Long and Nahrstedt, Klara and Retika, Samuel and Gupta, Indranil}, - title = {Joint bluetooth/wifi scanning framework for characterizing and leveraging people movement in university campus}, - booktitle = {Proceedings of the 13th ACM international conference on Modeling, analysis, and simulation of wireless and mobile systems}, - series = {MSWIM '10}, - year = {2010}, - isbn = {978-1-4503-0274-6}, - location = {Bodrum, 
Turkey}, - pages = {257--265}, - numpages = {9}, - url = {http://doi.acm.org/10.1145/1868521.1868563}, - doi = {http://doi.acm.org/10.1145/1868521.1868563}, - acmid = {1868563}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {android phone, bluetooth trace, people movement characterization, wifi trace}, -} - -@article{vu-jyotish-percom11, - author = {Long Vu and Quang Do and Klara Nahrstedt}, - title = {Jyotish: Constructive approach for context predictions of people movement from joint Wifi/Bluetooth trace}, - journal = {Pervasive and Mobile Computing}, - booktitle = {The Ninth Annual IEEE International Conference on Pervasive Computing and Communications (PerCom 2011)}, - volume = {7}, - number = {6}, - pages = {690 - 704}, - year = {2011}, - issn = {1574-1192}, - doi = {10.1016/j.pmcj.2011.07.004}, - url = {http://www.sciencedirect.com/science/article/pii/S1574119211001167}, - keywords = {People movement prediction, People movement trace, Wifi trace, Bluetooth trace}, -} - -% MDS: I had to fill this in manually, so there may be some slightly incorrect -% information, especially in the volume/number. 
-@article{vu-coada-ijaras11, - author = {Vu, Long and Nahrstedt, Klara and Malik, Rahul and Wang, Qiyan}, - title = {COADA: Leveraging Dynamic Coalition Peer-to-Peer Network for Adaptive Content Download of Cellular Users}, - journal = {International Journal of Adaptive, Resilient, and Autonomic Systems (IJARAS)}, - year = {2011}, - volume = {2}, - number = {2}, - pages = {1 - 22}, - url = {http://www.igi-global.com/viewtitlesample.aspx?id=53463}, - publisher = {IGI Publisher}, -} - -@INPROCEEDINGS{vu-3R-wowmom11, - author={Long Vu and Quang Do and Nahrstedt, K.}, - booktitle={2011 IEEE International Symposium on a World of Wireless, Mobile and Multimedia Networks (WoWMoM 2011)}, - title={3R: Fine-grained encounter-based routing in Delay Tolerant Networks}, - year={2011}, - month={june}, - volume={}, - number={}, - pages={1 -6}, - keywords={3R routing protocol;delay tolerant networks;epidemic routing protocols;fine-grained encounter-based routing;large-scale Bluetooth;message delivery probability;mobile nodes;mobile users;prophet routing protocols;Bluetooth;mobile radio;routing protocols;}, - doi={10.1109/WoWMoM.2011.5986470}, - ISSN={} -} - -% DeNovo papers -@inproceedings{KomuravelliAdve2012, - author = {Rakesh Komuravelli and Sarita V. Adve and Ching-Tsun Chou}, - title = {{Revisiting the Complexity of Hardware Cache Coherence and Some Implications}}, - booktitle = {{Submitted for publication to the 26th IEEE International Parallel and Distributed Processing Symposium (IPDPS)}}, - year = {2012}, -} - -@inproceedings{ChoiKomuravelli2011, - author = {Byn Choi and Rakesh Komuravelli and Hyojin Sung and Robert Smolinski and Nima Honarmand and Sarita V. Adve and Vikram S. Adve and Nicholas P. 
Carter and Ching-Tsun Chou}, - title = {{DeNovo: Rethinking the Memory Hierarchy for Disciplined Parallelism}}, - booktitle = {{20th International Conference on Parallel Architectures and Compilation Techniques (PACT 2011)}}, - year = {2011}, - month = {October}, -} - -@inproceedings{SungKomuravelli2012, - author = {Hyojin Sung and Rakesh Komuravelli and Sarita V. Adve}, - title = {{Efficient Hardware Support for Disciplined Non-Determinism}}, - booktitle = {{Submitted for publication to Proceedings of the 39th annual International Symposium on Computer Architecture (ISCA), 2012}}, - year = {2012} -} - -@INPROCEEDINGS{RanganathanAdve1999, - author={Ranganathan, P. and Adve, S. and Jouppi, N.P.}, - booktitle={Proceedings of the 27th International Symposium on Computer Architecture, 2000.}, - title={Reconfigurable caches and their application to media processing}, - year={2000}, - month={june}, - volume={}, - number={}, - pages={214 -224}, - keywords={cache SRAM arrays;databases;general-purpose processors;media processing;on-chip transistors;reconfigurable cache design;reconfigurable caches;reconfigurable caches-instruction reuse;SRAM chips;cache storage;reconfigurable architectures;}, - doi={}, - ISSN={1063-6897}} - -@inproceedings{BanakarSteinke2002, - author = {Banakar, Rajeshwari and Steinke, Stefan and Lee, Bo-Sik and Balakrishnan, M. 
and Marwedel, Peter}, - title = {{Scratchpad memory: design alternative for cache on-chip memory in embedded systems}}, - booktitle = {{Proceedings of the tenth international symposium on Hardware/software codesign}}, - series = {CODES '02}, - year = {2002}, - isbn = {1-58113-542-4}, - location = {Estes Park, Colorado}, - pages = {73--78}, - year = 2002, - numpages = {6}, - url = {http://doi.acm.org/10.1145/774789.774805}, - doi = {http://doi.acm.org/10.1145/774789.774805}, - acmid = {774805}, - publisher = {ACM}, - address = {New York, NY, USA}, -} - -@incollection {DominguezUdayakumaran2005, - journal = {{Journal of Embedded Computing}}, - booktitle = {{Journal of Embedded Computing}}, - title = {{Heap data allocation to scratch-pad memory in embedded systems}}, - volume = {1}, - issue = {4}, - pages = {521 - 540}, - year = {2005}, - month = {January}, - author = {Angel Dominguez and Sumesh Udayakumaran and Rajeev Barua}, - url = {http://iospress.metapress.com/content/967JFFUMFVPWJ0Y9}, -} - -@article{SasankaLi2007, - author = {Sasanka, Ruchira and Li, Man-Lap and Adve, Sarita V. and Chen, Yen-Kuang and Debes, Eric}, - title = {{ALP: Efficient support for all levels of parallelism for complex media applications}}, - journal = {{ACM Trans. Archit. Code Optim.}}, - volume = {4}, - issue = {1}, - month = {March}, - year = {2007}, - issn = {1544-3566}, - articleno = {3}, - url = {http://doi.acm.org/10.1145/1216544.1216546}, - doi = {http://doi.acm.org/10.1145/1216544.1216546}, - acmid = {1216546}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {DLP, Parallelism, SIMD, TLP, data-level parallelism, media applications, multimedia, vector}, -} - -%%%%%%%%% -% These come from various sections, but I put them all here for now to have -% them in a centralized place. -%%% motivation.tex -% I'm not convinced this is the correct article to cite for the first one. 
-@inproceedings{Borkar:2010, - author={Borkar, S.}, - title= {The Exascale Challenge}, - booktitle = {Keynote at 20th International Conference on Parallel Architectures and Compilation Techniques (PACT 2011)}, - year={2011}, - month={October}, - volume={}, - number={}, - pages={}, -} - -@inproceedings{DarkSilicon:ISCA11, - author={Hadi Esmaeilzadeh and Emily Blem and Renee St. Amant and Karthikeyan Sankaralingam and Doug Burger}, - title={{Dark Silicon and the End of Multicore Scaling}}, - booktitle="{Proceedings of the 38th International Symposium on Computer Architecture}", - year={2011}, - bib2html_dl_pdf = {http://bit.ly/fmPjY4}, - bib2html_pubtype = {Refereed Conference}, - bib2html_rescat = {Architecture}, - MONTH = {June} -} - -@INPROCEEDINGS{Hameed:ISCA10, - author = {Rehan Hameed and Wajahat Qadeer and Megan Wachs and Omid Azizi and Alex Solomatnikov and Benjamin C. Lee and Stephen Richardson and Christos Kozyrakis and Mark Horowitz}, - title = {Understanding sources of inefficiency in general-purpose chips}, - booktitle = {In Proceedings of the 37th Annual International Symposium on Computer Architecture (ISCA 2010)}, - year = {2010}, - url = {http://www.duke.edu/~BCL15/documents/hameed2010-isca-h264.pdf}, -} - -@misc{ARM, - author = {Kevin Krewell}, - title = {ARM Pairs Cortex-A7 With A15}, - howpublished = {http://www.linleygroup.com/newsletters/newsletter\_detail.php?num=4764}, - year = {2011}, - month = {November}, -} - -@INPROCEEDINGS{FCUDA:SASP09, - author={Papakonstantinou, A. and Gururaj, K. and Stratton, J.A. and Chen, D. and Cong, J. and Hwu, W.-M.W.}, - booktitle={Application Specific Processors, 2009. SASP '09. 
IEEE 7th Symposium on}, - title={FCUDA: Enabling efficient compilation of CUDA kernels onto FPGAs}, - year={2009}, - month={July}, - volume={}, - number={}, - pages={35 -42}, - keywords={CUDA kernel;FPGA programming;Moores law;application program interface;clock frequency;compute unified device architecture;computing industry;field programmable gate array;graphics processing unit;multicore system;multiprocessor system;parallel processing;performance per watt boosting;power dissipation;application program interfaces;field programmable gate arrays;multiprocessing systems;parallel architectures;}, - doi={10.1109/SASP.2009.5226333}, - ISSN={}, -} - -@misc{PTX2.3Manual:URL, - author = {Nvidia Compute}, - title = {PTX: Parallel Thread Execution ISA Version 2.3}, - howpublished = {\url{http://developer.download.nvidia.com/compute/DevZone/docs/html/C/doc/ptx_isa_2.3.pdf}}, - year = {2011}, -} - -@inproceedings{LLVA:MICRO03, - author = {Adve, Vikram and Lattner, Chris and Brukman, Michael and Shukla, Anand and Gaeke, Brian}, - title = {LLVA: A Low-level Virtual Instruction Set Architecture}, - booktitle = {Proceedings of the 36th annual IEEE/ACM International Symposium on Microarchitecture}, - series = {MICRO 36}, - year = {2003}, - isbn = {0-7695-2043-X}, - pages = {205--}, - url = {http://dl.acm.org/citation.cfm?id=956417.956545}, - acmid = {956545}, - publisher = {IEEE Computer Society}, - address = {Washington, DC, USA}, -} - -@inproceedings{VectorLLVA:VEE06, - author = {Bocchino,Jr., Robert L. 
and Adve, Vikram S.}, - title = {{Vector LLVA: a virtual vector instruction set for media processing}}, - booktitle = {Proceedings of the 2nd international conference on Virtual execution environments}, - series = {VEE '06}, - year = {2006}, - isbn = {1-59593-332-8}, - location = {Ottawa, Ontario, Canada}, - pages = {46--56}, - numpages = {11}, - url = {http://doi.acm.org/10.1145/1134760.1134769}, - doi = {http://doi.acm.org/10.1145/1134760.1134769}, - acmid = {1134769}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {SIMD, multimedia, vector, virtual instruction sets}, -} - -@inproceedings{SVA:SOSP07, - author = {Criswell, John and Lenharth, Andrew and Dhurjati, Dinakar and Adve, Vikram}, - title = {Secure virtual architecture: a safe execution environment for commodity operating systems}, - booktitle = {Proceedings of twenty-first ACM SIGOPS symposium on Operating systems principles}, - series = {SOSP '07}, - year = {2007}, - isbn = {978-1-59593-591-5}, - location = {Stevenson, Washington, USA}, - pages = {351--366}, - numpages = {16}, - url = {http://doi.acm.org/10.1145/1294261.1294295}, - doi = {http://doi.acm.org/10.1145/1294261.1294295}, - acmid = {1294295}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {compiler, memory safety, operating systems, security, type safety, typed assembly language, virtual machine}, -} - -@inproceedings{SVA:UsenixSecurity09, - author = {Criswell, John and Geoffray, Nicolas and Adve, Vikram}, - title = {Memory safety for low-level software/hardware interactions}, - booktitle = {Proceedings of the 18th conference on USENIX security symposium}, - series = {SSYM'09}, - year = {2009}, - location = {Montreal, Canada}, - pages = {83--100}, - numpages = {18}, - url = {http://dl.acm.org/citation.cfm?id=1855768.1855774}, - acmid = {1855774}, - publisher = {USENIX Association}, - address = {Berkeley, CA, USA}, -} - -@inproceedings{Lime:OOPSLA10, - author = {Auerbach, Joshua and Bacon, David F. 
and Cheng, Perry and Rabbah, Rodric}, - title = {Lime: a Java-compatible and synthesizable language for heterogeneous architectures}, - booktitle = {Proceedings of the ACM international conference on Object oriented programming systems languages and applications}, - series = {OOPSLA '10}, - year = {2010}, - isbn = {978-1-4503-0203-6}, - location = {Reno/Tahoe, Nevada, USA}, - pages = {89--108}, - numpages = {20}, - url = {http://doi.acm.org/10.1145/1869459.1869469}, - doi = {http://doi.acm.org/10.1145/1869459.1869469}, - acmid = {1869469}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {fpga, functional programming, high level synthesis, object oriented, reconfigurable architecture, streaming, value type}, -} - -@misc{UPCRC:URL, - howpublished = {http://upcrc.illinois.edu/}, - author = {Universal Parallel Computing Research Center}, -} - -@misc{UPCRCSummerSchool2011:URL, - howpublished = {http://upcrc.illinois.edu/summer/2011/index.html}, - author = {UPCRC Illinois Summer School on Multicore Programming}, - month = {July}, - year = {2011}, -} - -@misc{CUDASummerSchool09:URL, - howpublished = {http://www.greatlakesconsortium.org/events/GPUMulticore/agenda.html}, - author = {Summer School 2008: Accelerators for Science and Engineering Applications: GPUs and Multicores}, - month = {August}, - year = {2008}, -} - -%%% programmability.tex -@misc{OpenCL:URL, - howpublished = {http://www.khronos.org/opencl/}, - author = {{Khronos Group -- OpenCL}}, -} - - -@misc{IntelOpenCL:URL, - howpublished = {https://software.intel.com/en-us/intel-opencl}, - author = {{Intel}}, -} - -@misc{NVVM:URL, - howpublished = {http://docs.nvidia.com/cuda/nvvm-ir-spec}, - author = {{NVVM IR Specification 1.2}}, -} - -@misc{Renderscript:URL, - title = {Renderscript}, - howpublished = {http://developer.android.com/reference/android/renderscript/package-summary.html}, - author = {{Android Developers}}, -} - -@misc{LLVMReferenceManual:URL, - howpublished = 
{http://llvm.org/releases/2.9/docs/LangRef.html}, - title = {{LLVM Language Reference Manual}}, -} - -@misc{ArBB:WHERE, - title = {{Array Building Blocks: A Flexible Parallel Programming Model for Multicore and Many-Core Architectures}}, - author = {Anwar Ghuloum and Amanda Sharp and Noah Clemons and Stefanus Du Toit and Rama Malladi and Mukesh Gangadhar and Michael McCool and Hans Pabst}, - howpublished = {http://drdobbs.com/go-parallel/article/showArticle.jhtml?articleID=227300084}, - year = {2010}, - month = {September}, - note = {URL}, -} - -@article{CnC:SciProg10, - title = {{Concurrent Collections}}, - author = {Zoran Budimlic and Michael Burke and Vincent Cavé and Kathleen Knobe and Geoff Lowney and Ryan Newton and Jens Palsberg and David Peixotto and Vivek Sarkar and Frank Schlimbach and Sagnak Tasirlar}, - journal = {Scientific Programming}, - year = {2010}, - pages = {203-217}, - keywords = {Computer & Communication Sciences}, - volume = {18}, - number = {3-4}, - url = {http://iospress.metapress.com/content/83w0360mk786443n/}, -} - -@incollection {StreamIt:WHERE, - author = {Thies, William and Karczmarek, Michal and Amarasinghe, Saman}, - affiliation = {Massachusetts Institute of Technology Laboratory for Computer Science Cambridge MA 02139}, - title = {StreamIt: A Language for Streaming Applications}, - booktitle = {Compiler Construction}, - series = {Lecture Notes in Computer Science}, - editor = {Horspool, R.}, - publisher = {Springer Berlin / Heidelberg}, - isbn = {978-3-540-43369-9}, - keyword = {Computer Science}, - pages = {49-84}, - volume = {2304}, - url = {http://dx.doi.org/10\.1007/3\-540\-45937\-5\_14}, - note = {10.1007/3\-540\-45937\-5\_14}, - year = {2002} -} - -@inproceedings{APA:PLDI05, - author = {Chris Lattner and Vikram Adve}, - title = "{Automatic Pool Allocation: Improving Performance by Controlling Data Structure Layout in the Heap}", - booktitle = "{Proceedings of the 2005 ACM SIGPLAN Conference on Programming Language Design and 
Implementation (PLDI'05)}", - address = {Chigago, Illinois}, - month = {June}, - year = {2005} -} - -@inproceedings{DPJ:HotPar09, - author = {Robert Bocchino and Vikram Adve and Sarita Adve and Marc -Snir}, - title = {{Parallel programming must be deterministic by default}}, - booktitle = {First USENIX Workshop on Hot Topics in Parallelism -(HotPar)}, - year = {2009}, -} - -@inproceedings{DPJ:OOPSLA09, - author = {Bocchino,Jr., Robert L. and Adve, Vikram S. and Dig, Danny and Adve, Sarita V. and Heumann, Stephen and Komuravelli, Rakesh and Overbey, Jeffrey and Simmons, Patrick and Sung, Hyojin and Vakilian, Mohsen}, - title = {A type and effect system for deterministic parallel Java}, - booktitle = {Proceeding of the 24th ACM SIGPLAN conference on Object oriented programming systems languages and applications}, - series = {OOPSLA '09}, - year = {2009}, - isbn = {978-1-60558-766-0}, - location = {Orlando, Florida, USA}, - pages = {97--116}, - numpages = {20}, - url = {http://doi.acm.org/10.1145/1640089.1640097}, - doi = {http://doi.acm.org/10.1145/1640089.1640097}, - acmid = {1640097}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {commutativity, determinism, deterministic parallelism, effect systems, effects}, -} - -@inproceedings{DPJ:POPL11, - author = {Bocchino, Robert and Stephen Heumann and Nima Honarmand and -Sarita Adve and Vikram Adve and Adam Welc and Tatiana Shpeisman}, - title = {{Safe nondeterminism in a deterministic-by-default parallel -language}}, - booktitle = {POPL}, - year = {2011}, -} - -@inproceedings{Bocchino:ECOOP11, - author = {Bocchino, Robert L. 
and Adve, Vikram S.}, - title = {Types, regions, and effects for safe programming with -object-oriented parallel frameworks}, - booktitle = {Proceedings of the 25th European conference on Object-oriented -programming}, - series = {ECOOP'11}, - year = {2011}, - isbn = {978-3-642-22654-0}, - location = {Lancaster, UK}, - pages = {306--332}, - numpages = {27}, - url = {http://dl.acm.org/citation.cfm?id=2032497.2032519}, - acmid = {2032519}, - publisher = {Springer-Verlag}, - address = {Berlin, Heidelberg}, -} - - -@inproceedings{bikshandi:htas:ppopp:06, - author = {Bikshandi, Ganesh and Guo, Jia and Hoeflinger, Daniel and Almasi, Gheorghe and Fraguela, Basilio B. and Garzar\'{a}n, Mar\'{\i}a J. and Padua, David and von Praun, Christoph}, - title = {Programming for parallelism and locality with hierarchically tiled arrays}, - booktitle = {Proceedings of the eleventh ACM SIGPLAN symposium on Principles and practice of parallel programming}, - series = {PPoPP '06}, - year = {2006}, - isbn = {1-59593-189-9}, - location = {New York, New York, USA}, - pages = {48--57}, - numpages = {10}, - url = {http://doi.acm.org/10.1145/1122971.1122981}, - doi = {http://doi.acm.org/10.1145/1122971.1122981}, - acmid = {1122981}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {data-parallel, locality enhancement, parallel programming, tiling}, -} - -@inproceedings{guo:htas:ppopp:08, - author = {Guo, Jia and Bikshandi, Ganesh and Fraguela, Basilio B. and Garzaran, Maria J. 
and Padua, David}, - title = {Programming with tiles}, - booktitle = {Proceedings of the 13th ACM SIGPLAN Symposium on Principles and practice of parallel programming}, - series = {PPoPP '08}, - year = {2008}, - isbn = {978-1-59593-795-7}, - location = {Salt Lake City, UT, USA}, - pages = {111--122}, - numpages = {12}, - url = {http://doi.acm.org/10.1145/1345206.1345225}, - doi = {http://doi.acm.org/10.1145/1345206.1345225}, - acmid = {1345225}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {data-parallel, locality, parallel programming, tiling}, -} - - - -@InProceedings{KimNarayanan2011, - author = {E. P. Kim and S. P. Narayanan and N. R. Shanbhag and D. L. Jones}, - title = {Low-power and error-resilient PN code acquisition filter via statistical error compensation}, - OPTcrossref = {}, - OPTkey = {}, - OPTbooktitle = {IEEE Custom Integrated Circuits Conference}, - OPTpages = {}, - OPTyear = {2011}, - OPTeditor = {}, - OPTvolume = {}, - OPTnumber = {}, - OPTseries = {}, - OPTaddress = {}, - OPTmonth = {}, - OPTorganization = {}, - OPTpublisher = {}, - OPTnote = {}, - OPTannote = {} -} - - - -@Article{HegdeShanbhag2001, - author = {R. Hegde and N. R. Shanbhag}, - title = {Soft digital signal processing}, - journal = {IEEE Trans. VLSI Systems}, - year = {2001}, - OPTkey = {}, - OPTvolume = {9}, - OPTnumber = {6}, - OPTpages = {813--823}, - OPTmonth = {}, - OPTnote = {}, - OPTannote = {} -} - - - - -@Article{KimShanbhag2010, - author = {E. Kim and Naresh R. Shanbhag}, - title = {Soft N-modular redundancy}, - journal = {IEEE Computer}, - year = {2010}, - OPTkey = {}, - OPTvolume = {}, - OPTnumber = {}, - OPTpages = {}, - OPTmonth = {dec}, - OPTnote = {}, - OPTannote = {} -} - - - -@Article{NarayananVaratkar2010, - author = {S. Narayanan and G. Varatkar and D. L. Jones and N. R. 
Shanbhag}, - title = {Computation as estimation: A general framework for robustness and energy-efficiency in SoCs}, - journal = {IEEE Transactions on Signal Processing}, - year = {2010}, - OPTkey = {}, - OPTvolume = {58}, - OPTnumber = {8}, - OPTpages = {4416--4421}, - OPTmonth = {}, - OPTnote = {}, - OPTannote = {} -} - - - -@InProceedings{AbdallahShanbhag2010, - author = {R. Abdallah and N. Shanbhag}, - title = {Robust energy-efficient DSP systems via output probability processing}, - OPTcrossref = {}, - OPTkey = {}, - OPTbooktitle = {Proc. of Int. Conf. on Computer Design}, - OPTpages = {}, - OPTyear = {2010}, - OPTeditor = {}, - OPTvolume = {}, - OPTnumber = {}, - OPTseries = {}, - OPTaddress = {}, - OPTmonth = {oct}, - OPTorganization = {}, - OPTpublisher = {}, - OPTnote = {}, - OPTannote = {} -} - - -@Article{LowPowerDSPs:IEEEJSSC02, - title = {A Design Environment for High-Throughput Low-Power - Dedicated Signal Processing Systems}, - author = {W. Rhett Davis and Ning Zhang and Kevin Camera and - Dejan Markovic and Tina Smilkstein and M. Josie Ammer and - Engling Yeo and Stephanie Augsburger and Borivoje Nikolic - and Robert W. Brodersen}, - year = {2011}, - journal = {IEEE Journal of Solid-State Circuits}, - volume = {37}, - number = {3}, - pages = {420-431} -} - -@conference{MicronMemory, - author = {J. Thomas Pawlowski}, - title = {{Hybrid Memory Cube (HMC)}}, - journal = {Hot Chips 23}, - year = {2011}, - month = {August}, - howpublished = {hotchips.org/uploads/hc23/HC23.18.3-memory-FPGA/HC23.18.320-HybridCube-Pawlowski-Micron.pdf}, -} - -@INPROCEEDINGS{MaiPaaske2000, -author={Mai, K. and Paaske, T. and Jayasena, N. and Ho, R. and Dally, W.J. and Horowitz, M.}, -booktitle={Computer Architecture, 2000. 
Proceedings of the 27th International Symposium on}, title={Smart Memories: a modular reconfigurable architecture}, -year={2000}, -month={june}, -volume={}, -number={}, -pages={161 -171}, -keywords={Hydra speculative multiprocessor;Imagine stream processor;Smart Memories;VLSI technology scaling;conflicting requirements;modular reconfigurable architecture;performance degradation;simulations;digital simulation;multiprocessing systems;performance evaluation;reconfigurable architectures;}, -doi={}, -ISSN={1063-6897}, -} - -@inproceedings{LeverichArakida2007, - author = {Leverich, Jacob and Arakida, Hideho and Solomatnikov, Alex and Firoozshahian, Amin and Horowitz, Mark and Kozyrakis, Christos}, - title = {Comparing memory systems for chip multiprocessors}, - booktitle = {Proceedings of the 34th annual international symposium on Computer architecture}, - series = {ISCA '07}, - year = {2007}, - isbn = {978-1-59593-706-3}, - location = {San Diego, California, USA}, - pages = {358--368}, - numpages = {11}, - url = {http://doi.acm.org/10.1145/1250662.1250707}, - doi = {10.1145/1250662.1250707}, - acmid = {1250707}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {chip multiprocessors, coherent caches, locality optimizations, parallel programming, streaming memory}, -} - -@inproceedings{FiroozshahianSolomatnikov2009, - author = {Firoozshahian, Amin and Solomatnikov, Alex and Shacham, Ofer and Asgar, Zain and Richardson, Stephen and Kozyrakis, Christos and Horowitz, Mark}, - title = {A memory system design framework: creating smart memories}, - booktitle = {Proceedings of the 36th annual international symposium on Computer architecture}, - series = {ISCA '09}, - year = {2009}, - isbn = {978-1-60558-526-0}, - location = {Austin, TX, USA}, - pages = {406--417}, - numpages = {12}, - url = {http://doi.acm.org/10.1145/1555754.1555805}, - doi = {10.1145/1555754.1555805}, - acmid = {1555805}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = 
{cache coherence, memory access protocol, memory systems, multi-core processors, protocol controller, reconfigurable architecture, stream programming, transactional memory}, -} - -@misc{SPIRKhronosSpec, - howpublished = {http://www.khronos.org/registry/cl/specs/spir\_spec-1.0-provisional.pdf}, - author = {{Khronos Group}}, - title = {{SPIR 1.0 Specification for OpenCL}}, -} - - -@misc{CUDA, - howpublished = {http://docs.nvidia.com/cuda/}, - author = {{nVidia}}, - title = {{CUDA Toolkit Documentation v7.5}}, -} - -@article{Delite:SIGPLAN11, - author = {Rompf, Tiark and Odersky, Martin}, - title = {Lightweight modular staging: a pragmatic approach to runtime code generation and compiled DSLs}, - journal = {SIGPLAN Not.}, - issue_date = {Febuary 2011}, - volume = {46}, - number = {2}, - month = oct, - year = {2010}, - issn = {0362-1340}, - pages = {127--136}, - numpages = {10}, - url = {http://doi.acm.org/10.1145/1942788.1868314}, - doi = {10.1145/1942788.1868314}, - acmid = {1868314}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {code generation, domain-specific languages, language virtualization, multi-stage programming}, -} - -@misc{RiverTrail:WHERE, - howpublished = {https://github.com/RiverTrail/RiverTrail/wiki}, - author = {{Intel Corp.}}, - title = {{River Trail API}}, -} - -@inproceedings{DPJNondet:POPL2011, - author = {Bocchino,Jr., Robert L. and Heumann, Stephen and Honarmand, Nima and Adve, Sarita V. and Adve, Vikram S. 
and Welc, Adam and Shpeisman, Tatiana}, - title = {Safe nondeterminism in a deterministic-by-default parallel language}, - booktitle = {Proceedings of the 38th annual ACM SIGPLAN-SIGACT symposium on Principles of programming languages}, - series = {POPL '11}, - year = {2011}, - isbn = {978-1-4503-0490-0}, - location = {Austin, Texas, USA}, - pages = {535--548}, - numpages = {14}, - url = {http://doi.acm.org/10.1145/1926385.1926447}, - doi = {10.1145/1926385.1926447}, - acmid = {1926447}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {atomic sections, atomicity, data race freedom, determinism, effect systems, effects, nondeterminism, parallel programming languages, strong isolation, tranasctions, transactional memory}, -} - -@mastersthesis{Komuravelli2011, - author = {Komuravelli Rakesh}, - title = {Verification and Performance of the DeNovo Cache Coherence Protocol}, - year = {2010}, - school = {University of Illinois at Urbana-Champaign}, - } - -@inproceedings{ChoiKomuravelli2010, - author = {Choi, Byn and Komuravelli, Rakesh and Lu, Victor and Sung, Hyojin and Bocchino, Robert L. and Adve, Sarita V. and Hart, John C.}, - title = {Parallel SAH k-D tree construction}, - booktitle = {Proceedings of the Conference on High Performance Graphics}, - series = {HPG '10}, - year = {2010}, - location = {Saarbrucken, Germany}, - pages = {77--86}, - numpages = {10}, - url = {http://dl.acm.org/citation.cfm?id=1921479.1921492}, - acmid = {1921492}, - publisher = {Eurographics Association}, - address = {Aire-la-Ville, Switzerland, Switzerland}, -} - -@misc{HSA, - author = {George Kyriazis}, - title = {{Heterogeneous System Architecture: A Technical Review}}, - howpublished = {http://developer.amd.com/Resources/hc/heterogeneous-systems-architecture/Asset/hsa10.pdf}, - year = {2012}, -} - -@article{MiloHillSorinCACM, - author = {Martin, Milo M. K. and Hill, Mark D. 
and Sorin, Daniel J.}, - title = {Why on-chip cache coherence is here to stay}, - journal = {Commun. ACM}, - issue_date = {July 2012}, - volume = {55}, - number = {7}, - month = jul, - year = {2012}, - issn = {0001-0782}, - pages = {78--89}, - numpages = {12}, - url = {http://doi.acm.org/10.1145/2209249.2209269}, - doi = {10.1145/2209249.2209269}, - acmid = {2209269}, - publisher = {ACM}, - address = {New York, NY, USA}, -} - -@article{SARC, - author={Kaxiras, S. and Keramidas, G.}, - journal={{IEEE Micro}}, - title={{SARC Coherence: Scaling Directory Cache Coherence in Performance and Power}}, - year={2010}, - month={sept.-oct.}, - volume={30}, - number={5}, - pages={54 -65}, - keywords={SARC coherence;directory cache coherence;directory coherence protocols;power scalability;shared-memory chip multiprocessors;microprocessor chips;shared memory systems;}, - doi={10.1109/MM.2010.82}, - ISSN={0272-1732}, -} - -@inproceedings{KelmJohnson2009, - author = {Kelm, John H. and Johnson, Daniel R. and Johnson, Matthew R. and Crago, Neal C. and Tuohy, William and Mahesri, Aqeel and Lumetta, Steven S. and Frank, Matthew I. and Patel, Sanjay J.}, - title = {{Rigel: An Architecture and Scalable Programming Interface for a 1000-core Accelerator}}, - booktitle = {ISCA}, - year = {2009}, - isbn = {978-1-60558-526-0}, - pages = {}, - location = {Austin, TX, USA}, - doi = {http://doi.acm.org/10.1145/1555754.1555774}, - weblink = {http://doi.acm.org/10.1145/1555754.1555774} -} - -@misc{Tilera, - title={Tilera TILEPro64 processor}, - author={Tilera}, - note={\url{http://www.tilera.com/products/processors/TILEPRO64}}, -} - -@inproceedings{MattsonRiepen2010, - author = {Mattson, Timothy G. 
and Riepen, Michael and Lehnig, Thomas and Brett, Paul and Haas, Werner and Kennedy, Patrick and Howard, Jason and Vangal, Sriram and Borkar, Nitin and Ruhl, Greg and Dighe, Saurabh}, - title = {{The 48-core SCC Processor: the Programmer's View}}, - booktitle = {{Proceedings of the 2010 ACM/IEEE International Conference for High Performance Computing, Networking, Storage and Analysis}}, - series = {SC '10}, - year = {2010}, - isbn = {978-1-4244-7559-9}, - pages = {1--11}, - numpages = {11}, - url = {http://dx.doi.org/10.1109/SC.2010.53}, - doi = {10.1109/SC.2010.53}, - acmid = {1884676}, - publisher = {IEEE Computer Society}, - address = {Washington, DC, USA}, -} - -@inproceedings{MattsonRiepen2010, - author = {Michael Bauer and Sean Treichler and Elliot Slaughter and Alex Aiken}, - title = {{Legion: Expressing Locality and Independence with Logical Regions}}, - booktitle = {{Proceedings of the 2012 ACM/IEEE International Conference for High Performance Computing, Networking, Storage and Analysis}}, - series = {SC '12}, - year = {2012}, - pages = {1--11}, - numpages = {11}, - url = {http://dx.doi.org/10.1109/SC.2010.53}, -} - -@misc{Fermi, - author = {{nVidia}}, - title = {{NVIDIA's Next Generation CUDA Compute Architecture: Fermi}}, - howpublished = {http://www.nvidia.com/content/PDF/fermi_white_papers/NVIDIA_Fermi_Compute_Architecture_Whitepaper.pdf} -} - -@INPROCEEDINGS{Hofstee2005, - author={Hofstee, H.P.}, - booktitle={High-Performance Computer Architecture, 2005. HPCA-11. 
11th International Symposium on}, - title={Power efficient processor architecture and the cell processor}, - year={2005}, - month={feb.}, - volume={}, - number={}, - pages={ 258 - 262}, - keywords={ architecture decision; cell processor; design decision; media applications; microarchitectural enhancement; microprocessor design; nonhomogeneous\ - SMP; cellular radio; microprocessor chips; power supply circuits;}, - doi={10.1109/HPCA.2005.26}, - ISSN={1530-0897}, -} - -@ARTICLE{GschwindHofstee2006, - author={Gschwind, M. and Hofstee, H.P. and Flachs, B. and Hopkin, M. and Watanabe, Y. and Yamazaki, T.}, - journal={Micro, IEEE}, - title={Synergistic Processing in Cell's Multicore Architecture}, - year={2006}, - month={march-april }, - volume={26}, - number={2}, - pages={10 -24}, - keywords={Cell Broadband Engine;Cell multicore architecture;RISC principle;SIMD processing;compiler optimization;data-parallel architecture;multithreaded ex\ -ecution environment;synergistic processor unit;thread-level parallelism;logic design;microprocessor chips;multi-threading;parallel architectures;program com\ -pilers;reduced instruction set computing;}, - doi={10.1109/MM.2006.41}, - ISSN={0272-1732}, -} - -@article{LyonsHempstead2012, - author = {Lyons, Michael J. and Hempstead, Mark and Wei, Gu-Yeon and Brooks, David}, - title = {The accelerator store: A shared memory framework for accelerator-based systems}, - journal = {ACM Trans. Archit. 
Code Optim.}, - issue_date = {January 2012}, - volume = {8}, - number = {4}, - month = jan, - year = {2012}, - issn = {1544-3566}, - pages = {48:1--48:22}, - articleno = {48}, - numpages = {22}, - url = {http://doi.acm.org/10.1145/2086696.2086727}, - doi = {10.1145/2086696.2086727}, - acmid = {2086727}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {Hardware acceleration, low power, memory systems, shared memory}, -} - -@article{UdayakumaranDominguez2006, - author = {Udayakumaran, Sumesh and Dominguez, Angel and Barua, Rajeev}, - title = {Dynamic allocation for scratch-pad memory using compile-time decisions}, - journal = {ACM Trans. Embed. Comput. Syst.}, - issue_date = {May 2006}, - volume = {5}, - number = {2}, - month = may, - year = {2006}, - issn = {1539-9087}, - pages = {472--511}, - numpages = {40}, - url = {http://doi.acm.org/10.1145/1151074.1151085}, - doi = {10.1145/1151074.1151085}, - acmid = {1151085}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {Memory allocation, compiler, embedded systems, scratch pad, software caching, software-managed cache}, -} - - -@misc{NDK:ABIChoicesWebPage, - author = {Google}, - title = {ABI Management for Applications Using the Android Native Development Kit}, - year = 2014, - url = {https://developer.android.com/ndk/guides/abis.html}, -} - - -@misc{VRIR:TechReport14, - author = {{R}ahul {G}arg and {L}aurie {H}endren}, - title = {Design of {VRIR}}, - year = 2014, - url = {http://www.raijincl.org/velociraptor/vrir.pdf}, -} - - -@inproceedings{Velociraptor:PACT14, - author = {Garg, Rahul and Hendren, Laurie}, - title = {Velociraptor: An Embedded Compiler Toolkit for Numerical Programs Targeting CPUs and GPUs}, - booktitle = {Proc. 23rd Int'l Conf. 
on Parallel Architectures and Compilation Techniques}, - series = {PACT '14}, - year = {2014}, - isbn = {978-1-4503-2809-8}, - location = {Edmonton, AB, Canada}, - pages = {317--330}, - numpages = {14}, - url = {http://doi.acm.org/10.1145/2628071.2628097}, - doi = {10.1145/2628071.2628097}, - acmid = {2628097}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {compiler framework for array-based language, gpu hybrid systems, matlab, python}, -} - -@inproceedings{Halide, - author = {Ragan-Kelley, Jonathan and Barnes, Connelly and Adams, Andrew and Paris, Sylvain and Durand, Fr{\'e}do and Amarasinghe, Saman}, - title = {Halide: A Language and Compiler for Optimizing Parallelism, Locality, and Recomputation in Image Processing Pipelines}, - booktitle = {Proceedings of the 34th ACM SIGPLAN Conference on Programming Language Design and Implementation}, - series = {PLDI '13}, - year = {2013}, - isbn = {978-1-4503-2014-6}, - location = {Seattle, Washington, USA}, - pages = {519--530}, - numpages = {12}, - url = {http://doi.acm.org/10.1145/2491956.2462176}, - doi = {10.1145/2491956.2462176}, - acmid = {2462176}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {autotuning, compiler, domain specific language, gpu, image processing, locality, optimization, parallelism, redundant computation, vectorization}, -} - -@inproceedings{PetaBricks, - author = {Ansel, Jason and Chan, Cy and Wong, Yee Lok and Olszewski, Marek and Zhao, Qin and Edelman, Alan and Amarasinghe, Saman}, - title = {PetaBricks: A Language and Compiler for Algorithmic Choice}, - booktitle = {Proceedings of the 30th ACM SIGPLAN Conference on Programming Language Design and Implementation}, - series = {PLDI '09}, - year = {2009}, - isbn = {978-1-60558-392-1}, - location = {Dublin, Ireland}, - pages = {38--49}, - numpages = {12}, - url = {http://doi.acm.org/10.1145/1542476.1542481}, - doi = {10.1145/1542476.1542481}, - acmid = {1542481}, - publisher = {ACM}, - address = {New 
York, NY, USA}, - keywords = {adaptive, algorithmic choice, autotuning, compiler, implicitly parallel, language}, -} - -@inproceedings{ispc, - title={ispc: A SPMD compiler for high-performance CPU programming}, - author={Pharr, Matt and Mark, William R}, - booktitle={Innovative Parallel Computing (InPar), 2012}, - pages={1--13}, - year={2012}, - organization={IEEE} -} - - -@article{Delite, - author = {Sujeeth, Arvind K. and Brown, Kevin J. and Lee, Hyoukjoong and Rompf, Tiark and Chafi, Hassan and Odersky, Martin and Olukotun, Kunle}, - title = {Delite: A Compiler Architecture for Performance-Oriented Embedded Domain-Specific Languages}, - journal = {ACM Trans. Embed. Comput. Syst.}, - issue_date = {July 2014}, - volume = {13}, - number = {4s}, - month = apr, - year = {2014}, - issn = {1539-9087}, - pages = {134:1--134:25}, - articleno = {134}, - numpages = {25}, - url = {http://doi.acm.org/10.1145/2584665}, - doi = {10.1145/2584665}, - acmid = {2584665}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {Domain-specific languages, code generation, language virtualization, multistage programming}, -} - -@article{Parboil, - title={Parboil: A revised benchmark suite for scientific and commercial throughput computing}, - author={Stratton, John A and Rodrigues, Christopher and Sung, I-Jui and Obeid, Nady and Chang, Li-Wen and Anssari, Nasser and Liu, Geng Daniel and Hwu, Wen-Mei W}, - journal={Center for Reliable and High-Performance Computing}, - year={2012} -} - - -@article{Opencl, - title={OpenCL: A parallel programming standard for heterogeneous computing systems}, - author={Stone, John E and Gohara, David and Shi, Guochun}, - journal={Computing in science and engineering}, - volume={12}, - number={1-3}, - pages={66--73}, - year={2010}, - publisher={Institute of Electrical and Electronics Engineers, Inc., United States} -} - -@misc{Liwen:Personal, - author = {{L}i-wen {C}hang}, - title = {Personal Communication}, - year = 2015, - month = Aug, -} - 
-@techreport{OpenMPAcceleratorModel:IWOMP14, - author = {Eric Stotzer}, - title = {Tutorial: OpenMP Accelerator Model}, - year = 2014, - month = Sept, - url = {http://portais.fieb.org.br/senai/iwomp2014/presentations/tutorial_accelerator_model.pdf} -} - -@misc{Tangram, - author = {Li-wen Chang and Abdul Dakkak and Christopher I. Rodrigues and Wen-mei Hwu}, - title = {Tangram: a High-level Language for Performance Portable Code Synthesis}, - journal = {Programmability and Architectures for Heterogeneous Multicores (MULTIPROG-2015)}, - year = 2015, -} - - diff --git a/paper/optimization.bib b/paper/optimization.bib deleted file mode 100644 index 225751e801fbce4c94bf9cae4f7ef7cb7b94d93e..0000000000000000000000000000000000000000 --- a/paper/optimization.bib +++ /dev/null @@ -1,3680 +0,0 @@ -%%===----------------------------------------------------------------------===%% -%% -%% LLVM Research Group Bibtex File -%% -%%===----------------------------------------------------------------------===%% -%% -%% When adding an entry to this file, please keep the following in mind: -%% -%% -- Please make all entries consistent :-) -%% -- Include the authors names spelled out, first and last with an optional -%% middle initial. e.g. Chris A. Lattner or Chris Lattner, but NOT C. -%% Lattner. -%% -- Please use the abbreviations at the top of the file for publications, -%% e.g. booktitle = PLDI, instead of booktitle = "Proceedings of -%% ...". This makes it much easier to shorten the citations when trying -%% to cram a paper in (make a local change to change the substitution -%% strings at the top of the file). It also keeps all of the cites for a -%% conference identical. -%% -- Please spell things right! :) -%% -- Include city and date in all InProceedings entries. -%% -- Also, please use a consistent naming scheme for cite keys, where -%% possible. If 1-3 authors: Lastname1Lastname2Lastname3:WHEREYY -%% e.g., LattnerAdve:MSP05. 
If 4 or more authors: InitialsList:WHEREYY -%% with the initials of the last names of the first 4 authors, e.g, -%% DKAL:TECS05. For more weird cases, there's not much hope but most -%% cases work as above. -%% -- Please make all entries consistent! :-) -%% -- Did I say, please make all entries consistent? -%% -%% When the final version of a paper is done, please *COPY* the current -%% version of this file to the paper directory and check it into CVS for -%% that paper. -%% -%% TO-DO: -%% -- Replace full conference names with abbrevs -%% -- Make all entries consistent -%% -- Replace ...WHEREYY with ...WHERE:YYYY in all cite keys -%% -%%===----------------------------------------------------------------------===%% - -%%% -%%% Substitutions: Replace these with shorter string to shrinkify refs. -%%% - - -%% Journals - -@string{CACM = "Communications of the ACM"} -@string{JACM = "Journal of the ACM"} -@string{SPE = "Software--Practice and Experience"} - -%% Conferences - -@string{PLDI = "ACM SIGPLAN Conference on Programming Language Design and Implementation"} -@string{ISSTA = "Proc. ACM SIGSOFT Int'l Symp. on Software Testing and Analysis"} -@string{CASES = "Proc. Int'l Conf. on Compilers, Architecture and Synthesis for Embedded Systems (CASES)"} -@string{SAS = "Proc. Int'l Symp. on Static Analysis (SAS)"} -@string{ISCA = "Proc. Int'l Conf. on Computer Architecture (ISCA)"} -@string{CC = "Proc. Int'l Conf. on Compiler Construction (CC)"} -@string{PACT = "Proc. Int'l Conf. on Parallel Architectures and Compilation Techniques (PACT)"} -@string{ISMM = "Proc. Int'l Symp. On Memory Management (ISMM)"} -@string{ESEC = "Proc. European Software Engineering Conf. (ESEC)"} -@string{SIGMOD = "Proc. ACM SIGMOD Int'l Conf. on Management of Data (SIGMOD)"} -@string{ICS = "Proc. Int'l Conf. on Supercomputing (ICS)"} -@string{TECS = "ACM Transactions in Embedded Computing Systems (TECS)"} - -%% Workshops - -@string{LCPC = "Proc. 
Int'l Workshop on Languages and Compilers for - Parallel Computing (LCPC)"}, -@string{MSP = "Proc. ACM Workshop on Memory System Performance"} -%@string{MSP = "MSP"} -@string{PASTE = "Proc. ACM SIGPLAN-SIGSOFT Workshop on Program Analysis for Software Tools and Engineering (PASTE)"} -@string{IWMM = "Proc. Int'l Workshop on Memory Management"} - -@string{WCRE = "Proc. Working Conf. on Reverse Engineering (WCRE)"} -@string{FMPC = "Proc. Symp. on the Frontiers of Massively Parallel Computation"} -@string{ICCD = "Proc. Int'l Conf. on Computer Design (CDES)"} -@string{CC = "Proc. Int'l Conf. on Compiler Construction (CC)"} -@string{IPDPS = "Proc. Int'l Parallel and Distributed Processing Symp."} -@string{SC = "Proc. Int'l Conf. on High Performance Computing, Networking, Storage And Analysis"} -@string{CGO = "Proc. Conf. on Code Generation and Optimization"} - -%%============================================================================ -%% CATEGORY: Current list of categories: -%%============================================================================ -%% COMPILERS: LANGUAGE SPECS -%% COMPILERS: SCALAR: SYSTEMS, -%% COMPILERS: SCALAR: SAFETY CHECKING -%% COMPILERS: SCALAR: SOFTWARE RELIABILITY -%% COMPILERS: SCALAR: POINTER, HEAP, AND SHAPE ANALYSIS -%% COMPILERS: SCALAR: DATAFLOW OPTIMIZATION -%% COMPILERS: SCALAR: MEMORY HIERARCHY OPTIMIZATIONS -%% COMPILERS: SCALAR: INTERPROCEDURAL OPTIMIZATION -%% COMPILERS: SCALAR: INSTRUCTION SELECTION -%% COMPILERS: SCALAR: INSTRUCTION SCHEDULING -%% COMPILERS: SCALAR: REGISTER ALLOCATION -%% COMPILERS: SCALAR: RUNTIME COMPILATION AND OPTIMIZATION -%% COMPILERS: PARALLEL: PARALLEL PROGRAMMING LANGUAGES -%% COMPILERS: VECTOR: SUBWORD SIMD -%% ARCHITECTURES: SCALAR: CHIP PARALLELISM -%% ARCHITECTURES: STREAMING AND VECTOR -%%============================================================================ - - -%%============================================================================ -%% COMPILERS: SCALAR: SAFETY CHECKING 
-%%============================================================================ - - -@InProceedings{KDA:CASES02, - author = {Sumant Kowshik and Dinakar Dhurjati and Vikram Adve}, - title = {Ensuring Code Safety Without Runtime Checks for Real-time Control Systems}, - booktitle = CASES, - year = 2002, - address = {Grenoble}, - month = {Oct}, - annote = {COMPILERS: SCALAR: SAFETY CHECKING} -} - -@InProceedings{DKAL:LCTES03, - Author = {Dinakar Dhurjati and Sumant Kowshik and Vikram Adve and - Chris Lattner}, - Title = {Memory Safety Without Runtime Checks or Garbage Collection}, - booktitle = LCTES, - year = {2003}, - month = {June}, - annote = {COMPILERS: SCALAR: SAFETY CHECKING} -} -% address = {San Diego}, - -@Article{DKAL:TECS05, - author = {Dinakar Dhurjati and Sumant Kowshik and Vikram Adve and - Chris Lattner}, - title = {Memory Safety Without Garbage Collection for Embedded Applications}, - Journal = TECS, - month = Feb, - year = {2005}, - annote = {COMPILERS: SCALAR: SAFETY CHECKING} -} -%% volume = "4", -%% number = "1", -%% pages = "73--111", -% {\rm Special issue on the best papers of the} -% {\it 2003 Conf. 
on Languages, Compilers and Tools for Embedded Systems (LCTES)}}, - - -@Misc{DKA:OSDISubmission04, - Author = {Dinakar Dhurjati and Sumant Kowshik and Vikram Adve}, - Title = {SAFECode: Safe Execution Without Garbage Collection for - System Software}, - Note = {(Submitted for publication)}, - year = {2004}, - month = {May}, - annote = {COMPILERS: SCALAR: SAFETY CHECKING} -} - -@Misc{DKA:Submitted03, - Author = {Dinakar Dhurjati and Sumant Kowshik and Vikram Adve}, - Title = {SAFECode: Safe Execution Without Garbage Collection for - Type-Safe and Low-level Code}, - Note = {(Submitted for publication)}, - year = {2003}, - month = {Nov}, - annote = {COMPILERS: SCALAR: SAFETY CHECKING} -} - -@inproceedings{Omniware:PLDI96, - author = {Ali-Reza Adl-Tabatabai and Geoff Langdale and Steven Lucco and Robert Wahbe}, - title = {Efficient and language-independent mobile programs}, - booktitle = PLDI, - year = {1996}, - isbn = {0-89791-795-2}, - pages = {127--136}, - location = {Philadelphia, Pennsylvania, United States}, - doi = {http://doi.acm.org/10.1145/231379.231402}, -} - -@inproceedings{DISC:FCCM95, - author = "M. Wirthlin and B. Hutchings", - title = "A Dynamic Instruction Set Computer", - booktitle = "{IEEE} Symp. on {FPGA}s for Custom Computing Machines", - publisher = "{IEEE} Computer Society Press", - address = "Los Alamitos, CA", - editor = "Peter Athanas and Kenneth L. 
Pocek", - pages = "99--107", - year = "1995", - url = "citeseer.nj.nec.com/wirthlin95dynamic.html" -} - -@inproceedings{DARK:WCAE00, - author = {Ola {\AA}gren}, - title = {Virtual Machines as an Aid in Teaching Computer Concepts}, - booktitle = {Workshop on Computer Architecture Education}, - year = {2000}, - month = {jun}, -} - -@InProceedings{VISC:PACT02, - author = "Jack Liu and Tim Kong and Fred Chow", - title = "Effective Compilation Support for - Variable Instruction Set Architecture", - booktitle = PACT, - location = "Charlottesville, USA", - month = Sept, - year = 2002, -} - -@Book{Gosling:JavaSpec, - Author = {James Gosling and Bill Joy and Guy Steele and Gilad Bracha}, - Title = {The {J}ava {L}anguage {S}pecification}, - Publisher = {Sun Microsystems}, - Edition = {2nd}, - Year = {2000}} - -@InProceedings{Sha:Simplex1, - author = "Lui Sha", - title = "Dependable System Upgrades", - booktitle = "Proceedings of IEEE Real Time System Symp.", - year = 1998 -} - -@article{Sha:Simplex, - author = "Lui Sha", - title = "Using Simplicity to Control Complexity", - journal = "IEEE Software", - month = "July/August", - year = 2001 -} - -@article{UNCOL, - author = "T.B. 
Steel", - title = "UNCOL: The Myth and the Fact", - journal = "Annual Review in Automated Programming 2", - year = "1961" -} - -@misc{ANDF, - author = "{{ANDF Consortium}}", - title = "{The Architectural Neutral Distribution Format}", - HowPublished = "{\tt http://www.andf.org/}" -} - -@misc{ManagedC++, - author = "{{Microsoft Corp.}}", - title = {Managed Extensions for {C++} Specification}, - HowPublished = "{.NET Framework Compiler and Language Reference}" -} - - -@Article{Chase94:Exceptions, - author = "David Chase", - title = "Implementation of exception handling", - journal = "The Journal of {C} Language Translation", - volume = "5", - number = "4", - pages = "229--240", - month = jun, - year = "1994", - ISSN = "1042-5721", - bibdate = "Fri Nov 21 14:40:20 1997", - remark = "The details of handling exceptions in C++ and similar - languages.", -} - -@inproceedings{Shao98:TypedIL, - author = "Zhong Shao and Christopher League and Stefan Monnier", - title = "{Implementing Typed Intermediate Languages}", - booktitle = ICFP, - pages = "313-323", - year = "1998", - url = "citeseer.nj.nec.com/shao98implementing.html" } - - -%%============================================================================ -%% COMPILERS: SCALAR: SOFTWARE RELIABILITY -%%============================================================================ - - -@InProceedings{ESP:PLDI02, - author = {Manuvir Das and Sorin Lerner and Mark Siegle}, - title = {ESP: Path-Sensitive Program Verification in Polynomial Time}, - booktitle = PLDI, - address = {Berlin, Germany}, - month = {June}, - year = {2002} -} - -@InProceedings{Metal:PLDI02, - author = {Seth Hallem and Benjamin Chelf and Yichen Xie and - Dawson Engler}, - title = {A System and Language for Building System-Specific, - Static Analyses}, - booktitle = PLDI, - address = {Berlin, Germany}, - month = {June}, - year = {2002} -} - -%%============================================================================ - -@misc{LLVMOnlineRef, - author = 
{Chris Lattner and Vikram Adve}, - title = {{LLVM Language Reference Manual}}, - note = "{http://llvm.org/docs/LangRef.html}" -} - -@misc{LLVMCommandGuide, - title = {{LLVM Command Guide}}, - note = "http://llvm.org/docs/CommandGuide/index.html", - url = "http://llvm.org/docs/CommandGuide/index.html" -} - -@misc{LLVMLinkTimeOpt, - title = {{LLVM Link Time Optimization: Design and Implementation}}, - note = "http://llvm.org/docs/LinkTimeOptimization.html", - url = "http://llvm.org/docs/LinkTimeOptimization.html" -} - -@misc{LLVMPassManager, - author = {Chris Lattner and Jim Laskey}, - title = {{Writing an LLVM Pass}}, - note = "http://llvm.org/docs/WritingAnLLVMPass.html", - url = "http://llvm.org/docs/WritingAnLLVMPass.html" -} - -@misc{LLVMGC, - author = {Chris Lattner}, - title = {Accurate Garbage Collection with {LLVM}}, - note = "http://llvm.org/docs/GarbageCollection.html", - url = "http://llvm.org/docs/GarbageCollection.html" -} - -@misc{Clang, - title = {clang: a {C} language family frontend for {LLVM}}, - note = "http://clang.llvm.org", -} - -@misc{DragonEgg, - title = {DragonEgg - Using {LLVM} as a {GCC} backend}, - note = "http://dragonegg.llvm.org", -} - -@misc{Lattner:llvm:AliasAnalysis, - author = {Chris Lattner}, - title = {{LLVM Alias Analysis Infrastructure}}, - note = "{\tt http://llvm.cs.uiuc.edu/docs/AliasAnalysis.html}" -} - -@MastersThesis{Lattner:MSThesis02, - author = {Chris Lattner}, - title = {{LLVM}: An Infrastructure for Multi-Stage Optimization}, - school = {Computer Science Dept., University of Illinois at Urbana-Champaign}, - year = {2002}, - address = {Urbana, IL}, - month = {Dec}, - note = {{\em See {\tt http://llvm.cs.uiuc.edu}.}} -} - -@PhdThesis{Lattner:PHD, - author = {Chris Lattner}, - title = {{Macroscopic Data Structure Analysis and Optimization}}, - school = {Comp. Sci. Dept., Univ. 
of Illinois}, - year = {2005}, - address = {Urbana, IL}, - month = {May}, -} -%% note = {{\em See {\tt http://llvm.cs.uiuc.edu}.}} - -@Misc{Hidden:Lattner:PHD, - author = {Anonymous}, - title = {{Omitted for blind review}}, - year = {2005}, - month = {May}, -} -%% note = {{\em See {\tt http://llvm.cs.uiuc.edu}.}} - -%% Shukla:MSThesis03i -@MastersThesis{Shukla:MSThesis03, - author = {Anand Shukla}, - title = {Lightweight, Cross-procedure Tracing for Runtime Optimization}, - school = {Computer Science Deptartment, University of Illinois at Urbana-Champaign}, - year = {2003}, - address = {Urbana, IL}, - month = {Aug} -} - -@TechReport{berndl:cgo03, - Author = {Marc Berndl and Laurie Hendren}, - Title = {Dynamic Profiling and Trace Cache Generation for a -Java Virtual Machine}, - Institution = {McGill University, Canada}, - Type = {Tech. Report}, - Month = {Oct}, - Year = {2002} -} - -%%% -%%% NOTE: New papers should not cite this paper. Instead, they should cite -%%% "Lattner:MSThesis02" -%%% -@TechReport{LattnerAdve:LLVMTR, - Author = {Chris Lattner and Vikram Adve}, - Title = {The {LLVM} {I}nstruction {S}et and {C}ompilation {S}trategy}, - Institution = {Computer Science Dept., - Univ. of Illinois at Urbana-Champaign}, - Number = {UIUCDCS-R-2002-2292}, - Type = {Tech. Report}, - Month = {Aug}, - Year = {2002}, - URL = {\tt http://llvm.cs.uiuc.edu/pubs/LLVMCompilationStrategy.pdf}, - Annote = { CATEGORY: COMPILERS: SCALAR: SYSTEMS } -} - -@TechReport{LLVM:LifeLongOptTR03, - Author = {Chris Lattner and Vikram Adve}, - Title = {{LLVM}: {A} {C}ompilation {F}ramework for {L}ifelong {P}rogram {A}nalysis and {T}ransformation}, - Institution = {Computer Science Dept., - Univ. of Illinois at Urbana-Champaign}, - Number = {UIUCDCS-R-2003-2380}, - Type = {Tech. 
Report}, - Month = {Sept}, - Year = {2003}, - URL = {\tt http://llvm.cs.uiuc.edu/pubs/2003-10-01-LLVMLifelongOpt.html}, - Annote = { CATEGORY: COMPILERS: SCALAR: SYSTEMS } -} - -@InProceedings{LLVM:CGO04, - Author = {Chris Lattner and Vikram Adve}, - Title = {{LLVM}: A Compilation Framework for Lifelong Program Analysis and Transformation}, - Booktitle = CGO, - Address = {San Jose, CA, USA}, - Month = {Mar}, - Year = {2004}, - pages = {75--88}, -} - -@InProceedings{LLVM:GCCSummit03, - Author = {Chris Lattner and Vikram Adve}, - Title = {{A}rchitecture for a {N}ext-{G}eneration {GCC}}, - Booktitle = {Proc. First Annual GCC Developers' Summit}, - Address = {Ottawa, Canada}, - Month = {May}, - Year = {2003}, -} - -@InProceedings{LattnerAdve:MSP02, - Author = {Chris Lattner and Vikram Adve}, - Title = {{A}utomatic {P}ool {A}llocation for {D}isjoint {D}ata {S}tructures}, - Booktitle = MSP, - Address = {Berlin, Germany}, - Month = {June}, - Year = {2002}, - URL = {{\tt http://llvm.cs.uiuc.edu/pubs/MSP2002-FinalSubmission.pdf}}, -} - -@InProceedings{LattnerAdve:PtrComp:MSP05, - Author = {Chris Lattner and Vikram Adve}, - Title = {{T}ransparent {P}ointer {C}ompression for {L}inked {D}ata {S}tructures}, - Booktitle = MSP, - Address = {Chicago, IL}, - Month = {June}, - Year = {2005}, -} - -@TechReport{LattnerAdve:DSGTR03, - Author = {Chris Lattner and Vikram Adve}, - Title = {{D}ata {S}tructure {A}nalysis: {A} {F}ast and {S}calable {C}ontext-{S}ensitive {H}eap {A}nalysis}, - Institution = {Computer Science Dept., - Univ. of Illinois at Urbana-Champaign}, - Number = {UIUCDCS-R-2003-2340}, - Type = {Tech. Report}, - Month = {Apr}, - Year = {2003}, - URL = {\tt http://llvm.cs.uiuc.edu/pubs/2003-04-29-DataStructureAnalysisTR.html} -} - -@TechReport{LattnerAdve:PoolAllocTR04, - Author = {Chris Lattner and Vikram Adve}, - Title = {Automatic Pool Allocation: Compile-Time Control of - Data Structure Layout in the Heap}, - Institution = {Computer Science Dept., - Univ. 
of Illinois at Urbana-Champaign}, - Number = {UIUCDCS-R-2004-2465}, - Type = {Tech. Report}, - Month = {July}, - Year = {2004} -} -% URL = {\tt http://llvm.cs.uiuc.edu/pubs/}, -% URL = {\tt http://llvm.cs.uiuc.edu/pubs/2003-04-29-DataStructureAnalysisTR.html}, - -@InProceedings{DSA:PLDI07, - author = {Chris Lattner and Andrew D. Lenharth and Vikram S. Adve}, - title = {Making Context-sensitive Points-to Analysis with Heap Cloning - Practical For The Real World}, - booktitle = PLDI, - address = {San Diego, CA, USA}, - month = {June}, - year = {2007}, - pages = {278--289}, -} - -@TechReport{DSAEvaluation:TR05, - Author = {Patrick Meredith and Balpreet Pankaj and Swarup Sahoo and - Chris Lattner and Vikram Adve}, - Title = "How Successful Is Data Structure Analysis in Isolating and -Analyzing Linked Data Structures?", - Institution= {Computer Science Dept., - Univ. of Illinois at Urbana-Champaign}, - Number = {UIUCDCS-R-2005-2658}, - Type = {Tech. Report}, - Month = {Nov}, - Year = {2005} -} - -@Misc{Hidden:DSAEvaluation:TR05, - author = {Anonymous}, - title = {{Omitted for blind review}}, - year = {2005}, - month = {Nov}, -} - -@InProceedings{LA:PLDI05, - author = {Chris Lattner and Vikram Adve}, - title = {Automatic Pool Allocation: Improving Performance by Controlling Data Structure Layout in the Heap}, - booktitle = PLDI, - address = {Chicago, IL}, - month = {June}, - year = {2005} -} - -@Misc{Hidden:LA:PLDI05, - author = {Anonymous}, - title = {{Omitted for blind review}}, - year = {2005}, - month = {June}, -} - -@inproceedings{CLA:PLDI01, - author = {Heintze, Nevin and Tardieu, Olivier}, - title = {Ultra-fast aliasing analysis using CLA: a million lines of C code in a second}, - booktitle = {Proceedings of the ACM SIGPLAN 2001 conference on Programming language design and implementation}, - series = {PLDI '01}, - year = {2001}, - isbn = {1-58113-414-2}, - location = {Snowbird, Utah, United States}, - pages = {254--263}, - numpages = {10}, - url = 
{http://doi.acm.org/10.1145/378795.378855}, - doi = {http://doi.acm.org/10.1145/378795.378855}, - acmid = {378855}, - publisher = {ACM}, - address = {New York, NY, USA}, -} - - -@inproceedings{ConnectivityGC:OOPSLA03, - author = {Martin Hirzel and Amer Diwan and Matthew Hertz}, - title = {Connectivity-based Garbage Collection}, - booktitle = OOPSLA, - year = {2003}, - pages = {359--373}, - location = {Anaheim, California, USA}, - } - -@inproceedings{GCLocality:OOPSLA04, - author = {Xianglong Huang and Stephen Blackburn and Kathryn McKinley and Eliot Moss and Zhenlin Wang and Perry Cheng}, - title = {The garbage collection advantage: improving program locality}, - booktitle = OOPSLA, - year = {2004}, - isbn = {1-58113-831-9}, - pages = {69--80}, - location = {Vancouver, BC, Canada} - } - -@inproceedings{BergerZornMcKinley:OOPSLA2002, - author = {Emery D. Berger and Benjamin G. Zorn and Kathryn S. McKinley}, - title = {Reconsidering custom memory allocation}, - booktitle = OOPSLA, - year = {2002}, - isbn = {1-58113-471-1}, - pages = {1--12}, - location = {Seattle, Washington, USA}, - doi = {http://doi.acm.org/10.1145/582419.582421}, -} - -@inproceedings{Takagi:FieldArrayComp:ISHPC03, - author = {Masamichi Takagi and Kei Hiraki}, - title = {Field Array Compression in Data Caches for Dynamically Allocated - Recursive Data Structure.}, - booktitle = {Proceedings of 5th International Symp. on High Performance Computing (ISHPC'03)}, - location = {Tokyo-Odaiba, Japan}, - month = {October}, - year = {2003}, - pages = {127-145}, -} - - -@InProceedings{JavaPtrComp:CGO04, - Author = {Ali-Reza Adl-Tabatabai and Jay Bharadwaj and Michal Cierniak and Marsha Eng and Jesse Fang and Brian T. Lewis and Brian R. Murphy and James M. Stichnoth}, - Title = {Improving 64-bit {Java} {IPF} performance by compressing heap references}, - Booktitle = CGO, - Pages = "100--110", - month = {March}, - year = "2004" -} - -@inproceedings{Mogul:USENIX95:PointerSizes, - author = "Jeffrey C. 
Mogul and Joel F. Bartlett and Robert N. Mayo and Amitabh Srivastava", - title = "Performance Implications of Multiple Pointer Sizes", - booktitle = "{USENIX} Winter", - pages = "187-200", - year = "1995", - url = "citeseer.ist.psu.edu/mogul89performance.html" } - -@inproceedings{Ghiya:PointerAnalysisToWork:POPL98, - author = {Rakesh Ghiya and Laurie J. Hendren}, - title = {Putting pointer analysis to work}, - booktitle = "POPL", - year = {1998}, - isbn = {0-89791-979-3}, - pages = {121--133}, - location = {San Diego, California, United States}, - doi = {http://doi.acm.org/10.1145/268946.268957}, - address = {New York, NY, USA}, - } - - -@inproceedings{Yarvin:USENIX93, - author = "Curtis Yarvin and Richard Bukowski and Thomas Anderson", - title = "Anonymous {RPC}: Low-Latency Protection in a 64-Bit Address Space", - booktitle = "{USENIX} Summer", - pages = "175-186", - year = "1993", - url = "citeseer.ist.psu.edu/131881.html" } - -@article{Heiser:Mungi:SPE98, - author = "Gernot Heiser and Kevin Elphinstone and Jerry Vochteloo and Stephen Russell and Jochen Liedtke", - title = "The {Mungi} Single-Address-Space Operating System", - journal = SPE, - volume = "28", - number = "9", - pages = "901--928", - year = "1998", - url = "citeseer.ist.psu.edu/heiser98mungi.html" } - -@inproceedings{HindPioli:ISSTA00, - author = {Michael Hind}, - title = "{Which Pointer analysis Should I Use?}", - booktitle = ISSTA, - year = {2000}, - mon = {Aug}} - -@inproceedings{Hind:PASTE01, - author = {Michael Hind}, - title = {Pointer Analysis: Haven't we solved this problem yet?}, - booktitle = "PASTE", - year = {2001}, - location = {Snowbird, United States}, - isbn = {1-58113-413-4}, - doi = {http://doi.acm.org/10.1145/379605.379665}, -} -; pages = {54--61}, - -@inproceedings{BurkCarini:IWLCPC95, - author = {Michael G. Burke and Paul R. 
Carini and Jong-Deok Choi and Michael Hind}, - title = {Flow-Insensitive Interprocedural Alias Analysis in the Presence of Pointers}, - booktitle = LCPC, - year = {1995}, - isbn = {3-540-58868-X}, - pages = {234--250}, - publisher = {Springer-Verlag}, - } - -@inproceedings{Fanhdrich:OnlineCycleElim:PLDI98, - author = {Manuel F\"{a}hndrich and Jeffrey S. Foster and Zhendong Su and Alexander Aiken}, - title = {Partial online cycle elimination in inclusion constraint graphs}, - booktitle = PLDI, - year = {1998}, - isbn = {0-89791-987-4}, - pages = {85--96}, - location = {Montreal, Quebec, Canada}, - doi = {http://doi.acm.org/10.1145/277650.277667}, -} - -@inproceedings{Rountev:OfflineVarSub:PLDI00, - author = {Atanas Rountev and Satish Chandra}, - title = {Off-line variable substitution for scaling points-to analysis}, - booktitle = PLDI, - year = {2000}, - isbn = {1-58113-199-2}, - pages = {47--56}, - location = {Vancouver, British Columbia, Canada}, - doi = {http://doi.acm.org/10.1145/349299.349310}, -} - -@inproceedings{Pearce:CycleDet:SCAM03, - author = {David J. Pearce and Paul H. J. Kelly and Chris Hankin}, - title = {Online Cycle Detection and Difference Propagation for Pointer Analysis}, - booktitle = {Proc. Int'l IEEE Workshop on Source Code Analysis and Manipulation (SCAM)}, - year = {2003}, - location = {Amsterdam}, -} - - -@inproceedings{PearceKellyHankin:PASTE04, - author = {David J. Pearce and Paul H. J. Kelly and Chris Hankin}, - title = {Efficient field-sensitive pointer analysis for C}, - booktitle = PASTE, - year = {2004}, - isbn = {1-58113-910-1}, - pages = {37--42}, - location = {Washington DC, USA}, - doi = {http://doi.acm.org/10.1145/996821.996835}, - } - -@inproceedings{WhaleyLam:PLDI04, - author = {John Whaley and Monica S. 
Lam}, - title = {Cloning-based Context-Sensitive Pointer Alias Analysis using Binary Decision Diagrams}, - booktitle = PLDI, - year = {2004}, - isbn = {1-58113-807-5}, - location = {Washington DC, USA}, - doi = {http://doi.acm.org/10.1145/996841.996859}, -} -; pages = {131--144}, - -@inproceedings{Foster:ContextSensPrecision:SAS00, - author = {Jeffrey S. Foster and Manuel F\"{a}hndrich and Alexander Aiken}, - title = {Polymorphic versus Monomorphic Flow-Insensitive Points-to Analysis for C}, - booktitle = SAS, - year = {2000}, - isbn = {3-540-67668-6}, - address = {London, UK}, -} -; pages = {175--198}, - -@inproceedings{CAS:SAS09, - author = {Niedzielski, David and Ronne, Jeffery and Gampe, Andreas and Psarris, Kleanthis}, - title = {A Verifiable, Control Flow Aware Constraint Analyzer for Bounds Check Elimination}, - booktitle = SAS, - year = {2009}, - isbn = {978-3-642-03236-3}, - address = {Berlin, Heidelberg}, -} - -@inproceedings{Nystrom:Cloning:PASTE04, - author = {Erik M. Nystrom and Hong-Seok Kim and Wen-mei W. Hwu}, - title = {Importance of heap specialization in pointer analysis}, - booktitle = PASTE, - year = {2004}, - isbn = {1-58113-910-1}, - pages = {43--48}, - location = {Washington DC, USA}, - doi = {http://doi.acm.org/10.1145/996821.996836}, - address = {New York, NY, USA}, - } - -@inproceedings{Nystrom:SubsetBasedCS:SAS04, - author = {Erik M. Nystrom and Hong-Seok Kim and Wen-mei W. 
Hwu}, - title = {Bottom-up and Top-down Context-Sensitive Summary-based Pointer Analysis}, - booktitle = "SAS 2004", - year = {2004}, - } - - - -@inproceedings{ChoiBurkCarini:POPL93, - author = {Jong-Deok Choi and Michael Burke and Paul Carini}, - title = {Efficient flow-sensitive interprocedural computation of pointer-induced aliases and side effects}, - booktitle = POPL, - year = {1993}, - isbn = {0-89791-560-7}, - pages = {232--245}, - location = {Charleston, South Carolina, United States}, - doi = {http://doi.acm.org/10.1145/158511.158639}, - } - -@inproceedings{Golf:SAS01, - author = {Manuvir Das and Ben Liblit and Manuel F\"{a}hndrich and Jakob Rehof}, - title = {Estimating the Impact of Scalable Pointer Analysis on Optimization}, - booktitle = "SAS", - year = {2001}, - isbn = {3-540-42314-1}, - } -; pages = {260--278}, -; publisher = {Springer-Verlag}, - -@InProceedings{RothSohi:ISCA99, - author = "Amir Roth and Gurindar S. Sohi", - title = "Effective jump-pointer prefetching for linked data structures", - booktitle = ISCA, - pages = "111-121", - month = May, - year = 1999 -} - - -@InProceedings{Tullsen:ISCA95, - author = "Dean M. Tullsen and Susan Eggers and Henry M. Levy", - title = "Simultaneous Multithreading: Maximizing On-Chip Parallelism", - booktitle = ISCA, - year = "1995", - url = "citeseer.nj.nec.com/32969.html" } - - -@InProceedings{Tullsen:ISCA96, - author = "Dean M. Tullsen and Susan J. Eggers and Joel S. Emer and Henry M. Levy and Jack L. Lo and Rebecca L. Stamm", - title = "Exploiting Choice: Instruction Fetch and Issue on an Implementable Simultaneous Multithreading Processor", - booktitle = ISCA, - pages = "191-202", - month = May, - year = "1996", - url = "citeseer.nj.nec.com/tullsen96exploiting.html" } - - -@Article{RinardDiniz:TOPLAS97, - author = {Martin C. Rinard and Pedro C. 
Diniz}, - title= {Commutativity analysis: a new analysis technique for parallelizing compilers}, - journal = TOPLAS, - volume = {19}, - number = {6}, - year = {1997}, - issn = {0164-0925}, - pages = {942--991}, - doi = {http://doi.acm.org/10.1145/267959.269969}, - } - - -@InProceedings{Chin:RegionInference:PLDI04, - author = {Wei-Ngan Chin and Florin Craciun and Shengchao Qin and - Martin Rinard }, - title = {Region Inference for an Object-Oriented Language}, - booktitle = PLDI, - year = {2004}, - address = {Washington, DC}, - month = {June}, -} - - -@InProceedings{WilsonLamMoher:PLDI91, - author = {Paul R. Wilson and Michael S. Lam and Thomas G. Moher}, - title= {Effective "static-graph" reorganization to improve locality in garbage-collected systems}, - booktitle = PLDI, - year = {1991}, - isbn = {0-89791-428-7}, - pages = {177--191}, - location = {Toronto, Ontario, Canada}, - doi = {http://doi.acm.org/10.1145/113445.113461}, - } - -@Article{Courts:CACM88, - author = {Robert Courts}, - title= {Improving locality of reference in a garbage-collecting memory management system}, - journal = CACM, - volume = {31}, - number = {9}, - year = {1988}, - issn = {0001-0782}, - pages = {1128--1138}, - doi = {http://doi.acm.org/10.1145/48529.48536}, - } - -@inproceedings{Chilimbi:PLDI99:CacheConscious, - author = {Trishul M. Chilimbi and Bob Davidson and James R. Larus}, - title = {Cache-conscious structure definition}, - booktitle = PLDI, - year = {1999}, - isbn = {1-58113-094-5}, - pages = {13--24}, - location = {Atlanta, Georgia, United States}, - doi = {http://doi.acm.org/10.1145/301618.301635}, - } - -@inproceedings{Chilimbi:PLDI99:StructureReorg, - author = {Trishul M. Chilimbi and Mark D. Hill and James R. 
Larus}, - title = {Cache-conscious structure layout}, - booktitle = PLDI, - year = {1999}, - isbn = {1-58113-094-5}, - pages = {1--12}, - location = {Atlanta, Georgia, United States}, - doi = {http://doi.acm.org/10.1145/301618.301633}, - } - -@InProceedings{TruongEtAl:PACT98, - author = "Dan N. Truong and Fran\c{c}ois Bodin and Andr\'e Seznec", - title = "Improving Cache Behavior of Dynamically Allocated Data Structures", - pages = "322--329", - booktitle = PACT, - location = "Paris, France", - month = Oct, - year = 1998, - url = "citeseer.nj.nec.com/truong98improving.html" } - -@article{RabbahPalem:TECS03, - author = {Rodric M. Rabbah and Krishna V. Palem}, - title = {Data remapping for design space optimization of embedded memory systems}, - journal = TECS, - volume = {2}, - number = {2}, - year = {2003}, - pages = {186--218}, - address = {New York, NY, USA}, - } - -@InProceedings{ZhangGupta:ICCC02, - author = "Youtao Zhang and Rajiv Gupta", - title = "Data Compression Transformations for Dynamically Allocated Data Structures", - booktitle = CC, - location = "Grenoble, France", - month = "Apr", - year = "2002", - url = "citeseer.nj.nec.com/zhang02data.html" } - - -@inproceedings{ValueCompression:MICRO00, - author = {Jun Yang and Youtao Zhang and Rajiv Gupta}, - title = {Frequent value compression in data caches}, - booktitle = {MICRO 33: Proceedings of the 33rd annual ACM/IEEE international symposium on Microarchitecture}, - year = {2000}, - isbn = {1-58113-196-8}, - pages = {258--265}, - location = {Monterey, California, United States}, - doi = {http://doi.acm.org/10.1145/360128.360154}, - publisher = {ACM Press}, - address = {New York, NY, USA}, -} - -@InProceedings{CompressedMemoryInterfaces:MemoryWall00, - author = {C.D. Benveniste and P.A. Franaszek and and J.T. 
Robinson}, - title = {Cache-Memory Interfaces in Compressed Memory Systems}, - booktitle = {Workshop on Solving the Memory Wall Problem}, - year = {2000}, - month = June, -} - -@InProceedings{OSHwCompression:MemoryWall00, - author = {B. Abali and H. Franke}, - title = {Operating System Support for Fast Hardware Compression of Main Memory Contents}, - booktitle = {Workshop on Solving the Memory Wall Problem}, - year = {2000}, - month = June, -} - -@inproceedings{CompressedMemory:ICCD99, - author = { J-S. Lee and W-K. Hong and S-D. Kim}, - title = {Design and Evaluation of a Selective Compressed Memory System}, - booktitle = {ICCD '99: Proceedings of the 1999 IEEE International Conference on Computer Design}, - year = {1999}, - isbn = {0-7695-0406-X}, - pages = {184}, - publisher = {IEEE Computer Society}, - address = {Washington, DC, USA}, - } - -@phdthesis{PhD:Larin:2000, - author = {Sergei Yuri Larin}, - note = {Chair-Thomas Conte}, - title = {Exploiting program redundancy to improve performance, cost and power consumption in embedded systems}, - year = {2000}, - isbn = {0-493-46648-7}, - } - - -@InProceedings{AnanianRinard:LCTES03, - Author = {C. Scott Ananian and Martin Rinard}, - Title = {Data {S}ize {O}ptimizations for {J}ava {P}rograms}, - booktitle = LCTES, - address = {San Diego, CA}, - year = {2003}, - month = {June} -} - -@Article{BurkeTorczon:TOPLAS93, - author = {Michael Burke and Linda Torczon}, - title= {Interprocedural optimization: eliminating unnecessary recompilation}, - journal = TOPLAS, - volume = {15}, - number = {3}, - year = {1993}, - issn = {0164-0925}, - pages = {367--399}, - doi = {http://doi.acm.org/10.1145/169683.169678}, - } - - -@InProceedings{SeidlZorn:ASPLOS98, - author = {Matthew L. Seidl and Benjamin G. 
Zorn}, - title= {Segregating heap objects by reference behavior and lifetime}, - booktitle = ASPLOS, - year = {1998}, - pages = {12--23}, - address = {San Jose, USA} - } - -@InProceedings{Calder:ASPLOS98, - author = "Brad Calder and Chandra Krintz and Simmi John and Todd Austin", - title = "Cache-Conscious Data Placement", - booktitle = ASPLOS, - year = "1998", - pages = {139--149}, - address = {San Jose, USA} -} - -@inproceedings{JavaHeapProfiling:PLDI01, - author = {Ran Shaham and Elliot K. Kolodner and Mooly Sagiv}, - title = {Heap Profiling for Space-Efficient Java}, - booktitle = PLDI, - year = {2001}, - month = June, - location = {Snowbird, USA} -} - - -@Book{Jones:GCAlgorithms:Wiley99, - Author = {Richard Jones}, - Title = {Garbage Collection. {A}lgorithms for Automatic - Dynamic Memory Management}, - Publisher = {John Wiley \& Sons}, - Year = {1999}} - -@inproceedings{Wilson:GCSurvey:IWMM92, - author = "Paul R. Wilson", - title = "Uniprocessor Garbage Collection Techniques", - booktitle = IWMM, - number = "637", - publisher = "Springer-Verlag", - address = "Saint-Malo (France)", - year = "1992" } - -@inproceedings{Shaham:StaticGC:SAS01, - author = "Ran Shaham and Eran Yahav and Elliot K. Kolodner and Mooly Sagiv", - title = "Establishing Local Temporal Heap Safety Properties with - Applications to Compile-Time Memory Management", - booktitle = SAS, - year = "2003", - month = June, - address = "San Diego, USA"} - -@inproceedings{BergerZornMckinley:Reaps:OOPSLA02, - author = "Emery D. Berger and Benjamin G. Zorn and Kathryn S. McKinley", - title = "Reconsidering Custom Memory Allocation", - booktitle = OOPSLA, - address = "Seattle, Washington", - month = Nov, - year = "2002", - url = "citeseer.nj.nec.com/berger01reconsidering.html" } - -@InProceedings{GhiyaHendren:POPL96, - author = "Rakesh Ghiya and Laurie J. Hendren", - title = "Is it a Tree, a {DAG}, or a Cyclic Graph? 
{A} Shape Analysis for Heap-Directed Pointers in {C}", - booktitle = "POPL", - year = "1996", - url = "citeseer.nj.nec.com/ghiya96is.html" } -% pages = "1-15", - - -@inproceedings{ChongRugina:SAS03:AccessRegionsRDS, - author = {Stephen Chong and Radu Rugina}, - title = {Static Analysis of Accessed Regions in Recursive Data Structures}, - booktitle = "SAS", - year = {2003}, - isbn = {3-540-40325-6}, - bibsource = {DBLP, http://dblp.uni-trier.de} -} -; month = June, -; address = {San Diego, CA}, - -@inproceedings{YahavRamalingam:PLDI04, - author = {Eran Yahav and G. Ramalingam}, - title = {Verifying safety properties using separation and heterogeneous abstractions}, - booktitle = PLDI, - year = {2004}, - isbn = {1-58113-807-5}, - pages = {25--34}, - location = {Washington DC, USA}, - doi = {http://doi.acm.org/10.1145/996841.996846}, - address = {New York, NY, USA} - } - -@inproceedings{HackettRugina:POPL05, - author = {Brian Hackett and Radu Rugina}, - title = {Region-based shape analysis with tracked locations}, - booktitle = "POPL", - year = {2005}, - isbn = {1-58113-830-X}, - pages = {310--323}, - location = {Long Beach, California, USA}, - doi = {http://doi.acm.org/10.1145/1040305.1040331}, - address = {New York, NY, USA} - } - -@inproceedings{JeannetEtAl:SAS04, - author = {Bertrand Jeannet and Alexey Loginov and Thomas Reps and Mooly Sagiv}, - title = {A relational approach to interprocedural shape analysis}, - booktitle = SAS, - address = {Verona, Italy}, - month = Aug, - year = {2004} -} - -@inproceedings{CheremRugina:RegionsForJava:ISMM04, - author = {Sigmund Cherem and Radu Rugina}, - title = {Region Analysis and Transformation for Java Programs}, - booktitle = ISMM, - address = {Vancouver, Canada}, - month = Oct, - year = {2004} -} - - -@Article{GhiyaHendren:IJPP96, - Author = "Rakesh Ghiya and Laurie J. 
Hendren", - Title = {Connection Analysis: A Practical Interprocedural Heap Analysis for {C}}, - Journal = {International Journal of Parallel Programming}, - Volume = {24}, - Number = {6}, - Pages = {547-578}, - Year = {1996}} - -@InProceedings{KRS:LCM, - author = "Jens Knoop and O. Ruthing and B. Steffen", - title = "Lazy Code Motion", - booktitle = PLDI, - pages = "249-260", - address = "San Francisco, CA", - month = June, - year = 1992 -} - -@InProceedings{CahoonMcKinley:ICS01, - author = "Brendon Cahoon and Kathryn S. McKinley", - title = "Data Flow Analysis for Software Prefetching Linked Data Structures in Java", - booktitle = PACT, - address = "Barcelona, Spain", - month = Sep, - year = 2001 -} - -@InProceedings{CKP:Prefetch, - Author = "David Callahan and Ken Kennedy and Allan Porterfield", - Title = "Software Prefetching", - BookTitle = ASPLOS, - Address = {Santa Clara, USA}, - Month = Apr, - Year = 1991, - Pages = "40-52"} - - -@article{LM99:GreedyPrefetch, - author = "Chi-Keung Luk and Todd C. Mowry", - title = "Automatic Compiler-Inserted Prefetching for Pointer-Based Applications", - journal = "IEEE Transactions on Computers", - volume = "48", - number = "2", - pages = "134-141", - year = "1999", - url = "citeseer.ist.psu.edu/luk99automatic.html" } - -@InProceedings{MLG:Prefetch, - Author = "Todd Mowry and Monica S. Lam and Anoop Gupta", - Title = "Design and Evaluation of a Compiler Algorithm for Prefetching", - BookTitle = ASPLOS, - Pages = {62--73}, - Address = "Boston, USA", - Month = Oct, - Year = 1992} - -@Article{grunwald93customalloc, - author = "Dirk Grunwald and Benjamin Zorn", - title = "CustoMalloc: Efficient Synthesized Memory Allocators", - journal = {SP\&E}, - volume = "23", - number = "8", - pages = "851-869", - year = "1993", - url = "citeseer.nj.nec.com/grunwald92customalloc.html" } - -@InProceedings{LukMowry:ASPLOS96, - Author = "C. Luk and T. 
Mowry", - Title = "{C}ompiler-based {P}refetching for {R}ecursive {D}ata {S}tructures", - Booktitle = ASPLOS, - Address = "Boston", - Month = Oct, - Year = 1996, - Annote = { CATEGORY: ARCHITECTURE: SHARED-MEMORY: SOFTWARE DSM } -} - -@InProceedings{PCMO:PLDI98, - author = "Andrew Ayers and Stuart de Jong and John Peyton and Richard Schooler", - Title = {Scalable Cross-Module Optimization}, - Booktitle = PLDI, - Address = "Montreal", - Month = June, - Year = 1998 -} - - -@inproceedings{IntelPointsTo:PLDI01, - author = {Rakesh Ghiya and Daniel Lavery and David Sehr}, - title = {On the importance of points-to analysis and other memory disambiguation methods for {C} programs}, - booktitle = PLDI, - year = {2001}, - isbn = {1-58113-414-2}, - location = {Snowbird, Utah, United States}, - doi = {http://doi.acm.org/10.1145/378795.378806}, -} - - - -@Article{ConvexAppsCompiler:Sigplan94, - Title = {Developing an Interprocedural Optimizing Compiler}, - Author = {J. Loeliger and R. Metzger}, - Journal = "ACM Sigplan Notices", - Month = Apr, - Year = 1994, - Volume = 29, - Number = 4 -} - -@InProceedings{Wall:PLDI86, - Author = {David Wall}, - Title = {Global Register Allocation at Link-Time}, - Booktitle = PLDI, - Address = "Palo Alto, CA", - Year = 1986, - Annote = { CATEGORY: COMPILERS: PARALLEL } -} - -@Misc{IBM:XLFWhitePaper, - Author = "{IBM Corp.}", - Title = "{XL FORTRAN: Eight Ways to Boost Performance}", - Year = 2000, - HowPublished = "White Paper" -} - -@InProceedings{Calder:PLDI01, - Author = {B. Calder}, - Title = {Using Annotations to Reduce Dynamic Optimization Time}, - Booktitle = PLDI, - Address = "Salt Lake City, UT", - Month = June, - Year = 2001 -} - -@Article{ExceptOpt:Sigplan98, - Title = {Optimizing Away C++ Exception Handling }, - Author = {Jonathan L. 
Schilling}, - Journal = "ACM Sigplan Notices", - Month = Aug, - Year = 1998, - Volume = 33, - Number = 8 -} - -@Book{SedgewickBook:1988, - author = "Robert Sedgewick", - title = "Algorithms", - publisher = "Addison-Wesley, Inc.", - address = "Reading, MA", - year = 1988 -} - -@Article{ChilimbiLarus:ISSM98, - author = {Trishul M. Chilimbi and James R. Larus}, - title= {Using generational garbage collection to implement cache-conscious data placement}, - journal = {ACM SIGPLAN Notices}, - volume = {34}, - number = {3}, - year = {1999}, - issn = {0362-1340}, - pages = {37--48}, - doi = {http://doi.acm.org/10.1145/301589.286865}, -} - -@Article{KistlerFranz:TOPLAS03, - author = {Thomas Kistler and Michael Franz}, - title = {Continuous Program Optimization: A Case Study}, - journal = TOPLAS, - year = {2003}, - volume = {25}, - number = {4}, - pages = {500-548}, - month = {Jul} -} - -@Article{franz97communications, - author = "Michael Franz and Thomas Kistler", - title = "Slim binaries", - journal = CACM, - volume = {40}, - number = {12}, - year = "1997" -} - -@InProceedings{SafeTSA:Amme:PLDI01, - Author = "Wolfram Amme and Niall Dalton and Jeffery {von Ronne} and Michael Franz", - Title = "Safe{TSA}: A type safe and referentially secure mobile-code representation based on static single assignment form", - Booktitle = PLDI, - City = "Snowbird, Utah", - Month = June, - Year = 2001 -} - -@InProceedings{DAISY:ISCA97, - author = "Kemal Ebcioglu and Erik R. Altman", - title = "{DAISY}: Dynamic Compilation for 100\% Architectural Compatibility", - booktitle = ISCA, - pages = {26-37}, - year = 1997, - url = "citeseer.nj.nec.com/2006.html" -} - -@Article{Transmeta:MPR00, - author = "T. 
Halfhill", - title = "Transmeta Breaks x86 Low-Power Barrier", - Journal = "Microprocessor Report", - Volume = 14, - Number = "Archive 2", - Publisher = "Digital Equipment Corporation", - Month = Feb, - Year = 2000 -} - -@InProceedings{Dynamo:PLDI00, - author = "Vasanth Bala and Evelyn Duesterwald and Sanjeev Banerjia", - title = "Dynamo: {A} Transparent Dynamic Optimization System", - Booktitle = PLDI, - Month = June, - Year = 2000, - pages = "1-12", - url = "citeseer.nj.nec.com/vasanth00dynamo.html" -} - -@TechReport{Dynamo:TR99, - author = "V. Bala and E. Duesterwald and S. Banerjia", - title = "Transparent Dynamic Optimization", - institution = {HP Laboratories}, - number = {Report \#HPL-1999-77}, - type = {Tech. Report}, - year = "1999", - url = "citeseer.nj.nec.com/bala99transparent.html" } - -@Article{Spike:1997, - Author = "Robert S. Cohn and David W. Goodwin and P. Geoffrey Lowney", - Title = "Optimizing {A}lpha Executables on {W}indows {NT} with {S}pike", - Journal = "Digital Technical Journal", - volume = 9, - number = 4, - publisher = "Digital Equipment Corporation", - year = 1997 -} - -@Misc{Spike:Cohn:DTJ97, - author = "R. Cohn and D. Goodwin and P. Lowney and N. Rubin", - title = "Spike: An Optimizer for {Alpha/NT} Executables", - text = "R. Cohn, D. Goodwin, P. G. Lowney, and N. Rubin, Spike: An Optimizer for - Alpha/NT Executables, The USENIX Windows NT Workshop Proceedings, Seattle, - Wash. (August 1997): 17--24.", - year = "1997" -} - -@InProceedings{Etch:Romer:Usenix97, - author = "Ted Romer and Geoff Voelker and Denis Lee and Alec Wolman and Wayne Wong and Hank Levy and Brian Bershad and Brad Chen", - title = "Instrumentation and Optimization of {Win32/Intel} Executables Using {Etch}", - booktitle = {Proc. 
USENIX Windows NT Workshop}, - location = {Seattle WA}, - month = {August}, - year = "1997" -} - - -@Article{OM:Srivastava:JOPL93, - author = "Amitabh Srivastava and David Wall", - title = "{A} practical system for intermodule code optimization at link-time", - journal = "Journal of Programming Languages", - volume = "1", - number = "1", - month = "Dec.", - pages = "1--18", - year = "1992", - url = "citeseer.nj.nec.com/srivastava92practical.html" } - - -@Article{TAL:Morrisett:TOPLAS99, - Author = {Greg Morrisett and David Walker and Karl Crary and Neal Glew}, - Title = {From {System F} to typed assembly language}, - Journal = TOPLAS, - Volume-comment = 21, - Number-comment = 3, - Pages-comment = {528-569}, - Month = May, - Year = 1999} - - -@InProceedings{LTAL:PLDI03, - author = {Juan Chen and Dinghao Wu and Andrew W. Appel and Hai Fang}, - title = {A Provably Sound {TAL} for Back-end Optimization}, - booktitle= PLDI, - year = {2003}, - address = {San Diego, CA}, - month = {June} -} - - -@PhdThesis{Alto:MuthThesis99, - author = "Robert M. Muth", - title = "{A}lto: A Platform for Object Code Modification", - type = "Ph.D. {T}hesis, {D}epartment of {C}omputer {S}cience", - school = "University of Arizona", - year = "1999", - url = "citeseer.nj.nec.com/Article/muth99alto.html" -} - -@Article{MLD:Fernandez:PLDI95, - author = "Mary F. Fern{\'a}ndez", - title = "Simple and effective link-time optimization of {Modula-3} programs", - booktitle = PLDI, - year = "1995", - url = "citeseer.nj.nec.com/fernandez95simple.html" } - -@misc{Fisher:TraceScheduling, - author = "J. Fisher", - title = "Trace Scheduling: A General Technique for Global Microcode Compaction", - text = "J. Fisher. Trace Scheduling: A General Technique for Global Microcode Compaction. 
- IEEE Transactions on Computers, C-30(7):478--490, 1981.", - year = "1981" -} - -@misc{CLR, - author = "{Microsoft Corporation}", - title = "The {.NET} {C}ommon {L}anguage {R}untime", - note = "See web site at: http://msdn.microsoft.com/net" -} - -@misc{CLIOverview:TR01, - author = "Erik Meijer and John Gough", - title = {{A Technical Overview of the Common Language Infrastructure}}, - howpublished = "{http://research.microsoft.com/$\tilde{\ }$emeijer}", - year = 2002 -} - - -@Article{WalkTime:Computer97, - author = {Joseph Fisher}, - title = {Walk-Time Techniques: Catalyst for Architectural Change}, - journal = {IEEE Computer}, - year = {1997}, - volume = {30}, - number = {9}, - pages = {46-42}, - month = {Sept} -} - -@inproceedings{SmithHeil:IWIA99, - author = {James E. Smith and Timothy Heil and Subramanya Sastry and Todd Bezenek}, - title = "Achieving High Performance via Co-designed Virtual Machines" , - booktitle = "Proc. Int'l Workshop on Innovative Architecture (IWIA)", - year = "1999"} - -@misc{SmithDhodapkar:WCED2001, - author = {Dhodapkar, A. and Smith, J. }, - booktitle = {Workshop on Complexity-Effective Design}, - month = {June}, - title = {Saving and Restoring Implementation Contexts with co-Designed Virtual Machines}, - url = {http://citeseer.ist.psu.edu/dhodapkar01saving.html}, - year = {2001} -} - -@InProceedings{Self:OOPSLA87, - author = "David Ungar and Randall B. Smith", - title = "Self: The Power of Simplicity", - booktitle = OOPSLA, - year = "1987" -} - - -%% Note this really is in POPL despite the cite name -@InProceedings{SmallTalk80:PLDI84, - author = {L. Peter Deutsch and Allan M. 
Schiffman}, - title = {Efficient implementation of the Smalltalk-80 system}, - booktitle = POPL, - pages = {297-302}, - year = {1984}, - month = {Jan}, -} - -@InProceedings{GayAiken:PLDI01, - author = "David Gay and Alex Aiken", - title = "Language Support for Regions", - booktitle = PLDI, - address = "Snowbird, UT", - pages = "70-80", - month = June, - year = 2001 -} - -@InProceedings{BarrettZorn:PLDI93, - author = "David A. Barrett and Ben G. Zorn", - title = "Using Lifetime Predictors to Improve Memory Allocation Performance", - booktitle = PLDI, - address = "Albuquerque, New Mexixo", - pages = "187-196", - month = June, - year = 1993 -} - -@Article{Hanson:SPE90, - author = {David R. Hanson}, - title = "{Fast Allocation and Deallocation of Memory Based on - Object Lifetimes}", - journal = SPE, - year = {1990}, - volume = {20}, - number = {1}, - pages = {5-12}, - month = {Jan}, -} - -@InProceedings{Demers:POPL90, - author = "Alan Demers and Mark Weiser and Barry Hayes and Hans Boehm and - Daniel Bobrow and Scott Shenker", - title = "Combining generational and conservative garbage collection: - framework and implementations", - booktitle = POPL, - pages = "261--269", - year = "1990", -} - -@InProceedings{CraryWalkerMorrisett:POPL99, - author = "Karl Crary and David Walker and Greg Morrisett", - title = "Typed Memory Management in a Calculus of Capabilities", - booktitle = POPL, - address = "San Antonio, USA", - pages = "262--275", - year = "1999" -} - -@InProceedings{GayAiken:PLDI98, - author = "David Gay and Alexander Aiken", - title = "Memory Management with Explicit Regions", - booktitle = PLDI, - pages = "313-323", - year = "1998", - address = "Montreal, Canada" -} - -@Article{TofteTalpin:IC97, - author = "Mads Tofte and Jean-Pierre Talpin", - title = "Region-Based Memory Management", - journal = "Information and Computation", - year = "1997", - month = Feb, - pages = "132(2):109-176" -} - -@InProceedings{TofteTalpin:POPL94, - author = "Mads Tofte and 
Jean-Pierre Talpin", - title = "Implementation of the Typed Call-by-value $\lambda$-calculus Using a Stack of Regions", - booktitle = POPL, - pages = "188-201", - year = "1994", -} - -@InProceedings{Aiken:PLDI95, - author = {Alex Aiken and Manuel F\"{a}hndrich and Ralph Levien}, - title = "Better Static Memory Management: Improving Region-Based Analysis of Higher-Order Languages", - booktitle = PLDI, - pages = "174-185", - address = "La Jolla, CA", - month = June, - year = 1995 -} - -@InProceedings{EmamiEtAl:PLDI94, - author = "Maryam Emami and Rakesh Ghiya and Laurie J. Hendren", - title = "Context-Sensitive Interprocedural Points-to Analysis in the Presence of Function Pointers", - booktitle = PLDI, - pages = "242-256", - year = "1994", - address = "Orlando, FL", - month = Jun -} - -@InProceedings{HendrenEtAl:PLDI92, - author = "Laurie J. Hendren and Joseph Hummel and Alexandru Nicolau", - title = "Abstractions for Recursive Pointer Data Structures: Improving the Analysis and Transformation of Imperative Programs", - booktitle = PLDI, - pages = "249-260", - address = "San Francisco, CA", - month = June, - year = 1992 -} - -@inproceedings{HindPioli:SAS98:FlowSensitive, - author = "Michael Hind and Anthony Pioli", - title = "Assessing the Effects of Flow-Sensitivity on Pointer Alias Analyses", - booktitle = SAS, - pages = "57-81", - year = "1998", - url = "citeseer.ist.psu.edu/hind98assessing.html" } - - - -@InProceedings{ChengHwu:PLDI00, - author = "Ben-Chung Cheng and Wen-mei Hwu", - title = "Modular Interprocedural Pointer Analysis Using Access Paths: Design, Implementation, and Evaluation", - booktitle = "PLDI", - address = "Vancouver, British Columbia, Canada", - month = June, - year = 2000 -} -; pages = "57-69", - -@InProceedings{WilsonLam:PLDI95, - author = "Robert P. Wilson and Monica S. 
Lam", - title = "Effective Context Sensitive Pointer Analysis for {C} Programs", - booktitle = PLDI, - pages = "1-12", - month = June, - year = 1995 -} - -@inproceedings{HeineLam:PLDI03, - author = {David L. Heine and Monica S. Lam}, - title = {A Practical Flow-sensitive and Context-sensitive C and C++ Memory Leak Detector}, - booktitle = PLDI, - year = {2003}, - isbn = {1-58113-662-5}, - pages = {168--181}, - location = {San Diego}, - doi = {http://doi.acm.org/10.1145/781131.781150}, - } - -@InProceedings{Deutsch:PLDI94, - author = "Alain Deutsch", - title = "Interprocedural may-alias analysis for pointers: Beyond k-limiting", - booktitle = PLDI, - pages = "230-241", - month = June, - year = 1994 -} - -@inproceedings{Cooper:PLDI97:RegisterPromotion, - author = "Keith D. Cooper and John Lu", - title = "Register Promotion in {C} Programs", - booktitle = PLDI, - pages = "308-319", - year = "1997", - url = "citeseer.ist.psu.edu/cooper97register.html" } - -@InProceedings{Steensgaard:POPL96, - author = {Bjarne Steensgaard}, - title = {Points-to analysis in almost linear time}, - booktitle = POPL, - year = {1996}, - isbn = {0-89791-769-3}, - location = {St. 
Petersburg Beach, Florida, United States}, - doi = {http://doi.acm.org/10.1145/237721.237727}, -} - -@inproceedings{Steensgaard:CC96:FieldSensitive, - author = {Bjarne Steensgaard}, - title = {Points-to Analysis by Type Inference of Programs with Structures and Unions}, - booktitle = "Compiler Construction", - year = {1996}, - isbn = {3-540-61053-7}, - pages = {136--150}, - address = {London, UK}, - } - - -@inproceedings{FahndrichEtAl:PLDI00, - author = {Manuel F\"{a}hndrich and Jakob Rehof and Manuvir Das}, - title = "Scalable Context-Sensitive Flow Analysis Using Instantiation Constraints", - booktitle = PLDI, - year = "2000", - url = "citeseer.nj.nec.com/colby00certifying.html" -} -; month = June, -; address = "Vancouver", - -@inproceedings{YongHorwitzReps:PLDI99, - author = {Suan Hsi Yong and Susan Horwitz and Thomas Reps}, - title = {Pointer analysis for programs with structures and casting}, - booktitle = PLDI, - year = {1999}, - isbn = {1-58113-094-5}, - pages = {91--103}, - location = {Atlanta, Georgia, United States}, - doi = {http://doi.acm.org/10.1145/301618.301647}, -} - -@inproceedings{LiangHarrold:ESEC99, - author = "Donglin Liang and Mary Jean Harrold", - title = "Efficient Points-to Analysis for Whole-Program Analysis", - booktitle = "ESEC", - year = "1999", - url = "citeseer.nj.nec.com/liang99efficient.html" } -; pages = "199-215", - -@inproceedings{LiangHarrold:SAS01, - author = "Donglin Liang and Mary Jean Harrold", - title = "Efficient Computation of Parameterized Pointer Information for Interprocedural Analysis", - booktitle = "SAS 2001", - year = "2001", - month = "July"} - -@inproceedings{DAS:PLDI00, - author = {Manuvir Das}, - title = {Unification-based Pointer Analysis with Directional Assignments}, - booktitle = PLDI, - year = {2000}, - isbn = {1-58113-199-2}, - pages = {35--46}, - location = {Vancouver, British Columbia, Canada}, - doi = {http://doi.acm.org/10.1145/349299.349309} -} - -@PhdThesis{Andersen:PhD, - author = "Lars O. 
Andersen", - title = "Program Analysis and Specialization for the C Programming Language", - school = "DIKU, University of Copenhagen", - month = May, - year = 1994 -} - -@inproceedings{VivienRinard:PLDI01, - author = {Frédéric Vivien and Martin Rinard}, - title = {Incrementalized pointer and escape analysis}, - booktitle = PLDI, - year = {2001}, - isbn = {1-58113-414-2}, - pages = {35--46}, - location = {Snowbird, Utah, United States}, - doi = {http://doi.acm.org/10.1145/378795.378804}, -} - -@InProceedings{LarusHilfinger:PLDI88, - author = "James R. Larus and Paul N. Hilfinger", - title = "Detecting conflicts between structure accesses", - booktitle = PLDI, - month = "July", - year = "1988", - pages = "21-34", -} - -@Article{HendrenNicolau:TPDS90, - author = "Laurie J. Hendren and Alexandru Nicolau", - title = "Parallelizing programs with recursive data structures", - journal = "IEEE Transactions on Parallel and Distributed System", - year = "1990", - pages = "35-47" -} - -@article{Zilles:llubench, - author = {Craig B. Zilles}, - title = {Benchmark Health Considered Harmful}, - journal = {ACM SIGARCH Computer Architecture News}, - volume = {29}, - number = {3}, - year = {2001}, - issn = {0163-5964}, - pages = {4--5}, - doi = {http://doi.acm.org/10.1145/503205.503206}, - } - -@Article{Olden:Dynamic:TOPLAS95, - Author = {Anne Rogers and Martin C. Carlisle and John H. Reppy and Laurie J. 
Hendren}, - Title = {Supporting Dynamic Data Structures on Distributed - Memory Machines}, - Journal = TOPLAS, - Volume = {17}, - Number = 2, - Month = Mar, - Year = 1995} - -@Misc{PtrDist:URL95, - Title = {{The Pointer-intensive Benchmark Suite}}, - Author = {{Todd Austin}}, - Month = {September}, - Year = {1995}, - HowPublished = "\verb+www.cs.wisc.edu/~austin/ptr-dist.html+" -} - -@Misc{FreeBench:URL, - Title = {{The FreeBench v1.0 Benchmark Suite}}, - Author = "Peter Rundberg and Fredrik Warg", - Month = "Jan", - Year = "2002", - HowPublished = "\verb+http://www.freebench.org+" -} - - -@inproceedings{fpgrowth:SIGMOD00, - author = {Jiawei Han and Jian Pei and Yiwen Yin}, - title = {Mining frequent patterns without candidate generation}, - booktitle = SIGMOD, - year = {2000}, - isbn = {1-58113-217-4}, - pages = {1--12}, - location = {Dallas, TX}, - doi = {http://doi.acm.org/10.1145/342009.335372}, - } - -@Article{TheSSAPaper, - author = "Ron Cytron and Jeanne Ferrante and Barry K. Rosen and Mark N. Wegman and F. Kenneth Zadeck", - title = "Efficiently computing static single assignment form and the control dependence graph", - journal = TOPLAS, - pages = "13(4):451-490", - month = "October", - year = 1991 -} - - -@inproceedings{Pearce:WEA04, - title = {A dynamic algorithm for topologically sorting directed acyclic graphs}, - author = {David J. Pearce and Paul H. J. Kelly}, - booktitle = {Proc. 
3rd Int'l Workshop on Efficient and Experimental Algorithms (WEA 2004)}, - series = {Lecture Notes in Computer Science}, - publisher = {Springer-Verlag}, - year = {2004}, - url = {http://www.doc.ic.ac.uk/~phjk/Publications/DynTopoSortWEA2004.pdf} -} - -@Article{Sagiv:TOPLAS98, - author = "Mooly Sagiv and Thomas Reps and Reinhard Wilhelm", - title = "Solving shape-analysis problems in languages with destructive updating", - journal = TOPLAS, - volume = 20, - number = 1, - month = Jan, - year = 1998 -} - - -@Article{Cobrera:ShapeAnalysis:TPDS04, - author = {Francisco Cobrera and Rafael Asenjo and Emilio R. Zapata}, - title = {A Framework to Capture Dynamic Data Structures in Pointer-Based Codes}, - journal = {IEEE Transactions on Parallel and Distributed Systems}, - year = {2004}, - volume = {15}, - number = {2}, - pages = {151--166}, - month = {Feb}, -} - -@inproceedings{Corbera:ISC:ShapeAnalysis, - author = {Francisco Corbera and Rafael Asenjo and Emilio L. Zapata}, - title = {New shape analysis techniques for automatic parallelization of C codes}, - booktitle = ICS, - year = {1999}, - isbn = {1-58113-164-X}, - pages = {220--227}, - location = {Rhodes, Greece}, - doi = {http://doi.acm.org/10.1145/305138.305196}, - } - - -@Article{TofteBirkedal:TOPLAS98, - author = "Mads Tofte and Lars Birkedal", - title = "A region inference algorithm", - journal = TOPLAS, - volume = 20, - number = 4, - month = "July", - year = 1998, - pages = "724-768", -} - - -@InProceedings{Birkedal:POPL96, - author = "Lars Birkedal and Mads Tofte and M. 
Vejlstrup", - title = "From Region Inference to von Neumann Machines via Region Representation Inference", - booktitle = POPL, - pages = "171-183", - year = "1996", -} - -@Article{Blanchet:TOPLAS03, - author = {Bruno Blanchet}, - title = {{Escape Analysis for Java(TM): Theory and Practice}}, - journal = TOPLAS, - year = {2003}, - volume = {25}, - number = {6}, - pages = {713-775}, - month = {Nov}, -} - -@InProceedings{Hallenberg:PLDI02, - author = "Niels Hallenberg and Martin Elsman and Mads Tofte", - title = "Combining region inference and garbage collection", - booktitle = PLDI, - address = "Berlin, Germany", - month = June, - year = 2002 -} - -@InProceedings{Wilhelm:CC00, - author = "Reinhard Wilhelm and Mooly Sagiv and Thomas Reps", - title = "Shape analysis", - booktitle = CC, - month = "Mar-Apr", - year = 2000 -} -% address = "Berlin, Ger.", - -@Misc{DavidSehr:Personal02, - Author = "David Sehr", - Month = "April", - Year = "2002", - Annote = { CATEGORY: PERSONAL COMMUNICATION }, - HowPublished = "Personal communication" -} - - -@inproceedings{ABCD:PLDI00, - author = "Rastislav Bodik and Rajiv Gupta and Vivek Sarkar", - title = "{ABCD}: eliminating array bounds checks on demand", - booktitle = PLDI, - year = "2000", - url = "citeseer.nj.nec.com/bodik00abcd.html" } - -@Article{LimLeeSha:PDCP, - Title = {Ensuring Integrity and Serivce Availability in a Web Based - Control Laboratory}, - Author = {Sungsoo Lim and Kihwal Lee and Lui Sha}, - Journal = "Journal of Parallel and Distributed Computing Practices" -} - - - -@article{ rtjava:COMPUTER00, - author = "Greg Bollella and James Gosling", - title = "The Real-Time Specification for {Java}", - journal = "IEEE Computer", - volume = "33", - number = "6", - pages = "47--54", - year = "2000", - url = "citeseer.nj.nec.com/bollella00realtime.html" -} - - -@book{JVM, - author = "Tim Lindholm and Frank Yellin", - title = "The {Java} {V}irtual {M}achine {S}pecification", - publisher = "Ad{\-d}i{\-s}on-Wes{\-l}ey", - 
address = "Reading, MA", - isbn = "0-201-63452-X", - year = "1997" -} - -@book{JavaSpec, - author = "James Gosling and Bill Joy and Guy Steele and Gilad Bracha", - title = "The {Java} {L}anguage {S}pecification, 2$^{nd}$ Ed.", - publisher = "Ad{\-d}i{\-s}on-Wes{\-l}ey", - address = "Reading, MA", - isbn = "0-201-310082", - year = "2000" -} - -@manual{Ada:Manual, -note = {International Standard ISO/IEC 8652:1995}, -organization = {International Organisation for Standardisation}, -title = {Ada95 Reference Manual}, -year = {1995} -} - -@misc{TinyOS, - Title = "{TinyOS}, A Component-based {OS} for the {N}etworked {S}ensor {R}egime", - HowPublished = "http://webs.cs.berkeley.edu/tos/" -} - -@InProceedings{EMachine:PLDI02, - author = "Thomas A. Henzinger and Christoph M. Kirsch", - title = "The Embedded Machine: Predictable, Portable Real-Time Code", - booktitle = PLDI, - address = "Berlin, Germany", - month = June, - year = 2002 -} - -@InProceedings{ XuMillerReps:PLDI01, - author = "Zhichen Xu and Barton P. Miller and Thomas Reps", - title = "Safety checking of machine code", - booktitle = PLDI, - address = "Vancouver B.C., Canada", - pages = "70--82", - year = "2000", - } - - - -@inproceedings{Boyapati:PLDI03, - author = "Chandrasekhar Boyapati and Alexandru Salcianu and William Beebee and Martin Rinard", - title = "Ownership Types for Safe Region-Based Memory Management in Real-Time Java", - booktitle = PLDI, - year = "2003" -} - -@inproceedings{PCC:POPL97, - author = {George C. Necula}, - title = {Proof-Carrying Code}, - booktitle = POPL, - month = Jan, - address-comment = {Paris}, - pages-comment = {106--119}, - year = 1997, - url = "citeseer.nj.nec.com/50371.html" } - -@inproceedings{PCCForKernels:OSDI96, - author = "George C. 
Necula and Peter Lee", - title = "Safe Kernel Extensions Without Run-Time Checking", - booktitle = OSDI, - publisher-comment = "USENIX", - address-comment = "Berkeley, CA, USA", - editor-comment = "{USENIX}", - pages-comment = "229--243", - city-comment = "Seattle, {WA}", - month = Oct, - year = "1996" -} - -%% url = "citeseer.nj.nec.com/necula96safe.html" - -@inproceedings{CertifyingCompilerC:PLDI98, - author = "G. C. Necula and P. Lee", - title = "The Design and Implementation of a Certifying Compiler", - booktitle = PLDI, - pages = "333--344", - year = "1998", - url = "citeseer.nj.nec.com/necula98design.html" -} - -@inproceedings{CertifyingCompilerJava:PLDI00, - author = "Christopher Colby and Peter Lee and George C. Necula and Fred Blau and Mark Plesko and Kenneth Cline", - title = "A certifying compiler for {Java}", - booktitle = PLDI, - month = June, - year = "2000", - url = "citeseer.nj.nec.com/colby00certifying.html" -} -% address = "Vancouver, Canada", - - -@InProceedings{Vault:PLDI01, - author = {Robert DeLine and Manuel F\"{a}hndrich}, - title = "Enforcing high-level protocols in low-level software", - booktitle = PLDI, - address = "Snowbird, UT", - month = June, - year = 2001 -} - -@InProceedings{Cyclone:Usenix02, - author = "Trevor Jim and Greg Morrisett and Dan Grossman and Michael Hicks and James Cheney and Yanling Wang", - title = "Cyclone: A Safe Dialect of {C}", - booktitle = "USENIX Annual Technical Conference", - address = "Monterey, CA", - year = 2002 -} - -@InProceedings{Cyclone:PLDI02, - author = "Dan Grossman and Greg Morrisett and Trevor Jim and Michael Hicks and Yanling Wang and James Cheney", - title = "Region-based Memory Management in Cyclone", - booktitle = PLDI, - month = June, - year = 2002 -} -% address = "Berlin, Germany", - -@InProceedings{CCured:POPL02, - author = "George C. 
Necula and Scott McPeak and Westley Weimer", - title = "CCured: Type-Safe Retrofitting of Legacy Code", - booktitle = POPL, - address = "London", - month = Jan, - year = 2002 -} - -@Article{Pugh:CACM, - Author = {W. Pugh}, - Title = {A practical algorithm for exact array dependence analysis}, - Journal = CACM, - Volume = {35}, - Number = 8, - Pages = {102--114}, - Month = Aug, - Year = 1992} - -@TechReport{OmegaManual, - Author = {Wayne Kelly and Vadim Maslov and William Pugh and - Evan Rosser and Tatiana Shpeisman and David Wonnacott}, - Title = {{T}he {O}mega {L}ibrary {I}nterface {G}uide}, - Institution = {Computer Science Dept., U. Maryland, College Park}, - Month = Apr, - Year = 1996} - -@inproceedings{ fischer74superexponential, - author = "Fischer and Rabin", - title = "Super-Exponential Complexity of Presburger Arithmetic", - booktitle = "{SIAMAMS}: Complexity of Computation: Proceedings of a Symp. in Applied Mathematics of the American Mathematical Society and the Society for Industrial and Applied Mathematics", - year = "1974", - url = "citeseer.nj.nec.com/fischer74superexponential.html" - } - -@Book{DecisionProcsBook, - Author = {Daniel Kroening and Ofer Strichman}, - Title = {Decision Procedures: An Algorithmic Point of View}, - Publisher = {Springer}, - Edition = {1st}, - Year = {2007}, - Month = {July} -} - - -@Inproceedings{Wagner:NDSS00, - author = "David Wagner and Jeffrey S. {Foster} and Eric A. 
Brewer and Alexander Aiken", - title = "A First Step towards Automated Detection of Buffer Overrun Vulnerabilities", - booktitle ="Network and Distributed System Security Symp.", - address = "San Diego, CA", - pages = "3--17", - month ="February", - year="2000", - url = "citeseer.nj.nec.com/wagner00first.html" -} - -@inproceedings{AddrSpaceRandomization:CCS04, - author = {Hovav Shacham and Matthew Page and Ben Pfaff and Eu-Jin Goh and Nagendra Modadugu and Dan Boneh}, - title = {On the effectiveness of address-space randomization}, - booktitle = {Proceedings ACM Conf. on Computer and Communications Security (CCS '04)}, - year = {2004}, - pages = {298--307}, - location = {Washington DC, USA}, - doi = {http://doi.acm.org/10.1145/1030083.1030124}, -} - -@MastersThesis{Razafimahefa:SideEffectForJava:Thesis99, - author = "Chrislain Razafimahefa", - title = "A study of side-effect analyses for Java", - school = "McGill University", - year = "1999", - month = {Dec}, - url = "citeseer.ist.psu.edu/razafimahefa99study.html" -} -@inproceedings{Milanova:ModRef:ISSTA02, - author = "Ana Milanova and Atanas Rountev and Barbara Ryder", - title = "Parameterized object sensitivity for points-to and side-effect analyses for Java", - booktitle = ISSTA, - pages = "1--11", - year = "2002", - url = "citeseer.ist.psu.edu/milanova02parameterized.html" } - -@inproceedings{Stocks:ModRef:ISSTA98, - author = "Phil Stocks and Barbara G. 
Ryder and William Landi and Sean Zhang", - title = "Comparing Flow and Context Sensitivity on the Modification-Side-Effects Problem", - booktitle = ISSTA, - pages = "21-31", - year = "1998", - url = "citeseer.ist.psu.edu/article/stocks98comparing.html" } - -@InProceedings{CooperKennedy:PLDI88, - Author = {Keith Cooper and Ken Kennedy}, - Title = {Interprocedural Side-effect Analysis in Linear Time}, - BookTitle = PLDI, - Address = {Atlanta, GA}, - Month = June, - Year = 1988} - -@InProceedings{LRZ:PLDI93, - Author = {William Landi and Barbara Ryder and Sean Zhang}, - Title = {Interprocedural Modification Side Effect Analysis with Pointer Aliasing}, - BookTitle = PLDI, - Address = {Albuquerque, NM}, - Month = June, - Year = 1993} - -@inproceedings{Banning:ModRef:POPL79, - author = {John P. Banning}, - title = {An efficient way to find the side effects of procedure calls and the aliases of variables}, - booktitle = POPL, - year = {1979}, - pages = {29--41}, - location = {San Antonio, Texas}, - doi = {http://doi.acm.org/10.1145/567752.567756}, - address = {New York, NY, USA}, - } - -@inproceedings{Cooper:ModRef:POPL85, - author = {Keith D. 
Cooper}, - title = {Analyzing aliases of reference formal parameters}, - booktitle = POPL, - year = {1985}, - isbn = {0-89791-147-4}, - pages = {281--290}, - location = {New Orleans, Louisiana, United States}, - doi = {http://doi.acm.org/10.1145/318593.318658}, - address = {New York, NY, USA}, - } - -@Article{RyderEtAl:TOPLAS01, - Author = {Barbara Ryder and William Landi and Philip Stocks and Sean Zhang and Rita Altucher}, - Title = {A Schema for Interprocedural Modification Side-Effect Analysis with Pointer Aliasing}, - journal = TOPLAS, - Volume = {23}, - Number = 2, - Pages = {105--186}, - Month = Mar, - Year = 2001} - - -@Article{SSAPRE:TOPLAS99, - author = {Robert Kennedy and Sun Chan and Shin-Ming Liu and Raymond Lo and Peng Tu and Fred Chow}, - title = {Partial Redundancy Elimination in SSA Form}, - journal = TOPLAS, - year = {1999}, - volume = {21}, - number = {3}, - pages = {627-676}, - month = {May}, -} - -%%============================================================================ -%% COMPILERS: SCALAR: DATAFLOW OPTIMIZATION -%%============================================================================ - -@article{KamUllman:JACM76, - author = {Kam, John B. and Ullman, Jeffrey D.}, - title = {Global Data Flow Analysis and Iterative Algorithms}, - journal = {J. ACM}, - volume = {23}, - issue = {1}, - month = {January}, - year = {1976}, - issn = {0004-5411}, - pages = {158--171}, - numpages = {14}, - url = {http://doi.acm.org/10.1145/321921.321938}, - doi = {http://doi.acm.org/10.1145/321921.321938}, - acmid = {321938}, - publisher = {ACM}, - address = {New York, NY, USA}, -} - -%%============================================================================ -%% COMPILERS: SCALAR: OTHER OPTIMIZATION -%%============================================================================ - -@InProceedings{Stampede:ASPLOS02, - author = {Antonia Zhai and Christopher B. Colohan and J. Gregory Steffan - and Todd C. 
Mowry}, - title = "{Compiler Optimization of Scalar Value Communication Between - Speculative Threads}", - booktitle = ASPLOS, - address = {San Jose, CA, USA}, - month = {Oct}, - year = {2002} -} - -%%============================================================================ -%% COMPILERS: SCALAR: RUNTIME COMPILATION AND OPTIMIZATION -%%============================================================================ - -@InProceedings{VCODE:PLDI96, - author = "Dawson Engler", - title = "VCODE: A retargetable, extensible, very fast dynamic code generation system", - booktitle = PLDI, - year = "1996" -} - -@InProceedings{TCC:PLDI97, - author = "M. Poletto and D. Engler and M. Kaashoek", - title = "{tcc: A System for Fast, Flexible, and High-level Dynamic - Code Generation}", - booktitle = PLDI, - year = "1997" -} - -%% author = "Michael G. Burke and Jong-Deok Choi and Stephen Fink and David Grove and Michael Hind and Vivek Sarkar and Mauricio J. Serrano and Vugranam C. Sreedhar and Harini Srinivasan and John Whaley", -%% -@InProceedings{Jalapeno, - author = {Burke, Michael G. and Choi, Jong-Deok and Fink, Stephen and Grove, David and Hind, Michael and Sarkar, Vivek and Serrano, Mauricio J. and Sreedhar, V. C. and Srinivasan, Harini and Whaley, John}, - title = "{The Jalape{\~n}o Dynamic Optimizing Compiler for Java}", - booktitle = "Java Grande", - pages = "129-141", - year = "1999", - url = "citeseer.nj.nec.com/burke99jalapeno.html" } - -@Misc{Hotspot, - author = "D. Griswold", - title = "{The Java HotSpot Virtual Machine Architecture}", - text = "D. Griswold, The Java HotSpot Virtual Machine Architecture, March 1998. - Sun Microsystems Whitepaper.", - year = "1998" } - - - -@InProceedings{IntelJIT;PLDI98, - author = {A-R. Adl-Tabatabai and M. Cierniak and G-Y. Lueh and V. M. Parikh and J. M. 
Stichnoth}, - title = "{Fast and effective code generation in a Just-In-Time Java compiler}", - booktitle = PLDI, - year = {1998}, - month = {May} -} - -@InProceedings{DyC:PLDI99, - author = "Brian Grant and Matthai Philipose and Markus Mock and Craig Chambers and Susan J. Eggers", - title = "An Evaluation of Staged Run-Time Optimizations in {DyC}", - booktitle = PLDI, - pages = "293-304", - year = "1999" -} - -@InProceedings{Transmeta:CGO03, - author = {James C. Dehnert and Brian K. Grant and John P. Banning and Richard Johnson and Thomas Kistler and Alexander Klaiber and Jim Mattson}, - title = {The {T}ransmeta {C}ode {M}orphing {S}oftware: {U}sing Speculation, Recovery and Adaptive Retranslation to Address Real-life Challenges}, - booktitle = CGO, - address = {San Francisco, CA}, - year = {2003}, - month = {Mar} -} - -@Misc{TransmetaWhitePaper:Jan00, - author = {A. Klaiber}, - title = "{The Technology Behind Crusoe Processors}", - text = {White Paper, Transmeta Corp.}, - year = {2000} -} - -@InProceedings{RPA:MICRO00, - author = {Timothy H. Heil and James E. Smith}, - title = {Relational profiling: enabling thread-level parallelism in virtual machines}, - booktitle = MICRO, - pages = "281-290", - year = {2000}, - address = {Monterey, CA}, - month = {Dec}, - url = {citeseer.nj.nec.com/heil00relational.html} -} - -@InProceedings{LLVM:MICRO03, - author = {Vikram Adve and Chris Lattner and Michael Brukman and - Anand Shukla and Brian Gaeke}, - title = "{LLVA: A Low-Level Virtual Instruction Set Architecture}", - booktitle = {MICRO 36: Proceedings of the 36th annual ACM/IEEE Int´l Symp. on Microarchitecture}, - pages-comment = "205-216", - year = {2003}, - address-comment = {San Diego, CA}, - month = Dec -} - -@InProceedings{VISCSoftware:NGS04, - author = {Vikram Adve and Michael Brukman and Alkis Evlogimenos and - Brian Gaeke}, - title = {Software Implications of Virtual Instruction Set Computers}, - booktitle = {Proc. 
Workshop on Next Generation Software}, - pages = {205--216}, - year = {2004}, - month = {Apr}, - address = {Santa Fe, NM}, -} - - -@MastersThesis{Monroe:MSThesis05, - author = {Brent M. Monroe}, - title = {Measuring and Improving the Performance of {L}inux on a Virtual Instruction Set Architecture}, - school = {Computer Science Dept., Univ. of Illinois at Urbana-Champaign}, - year = {2005}, - address = {Urbana, IL}, - month = {Dec}, -} -% note = {{\em See {\tt http://llvm.cs.uiuc.edu}.}} - -@InProceedings{WSAnalyis:ISCA02, - author = {A. S. Dhodapkar and J. E. Smith}, - title = "{Managing Multi-Configuration Hardware via Dynamic Working Set Analysis}", - booktitle = ISCA, - year = {2002}, - address = {Alaska}, - month = {May} -} - -@InProceedings{KimSmith:ISCA02, - author = {H-S. Kim and J. E. Smith}, - title = {An Instruction Set and Microarchitecture for Instruction Level Distributed Processing}, - booktitle = ISCA, - year = {2002}, - address = {Alaska}, - month = {May} -} - -@InProceedings{OberoiSohi:ISCA03, - author = {Paramjit Oberoi and Gurinder S. Sohi}, - title = {Parallelism in the Front-End}, - booktitle = ISCA, - year = {2003}, - month = {June} -} - -@InProceedings{TRIPS:ISCA03, - author = {K. Sankaralingam and R. Nagarajan and H. Liu and C. Kim and and J. Huh}, - title = {Exploiting {ILP}, {TLP}, and {DLP} with the {P}olymorphous {TRIPS} {A}rchitecture}, - booktitle = ISCA, - year = {2003}, - month = {June} -} - -@InProceedings{ReplayEval:MICRO01, - author = {B. Fahs and S. Bose and M. Crum and B. Slechta and F. Spadini and T. Tung and S. J. Patel and S. S. Lumetta}, - title = {{P}erformance {C}haracterization of a {H}ardware {F}ramework for {D}ynamic {O}ptimization}, - booktitle = MICRO, - year = {2001}, - month = {Dec} -} - -@Article{Replay:TOC01, - author = {S. J. Patel and S. S. 
Lumetta}, - title = {re{PL}ay: A {H}ardware {F}ramework for {D}ynamic {O}ptimization}, - journal = {IEEE Transactions on Computers}, - year = {2001}, - month = {June}, -} - -@InProceedings{Hotspot:ISCA00, - author = {M. C. Merten and A. R. Trick and E. M. Nystrom and R. D. Barnes and W-m. W. Hwu}, - title = {A Hardware Mechanism for Dynamic Extraction and Relayout of Program Hot Spots}, - booktitle = ISCA, - pages = {59--70}, - year = {2000}, - month = {June} -} - -@InProceedings{ZillesSohi:HPCA01, - author = {Craig Zilles and Gurindar S. Sohi}, - title = {A Programmable Coprocessor for Profiling}, - booktitle = HPCA, - year = {2001}, - month = {Jan} -} - -@InProceedings{TraceProcessors:MICRO97, - author = {E. Rotenberg and Q. A. Jacobson and Y. Sazeides and J. E. Smith}, - title = {Trace Processors}, - booktitle = MICRO, - pages = {138--148}, - year = {1997}, - month = {Dec} -} - -@Article{AS400:IBMSJ89, - author = {B. E. Clark and M. J. Corrigan}, - title = {{Application System/400} Performance Characteristics}, - journal = {{IBM} Systems Journal}, - year = {1989}, - volume = {28}, - number = {3}, - pages = {407--423}, -} - -@Article{IBM:s38, - author = {IBM Corporation}, - title = {{System/38-A high-level machine}}, - journal = {IBM SYSTEM/38 Technical Developments}, - year = {1978}, - volume = {}, - number = {}, - pages = {}, - isbn = {0-933186-03-7}, - note = {available through IBM branch offices}, -} - -@article{soltis:computer81, - author = {F.G. Soltis}, - title = {Design of a Small Business Data Processing System}, - journal ={IEEE Computer}, - volume = {14}, - issn = {0018-9162}, - year = {1981}, - pages = {77-93}, - doi = {http://doi.ieeecomputersociety.org/10.1109/C-M.1981.220610}, - publisher = {IEEE Computer Society}, - address = {Los Alamitos, CA, USA}, -} - -@Article{FX32:Micro98, - author = {Anton Chernoff and Mark Herdeg and Ray Hookway and Chris Reeve and Norman Rubin and Tony Tye and S. 
Bharadwaj Yadavalli and John Yates}, - title = {{FX!32}: A Profile-Directed Binary Translator}, - journal = MICRO, - year = {1998}, - volume = {18}, - number = {2}, - pages = {56--64} -} - -@Article{FX32:TCCA99, - author = {Paul J. Drongowski and David Hunter and Morteza Fayyazi and David Kaeli and Jason Casmira}, - title = {Studying the Performance of the {FX!32} Binary Translation System}, - journal = {{IEEE} Computer Society Technical Committee Computer Architecture Newsletter}, - year = {1999}, - month = {Dec}, - pages = {56--68} -} - - -@InProceedings{LinearScan:PLDI98, - author = {O. Traub and G. Holloway and M. D. Smith}, - title = {Quality and Speed in Linear-scan Register Allocation}, - booktitle = PLDI, - year = {1998}, - month = {May} -} - - -@article{BallLarus:TOPLAS94, - author = "T. Ball and J. R. Larus", - title = "Optimally Profiling and Tracing Programs", - journal = TOPLAS, - volume = "16", - number = "4", - month = "July", - pages = "1319--1360", - year = "1994", - url = "citeseer.nj.nec.com/ball92optimally.html" -} - -@article{Knuth:BIT, - author = "D. E. Knuth and F. R. Stevenson", - title = "Optimal Measurement Points for Program Frequency Counts", - journal = "BIT", - volume = "13", - pages = "313--322", - year = "1973" -} - -@InProceedings{MDSmith:tracecache, - author = {Kim Hazelwood and Michael D. Smith}, - title = {Code Cache Management Schemes for Dynamic Optimizers}, - booktitle = {Proc. Workshop on Interaction between Compilers and Computer Architecture}, - year = {2002}, - address = {Boston, MA}, - month = {Feb} -} - -@misc{anderson97continuous, - Author = {J. Anderson and L. Berc and J. Dean and S. Ghemawat and M. Henzinger and S. Leung and D. Sites and M. Vandevoorde and C. Waldspurger and W. Weihl}, - Title = {Continuous profiling: Where have all the cycles gone}, - BookTitle = {Technical Note 1997-016. 
Digital Equipment - Corporation Systems Research Center, Palo Alto, Calif., July 1997}, - Year = "1997", -} - -@article{Roar:merten, - author = "Matthew C. Merten and Andrew R. Trick and Ronald D. Barnes and Erik M. Nystrom and Christopher N. George and John C. Gyllenhaal and Wen-mei W. Hwu", - title = "An Architectural Framework for Run-Time Optimization", - journal = "IEEE Transactions on Computers", - volume = "50", - pages = "567--589", - year = "2001" -} - -@inproceedings{WuLarus:MICRO94:StaticProfile, - author = {Youfeng Wu and James R. Larus}, - title = {Static branch frequency and program profile analysis}, - booktitle = MICRO, - year = {1994}, - isbn = {0-89791-707-3}, - pages = {1--11}, - location = {San Jose, California, United States}, - doi = {http://doi.acm.org/10.1145/192724.192725}, - } - -@inproceedings{BallLarus:PLDI93:StaticProfile, - author = "Thomas Ball and James R. Larus", - title = "Branch Prediction For Free", - booktitle = PLDI, - pages = "300-313", - year = "1993", - url = "citeseer.ist.psu.edu/ball93branch.html" } - -@inproceedings{Patterson:PLDI95:StaticProfile, - author = "Jason R. C. Patterson", - title = "Accurate Static Branch Prediction by Value Range Propagation", - booktitle = PLDI, - pages = "67-78", - year = "1995", - url = "citeseer.ist.psu.edu/patterson95accurate.html" } - -@InProceedings{Larus:WholeProgramPaths, - author = {James Larus}, - title = {Whole Program Paths}, - booktitle = PLDI, - year = {1999}, - month = {May} -} - -@InProceedings{arnold:jalapeno, - author = "Matthew Arnold and Stephen J. Fink and David Grove and Michael Hind and Peter F. Sweeney", - title = "{Adaptive optimization in the Jalape{\~n}o JVM}", - booktitle = OOPSLA, - pages = "47-65", - year = 2000 -} - -@inproceedings{arnold:pldi01, - author = "Matthew Arnold and Barbara G. 
Ryder", - title = "A Framework for Reducing the Cost of Instrumented Code", - booktitle = PLDI, - pages = "168--179", - year = "2001" -} - - - -@InProceedings{arnold:oopsla02, - author = "Matthew Arnold and Michael Hind and Barbara G. Ryder", - title = "Online Feedback-Directed Optimization of Java", - booktitle = OOPSLA, - month = Nov, - year = 2002 -} - -@InProceedings{Tracecache:ISCA98, - author = {Daniel H. Friendly and Sanjay J. Patel and Yale N. Patt}, - title = {Putting the Fill Unit to Work: Dynamic Optimizations for Trace Cache Microprocessors}, - booktitle = ISCA, - year = {1998}, - month = {June} -} - -%%============================================================================ -%% COMPILERS: PARALLEL: PARALLEL PROGRAMMING LANGUAGES -%%============================================================================ - - -@InProceedings{Cilk:PPOPP95, - Title = {Cilk: An Efficient Multithreaded Runtime System}, - Author = {Robert D. Blumofe and Christopher F. Joerg and - Charles E. Leiserson and Keith H. Randall and Yuli Zhou}, - BookTitle = "Proc. 5$^{th}$ {ACM} {SIGPLAN} Symp. on - Principles and Practice of Parallel Programming (PPOPP)", - Address = {Santa Barbara, CA}, - Month = Jul, - Year = 1995, - Pages = {207-216}, -} - - -@InProceedings{Cilk:PLDI98, - Title = {The Implementation of the {C}ilk-5 Multithreaded Language}, - Author = {M. Frigo and C. Leiserson and K. Randall}, - BookTitle = PLDI, - Address = Montreal, - Month = June, - Year = 1998, - Pages = {212-223}, - Annote = {CATEGORY: LANGUAGES: PARALLEL.} -} - -@InProceedings{CacheOblivious:FOCS99, - Title = {Cache-Oblivious Algorithms}, - Author = {Matteo Frigo and Charles E. Leiserson and Harald Prokop and -Sridhar Ramachandran}, - BookTitle = {40th Ann. Symp.
Foundations of Computer Science}, - Address = {New York, USA}, - Month = Oct, - Year = 1999, - Pages = {17-18}, - Annote = {CATEGORY: LANGUAGES: PARALLEL.}} - - -%%============================================================================ -%% CATEGORY: ARCHITECTURES: SCALAR: CHIP PARALLELISM -%%============================================================================ - -@Article{BillionQ:Computer97, - author = {D. Burger and J. R. Goodman}, - title = {Billion-Transistor Architectures}, - journal = {IEEE Computer}, - year = {1997}, - volume = {30}, - number = {9}, - pages = {46-49}, - month = {Sept} -} - -@InProceedings{tullsen95smt, - author = {Dean M. Tullsen and Susan Eggers and Henry M. Levy}, - title = "{Simultaneous Multithreading: Maximizing On-Chip Parallelism}", - booktitle = ISCA, - pages = {392--403}, - month = {June}, - year = {1995} -} - -@article{olukotun96cmp, - author = {Kunle Olukotun and Basem A. Nayfeh and Lance Hammond and Ken - Wilson and Kunyung Chang}, - title = "{The case for a single-chip multiprocessor}", - journal = "{SIGPLAN Notices}", - volume = {31}, - number = {9}, - year = {1996}, - pages = {2--11}, -} - -@article{IntelHT:IJ02, - author = {Deborah T. Marr and Frank Binns and David L. Hill and - Glenn Hinton and David A. Koufaty and J. Alan Miller and - Michael Upton}, - title = "{Hyper-Threading Technology Architecture and Microarchitecture: - A Hypertext History}", - journal = "{Intel Technology Journal}", - volume = {6}, - number = {1}, - month = {Feb}, - year = {2002} -} - -@article{IMT, - author = {Il Park and Babak Falsafi and T. N. Vijaykumar}, - title = "{Implicitly-multithreaded processors}", - journal = "{SIGARCH Computer Architecture News}", - volume = {31}, - number = {2}, - year = {2003}, - pages = {39--51}, -} - -@InProceedings{IMT:ISCA03, - author = {Il Park and Babak Falsafi and T. N. 
Vijaykumar}, - title = "{Implicitly-multithreaded processors}", - booktitle = ISCA, - pages = {39--51}, - address = {San Diego, California}, - month = {June}, - year = {2003} -} - -@article{RAW:IEEE02, - author = {Michael Bedford Taylor and Jason Kim and Jason Miller and - David Wentzlaff and Fae Ghodrat and Ben Greenwald and - Henry Hoffmann and Paul Johnson and Jae-Wook Lee and Walter Lee and - Albert Ma and Arvind Saraf and Mark Seneski and Nathan Shnidman and - Volker Strumpen and Matt Frank and Saman Amarasinghe and - Anant Agarwal}, - title = "{The Raw Microprocessor: A Computational Fabric for Software - Circuits and General Purpose Programs}", - journal = MICRO, - month = {Mar/Apr}, - year = {2002} -} - -%%============================================================================ -%% CATEGORY: ARCHITECTURES: SCALAR: CHIP PARALLELISM, SPECULATIVE -%%============================================================================ - -@article{krishnan99tls, - author = {Venkata Krishnan and Josep Torrellas}, - title = "{A Chip-Multiprocessor Architecture with Speculative - Multithreading}", - journal = "{IEEE Transactions on Computers}", - volume = {48}, - number = {9}, - month = {Sep}, - year = {1999}, - pages = {866--880} -} - -@InProceedings{SpeculativeSync:ASPLOS02, - author = {Jos\'{e} F. Mart\'{i}nez and Josep Torrellas}, - - title = {Speculative Synchronization: Applying Thread-Level Speculation to Explicitly Parallel Applications}, - booktitle = ASPLOS, - year = {2002}, - month = {Oct}} - - -@Article{FastCommInCMP:IPP01, - author = {Venkata Krishnan and Josep Torrellas}, - title = {The Need for Fast Communication in Hardware-Based - Speculative Chip Multiprocessors}, - journal = {International Journal of Parallel Programming}, - year = {2001}, - volume = {29}, - number = {1}, - pages = {3-33}, - month = {Feb}} - -@InProceedings{zilles02mssp, - author = {Craig Zilles and Gurindar S.
Sohi}, - title = "{Master/slave speculative parallelization}", - booktitle = MICRO, - address = {Istanbul, Turkey}, - pages = {85--96}, - month = {Nov}, - year = {2002} -} - -@InProceedings{IntelHT:HelperThreads:CGO04, - author = {Dongkeun Kim and Steve Shih-wei Liao and Perry H. Wang and - Juan del Cuvillo and Xinmin Tian and Xiang Zou and Hong Wang and - Donald Yeung and Milind Girkar and John P. Shen}, - title = "{Physical Experimentation with Prefetching Helper Threads on - Intel's Hyper-Threaded Processors}", - booktitle = CGO, - address = {San Jose, California}, - month = {Mar}, - year = {2004} -} - -@InProceedings{Multiplex:ICS01, - author = {Chong-Liang Ooi and Seon Wook Kim and Il Park and Rudolf - Eigenmann and Babak Falsafi and T. N. Vijaykumar}, - title = {Multiplex: unifying conventional and speculative thread-level - parallelism on a chip multiprocessor}, - booktitle = ICS, - pages = {368--380}, - address = {Sorrento, Italy}, - year = {2001} -} - -@InProceedings{MultiScalar:ISCA95, - author = {Gurindar S. Sohi and Scott E. Breach and T. N. 
Vijaykumar}, - title = "{Multiscalar processors}", - booktitle = ISCA, - pages = {414--425}, - month = {June}, - year = {1995} -} - -@article{SpMT:TPDS04, - author = {Anasua Bhowmik and Manoj Franklin}, - title = "{A General Compiler Framework for Speculative Multithreaded - Processors}", - journal = "{IEEE Transactions on Parallel and Distributed Systems}", - pages = {713-724}, - volume = {15}, - number = {8}, - month = {Aug}, - year = {2004} -} - -@InProceedings{Hydra:ASPLOS98, - author = {Lance Hammond and Mark Willey and Kunle Olukotun}, - title = "{Data speculation support for a chip multiprocessor}", - booktitle = ASPLOS, - pages = {58--69}, - address = {San Jose, CA, USA}, - year = {1998} -} - -@InProceedings{Marcuello:ClusteredSpec:ICS99, - author = {Pedro Marcuello and Antonio Gonz\'{a}lez}, - title = "{Clustered Speculative Multithreaded Processors}", - booktitle = ICS, - pages = {365--372}, - month = {June}, - year = {1999} -} - -%%============================================================================ -%% CATEGORY: ARCHITECTURES: SCALAR: MULTIMEDIA -%%============================================================================ - - -@Article{MultimediaArch:Computer97, - author = {K. Dieffendorff and P. K. Dubey}, - title = {How Multimedia Workloads Will Change Processor Design}, - journal = {IEEE Computer}, - year = {1997}, - volume = {30}, - number = {9}, - pages = {43-45}, - month = {Sept} -} - - -@Article{Power4:IBMJRD02, - author = {J. M. Tendler and J. S. Dodson and J. S. {Fields, Jr.} and - H. Le and B. 
Sinharoy}, - title = "{The {POWER4} system microarchitecture}", - journal = "{IBM Journal of Research and Development}", - year = {2002}, - volume = {46}, - number = {1}, - pages = {5-26}, -} - -@Article{HyperThreading:IntelJournal02, - author = "{Intel Corp.}", - title = "{Special Issue on Intel HyperThreading Technology in - Pentium 4 Processors}", - journal = "Intel Technology Journal", - volume = "Q1", - year = "2002" -} - -@inproceedings{Ruf:PLDI00, - author = {Erik Ruf}, - title = {Effective synchronization removal for Java}, - booktitle = PLDI, - year = {2000}, - isbn = {1-58113-199-2}, - pages = {208--218}, - location = {Vancouver, British Columbia, Canada}, - doi = {http://doi.acm.org/10.1145/349299.349327}, - } - -@inproceedings{TalluriKong:ISCA92, - author = "Madhusudhan Talluri and Shing I. Kong and Mark D. Hill and David A. Patterson", - title = "Tradeoffs in Supporting Two Page Sizes", - booktitle = ISCA, - pages = "415-424", - year = "1992" -} - -@inproceedings{ChenBorg:ISCA92, - author = "J. Bradley Chen and Anita Borg and Norman P. Jouppi", - title = "A Simulation Based Study of {TLB} Performance", - booktitle = ISCA, - pages = "114-123", - year = "1992" -} - -@inproceedings{RomerOhlrich:ISCA95, - author = {Theodore H. Romer and Wayne H. Ohlrich and Anna R. Karlin and Brian N. Bershad}, - title = {Reducing TLB and memory overhead using online superpage promotion}, - booktitle = ISCA, - year = {1995}, - isbn = {0-89791-698-0}, - pages = {176--187}, - location = {S. Margherita Ligure, Italy}, - doi = {http://doi.acm.org/10.1145/223982.224419}, - address = {New York, NY, USA}, - } - -%%% This is the algorithm implemented by the ptrdist/ft benchmark. -@article{FredmanTarjan:JACM87, - author = {Michael L. 
Fredman and Robert Endre Tarjan}, - title = {Fibonacci heaps and their uses in improved network optimization algorithms}, - journal = JACM, - volume = {34}, - number = {3}, - year = {1987}, - issn = {0004-5411}, - pages = {596--615}, - address = {New York, NY, USA}, - } - -@article{UnionFind:JACM75, - author = {Robert Endre Tarjan}, - title = {Efficiency of a Good But Not Linear Set Union Algorithm}, - journal = {J. ACM}, - volume = {22}, - number = {2}, - year = {1975}, - issn = {0004-5411}, - pages = {215--225}, - doi = {http://doi.acm.org/10.1145/321879.321884}, - publisher = {ACM Press}, - address = {New York, NY, USA}, - } - -@inproceedings{Jouppi:ISCA90, - author = {Norman P. Jouppi}, - title = {Improving direct-mapped cache performance by the addition of a small fully-associative cache and prefetch buffers}, - booktitle = ISCA, - year = {1990}, - isbn = {0-89791-366-3}, - pages = {364--373}, - location = {Seattle, WA, USA}, - doi = {http://doi.acm.org/10.1145/325164.325162}, - address = {New York, NY, USA}, - } - -%%============================================================================ -%% CATEGORY: ARCHITECTURES: Processor Manuals -%%============================================================================ - -%%============================================================================ -%% CATEGORY: OPERATING SYSTEMS -%%============================================================================ - -@inproceedings{CacheKernel:OSDI94, - author = {David R. Cheriton and Kenneth J. Duda}, - title = {A caching model of operating system kernel functionality}, - booktitle = OSDI, - year = {1994}, - month = {November}, - pages = {179--193}, - address = {Monterey, CA, USA}, -} - -@inproceedings{Nooks:SOSP03, - author = {Michael M. Swift and Brian N. Bershad and Henry M. 
Levy}, - title = {Improving the reliability of commodity operating systems}, - booktitle = SOSP, - year = {2003}, - pages-comment = {207--222}, - location-comment = {Bolton Landing, NY, USA}, - publisher-comment = {ACM Press}, - address-comment = {New York, NY, USA}, - } - -@inproceedings{Ganapathy:CCS05, - address = {New York, NY, USA}, - author = {Ganapathy, Vinod and Jaeger, Trent and Jha, Somesh }, - booktitle = {CCS '05: Proceedings of the 12th ACM conference on Computer and communications security}, - pages = {330--339}, - publisher = {ACM Press}, - title = {Automatic placement of authorization hooks in the linux security modules framework}, - year = {2005} -} - -@inproceedings{AuthSyscalls, - author = "Mohan Rajagopalan and Matti Hiltunen and Trevor Jim and Richard Schlichting", - title = "Authenticated System Calls", - booktitle = "2005 International Conference on Dependable Systems and Networks (DSN'05)", - year = "2005"} - -@misc{provos02improving, - author = "N. Provos", - title = "Improving host security with system call policies", - text = "N. Provos. Improving host security with system call policies. Technical Report 02-3, CITI, November 2002.", - year = "2002", - url = {citeseer.ist.psu.edu/provos02improving.html} } - -@inproceedings{goldberg, - author = "Ian Goldberg and David Wagner and Randi Thomas and Eric A. 
Brewer", - title = "A Secure Environment for Untrusted Helper Applications", - booktitle = "Proceedings of the 6th Usenix Security Symp.", - address = "San Jose, CA, USA", - year = "1996", - url = "citeseer.ist.psu.edu/goldberg96secure.html" } - -@inproceedings{strata, - author = {Kevin Scott and Jack Davidson}, - title = {Safe Virtual Execution Using Software Dynamic Translation}, - booktitle = {ACSAC '02: Proceedings of the 18th Annual Computer Security Applications Conference}, - year = {2002}, - isbn = {0-7695-1828-1}, - pages = {209}, - publisher = {IEEE Computer Society}, - address = {Washington, DC, USA} } - -@inproceedings{whitaker02scale, - author = "Andrew Whitaker and Marianne Shaw and Steven D. Gribble", - title = "Scale and Performance in the Denali Isolation Kernel", - booktitle = OSDI, - year = "2002", - month = {Dec}, - address = "Boston, MA", - url = {citeseer.ist.psu.edu/article/whitaker02scale.html} } - -@inproceedings{ Xen, - author = "B. Dragovic and K. Fraser and S. Hand and T. Harris and A. Ho and I. Pratt and A. Warfield and P. Barham and R. 
Neugebauer", - title = "Xen and the Art of Virtualization", - booktitle = SOSP, - year = 2003, - month = {October}, - address = {Bolton Landing, NY, USA}, - pages = {164--177}, - url = "citeseer.ist.psu.edu/dragovic03xen.html" } - -@phdthesis{hbench, - Author = {Aaron Brown}, - Title = {A Decompositional Approach to Computer System Performance}, - School = {Harvard College}, - Year = {1997}, - Month = {April} -} - -@Misc{VMWare:WHERE, - Author = {VMWare}, - Title = {{VMWare}}, - Year = {2006}, - Note = "http://www.vmware.com", - URL = "http://www.vmware.com" -} - -@Misc{webstone, - Author = {Mindcraft}, - Title = {Webstone: The Benchmark for Webservers}, - Year = {2002}, - Note = "http://www.mindcraft.com/webstone", - URL = "http://www.mindcraft.com/webstone" -} - -@Misc{abyss, Author = {Moez Mahfoudh}, - Title = {The {A}byss {W}ebserver}, - Year = {2000}, - Note = "http://abyss.sourceforge.net", - URL = "http://abyss.sourceforge.net" -} - -@Misc{thttpd, Author = {Jef Poskanzer}, - Title = {thttpd - tiny/turbo/throttling HTTP server}, - Year = {2000}, - Note = "http://www.acme.com/software/thttpd", - url = "http://www.acme.com/software/thttpd" -} - -@Misc{ghttpd, Author = {Gareth Owen}, - Title = {GazTek Website}, - Year = {1999}, - Note = "http://gaztek.sourceforge.net/ghttpd", - url = "http://gaztek.sourceforge.net/ghttpd" -} - -@Misc{imapd, Author = {University of Washington}, - Title = {University of Washington IMAP Toolkit}, - Year = {2006}, - Note = "http://www.washington.edu/imap/", - url = "http://www.washington.edu/imap/" -} - -@Misc{netkit, - Author = {David A.
Holland}, - Title = {NetKit}, - Year = {2000}, - Note = "http://www.hcs.harvard.edu/$\tilde{\ }$dholland/computers/netkit.html", - url = "http://www.hcs.harvard.edu/$\tilde{\ }$dholland/computers/netkit.html" -} - -@Misc{cfingerd, - Author = {Ken Hollis}, - Title = {CFingerd}, - Publisher = {Bitgate Software}, -} - -@Misc{sudo, - Author = {Todd Miller and Chris Jepeway and Aaron Spangler and Jeff Nieusma and Dave Hieb}, - Title = {sudo}, - Year = {2004}, - Note = "http://www.gratisoft.us/sudo/sudo.html", - url = "http://www.gratisoft.us/sudo/sudo.html" -} - -@Misc{OpenSSH, - Author = {The {OpenBSD} Project}, - Title = {OpenSSH}, - Year = {2006}, - Note = "http://www.openssh.com", - URL = "http://www.openssh.com" -} - -@Misc{ApacheServer, - Author = {"The Apache Software Foundation"}, - Title = {Apache}, - Year = {2007}, - Note = "http://www.apache.org", - URL = "http://www.apache.org" -} - -@Misc{OpenBSDManPages, - Title = {OpenBSD Manual Pages}, - Year = {2006}, - Note = "http://www.openbsd.org/cgi-bin/man.cgi", - url = "http://www.openbsd.org/cgi-bin/man.cgi" -} - -@Misc{wget, Author = {Hrvoje Niksic}, - Title = {{GNU} Wget}, - Year = {2006}, - Note = "http://www.gnu.org/software/wget/", - url = "http://www.gnu.org/software/wget/" -} - -@Misc{hbd, Author = {Pete Ryland}, - Title = {HomeBrew Decompiler}, - Year = {2002}, - Note = "http://pdr.cx/projects/hbd/", - url = "http://pdr.cx/projects/hbd/" -} - -@Misc{WU-FTPD, - Author = {WU-FTPD Development Group}, - Title = {WU-FTPD}, - Year = {2001}, - Note = "http://www.wu-ftpd.org/", - URL = "http://www.wu-ftpd.org/" -} - - -%%============================================================================ -%% CATEGORY: MISCELLANEOUS -%%============================================================================ -@article{ herlihy91waitfree, - author = "Maurice Herlihy", - title = "Wait-Free Synchronization", - journal = "ACM Transactions on Programming Languages and Systems", - volume = "13", - number = "1", - month = 
"January", - publisher = "ACM Press", - pages = "124--149", - year = "1991", - url = "citeseer.ist.psu.edu/herlihy93waitfree.html" } - - -@Article{Choices:CACM93, - author = {Roy Campbell and Nayeem Islam and Peter Madany - and David Raila}, - title = {Designing and Implementing {C}hoices: An Object-oriented - System in {C++}}, - journal = {Communications of the ACM}, - year = {1993}, - volume = {36}, - number = {9}, - pages = {36(9):117--126}, - OPTmonth = Sept, -} - - -@Misc{JSR121, - Title = "{JSR} 121", - Author = "{Java {C}ommunity {P}rocess}", - Year = "2003", - Note = "http://jcp.org/jsr/detail/121.jsp" -} - -@Book{AlphaHandbook:1998, - author = {{Compaq {C}omputer {C}orporation}}, - title = {Alpha Architecture Handbook}, - publisher = {Compaq {C}omputer {C}orporation}, - year = {1998}, -} - -@Book{PowerPC32:2001, - author = {Motorola, Inc.}, - title = {Programming Environments Manual for 32 Bit Implementations - of the {PowerPC} Architecture}, - publisher = {Motorola, Inc.}, - year = {2001}, -} - -@Book{ItaniumV2:2002, - author = {Intel Corporation}, - title = {Intel Itanium Architecture Software Developer's Manual}, - publisher = {Intel Corporation}, - volume = {2}, - year = {2002}, -} - -@article{KaffeOS:TOPLAS05, - author = {Godmar Back and Wilson C. 
Hsieh}, - title = {The {K}affe{OS} {J}ava runtime system}, - journal = TOPLAS, - volume = {27}, - number = {4}, - year = {2005}, - issn = {0164-0925}, - pages = {583--630}, - doi = {http://doi.acm.org/10.1145/1075382.1075383}, - publisher = {ACM Press}, - address = {New York, NY, USA}, -} - - -@inproceedings{Singularity:Submitted05, - Author = {Mark Aiken and Paul Barham and Manuel Fahndrich and Galen Hunt and Orion Hodson and James Larus and Steven Levi and Nick Murphy and Bjarne Steensgaard and David Tarditi and Brian Zill}, - Title = {Uniform Extensibility in {S}ingularity using Software Isolated Processes}, - Booktitle = {Submitted for Publication}, - Year = {2005}, -} - -@TechReport{Singularity:TR04, - author = {Galen C. Hunt and James R. Larus}, - title = {Singularity {D}esign {M}otivation - ({S}ingularity {T}echnical {R}eport 1)}, - institution = {Microsoft Research}, - year = {2004}, - number = {MSR-TR-2004-105}, - month = {Dec}, -} -% address = {Redmond, WA}, - -@TechReport{Singularity:TR05, - author = {Galen C. Hunt and James R. Larus and Martín Abadi and Mark Aiken and Paul Barham and Manuel Fähndrich and Chris Hawblitzel and Orion Hodson and Steven Levi and Nick Murphy and Bjarne Steensgaard and David Tarditi and Ted Wobber and Brian Zill}, - title = {An Overview of the {S}ingularity Project}, - institution = {Microsoft Research}, - year = {2005}, - number = {MSR-TR-2005-135}, - month = {October}, -} - -@InProceedings{JX:USENIX02, - Author = {Michael Golm and Meik Felser and Christian Wawersich and - Jurgen Kleinoder}, - Title = {The {JX} {O}perating {S}ystem}, - Booktitle = "Proc. {USENIX} Annual Technical Conference", - Pages = "45--58", - Address = "Monterey, CA, USA", - Month = {June}, - Year = {2002} -} - - -@Book{JavaOS:Book99, - author = {T. Saulpaugh and C. Mirho}, - title = {Inside the {J}ava{OS} {O}perating {S}ystem.
}, - publisher = {Addison-Wesley}, - isbn = {0201183935}, - address = {Reading, MA, USA}, - year = {1999}, -} - -@inproceedings{JKernel:Usenix98, - author = {Chris Hawblitzel and Chi-Chao Chang and Grzegorz - Czajkowski and Deyu Hu and Thorsten {von Eicken}}, - title = {Implementing Multiple Protection Domains in {Java}}, - booktitle = {{USENIX} Annual Technical Conference}, - month = jun, - year = 1998, - url = {citeseer.ifi.unizh.ch/hawblitzel98implementing.html}, - url = {http://citeseer.nj.nec.com/hawblitzel98implementing.html} } - % address = {New Orleans, LA}, - % pages = {259--270}, - -@inproceedings{JRes:OOPSLA98, - author = "Grzegorz Czajkowski and Thorsten von Eicken", - title = "{JRes}: {A} Resource Accounting Interface for {J}ava", - booktitle = OOPSLA, - pages = "21--35", - year = 1998, - url = "citeseer.ist.psu.edu/czajkowski98jres.html" } - -%%============================================================================ -%% GARBAGE COLLECTION -%%============================================================================ -@misc{ hirzel03connectivitybased, - author = "M. Hirzel and A. Diwan and M. Hertz", - title = "Connectivity-based garbage collection", - text = "M. Hirzel, A. Diwan, and M. Hertz, Connectivity-based garbage collection, - in ACM Conference on Object-Oriented Programming Systems, Languages, and - Applications, Oct. 2003, pp. 
359--373.", - year = "2003", - url = "citeseer.ist.psu.edu/article/hirzel04connectivitybased.html" } - - -@inproceedings{conf-c++-AttardiF94, - author = {Giuseppe Attardi and - Tito Flagella}, - title = {A Customisable Memory Management Framework.}, - booktitle = {C++ Conference}, - year = {1994}, - pages = {123-142}, - ee = {http://www.usenix.org/publications/library/proceedings/c++94/attardi.html}, - bibsource = {DBLP, http://dblp.uni-trier.de} -} - - -@article{journals-spe-AttardiFI98, - author = {Giuseppe Attardi and - Tito Flagella and - Pietro Iglio}, - title = {A Customisable Memory Management Framework for C++.}, - journal = {Softw., Pract. Exper.}, - volume = {28}, - number = {11}, - year = {1998}, - pages = {1143-1183}, - bibsource = {DBLP, http://dblp.uni-trier.de} -} - -@inproceedings{conf-pldi-BarrettZ93, - author = {David A. Barrett and - Benjamin G. Zorn}, - title = {Using Lifetime Predictors to Improve Memory Allocation Performance.}, - booktitle = {PLDI}, - year = {1993}, - pages = {187-196}, - bibsource = {DBLP, http://dblp.uni-trier.de} -} - - -@techreport{ joel88compacting, - author = "Bartlett, Joel F.", - title = "Compacting Garbage Collection with Ambiguous Roots", - number = "88/2", - year = "1988", - url = "citeseer.ist.psu.edu/bartlett88compacting.html" } - -@techreport{ bartlett89mostlycopying, - author = "Joel F. Bartlett", - title = "Mostly-{C}opying Garbage Collection picks up Generations and {C++}", - volume = "TN--12", - year = "1989", - url = "citeseer.ist.psu.edu/bartlett89mostlycopying.html" } - - -@article{journals-csur-Cohen81, - author = {Jacques Cohen}, - title = {Garbage Collection of Linked Data Structures.}, - journal = {ACM Comput. Surv.}, - volume = {13}, - number = {3}, - year = {1981}, - pages = {341-367}, - bibsource = {DBLP, http://dblp.uni-trier.de} -} - - -@article{journals-spe-DetlefsDZ94, - author = {David Detlefs and - Al Dosser and - Benjamin G. 
Zorn}, - title = {Memory Allocation Costs in Large C and C++ Programs.}, - journal = {Softw., Pract. Exper.}, - volume = {24}, - number = {6}, - year = {1994}, - pages = {527-542}, - bibsource = {DBLP, http://dblp.uni-trier.de} -} - - -@inproceedings{conf-c++-EllisD94, - author = {John R. Ellis and - David Detlefs}, - title = {Safe, Efficient Garbage Collection for C++.}, - booktitle = {C++ Conference}, - year = {1994}, - pages = {143-178}, - ee = {http://www.usenix.org/publications/library/proceedings/c++94/ellis.html}, - bibsource = {DBLP, http://dblp.uni-trier.de} -} - -@article{journals-iandc-TofteT97, - author = {Mads Tofte and - Jean-Pierre Talpin}, - title = {Region-based Memory Management.}, - journal = {Inf. Comput.}, - volume = {132}, - number = {2}, - year = {1997}, - pages = {109-176}, - bibsource = {DBLP, http://dblp.uni-trier.de} -} - -@article{journals-spe-Zorn93, - author = {Benjamin G. Zorn}, - title = {The Measured Cost of Conservative Garbage Collection.}, - journal = {Softw., Pract. Exper.}, - volume = {23}, - number = {7}, - year = {1993}, - pages = {733-756}, - bibsource = {DBLP, http://dblp.uni-trier.de} -} - - -@inproceedings{conf-lfp-Zorn90, - author = {Benjamin G. 
Zorn}, - title = {Comparing Mark-and-Sweep and Stop-and-Copy Garbage Collection.}, - booktitle = {LISP and Functional Programming}, - year = {1990}, - pages = {87-98}, - ee = {http://doi.acm.org/10.1145/91556.91597}, - bibsource = {DBLP, http://dblp.uni-trier.de} -} - -@Misc{DSA:PLDI06Submission, - author = {Chris Lattner and Vikram Adve}, - title = {Data Structure Analysis: A Fast, Flow-insensitive Algorithm for Analyzing Linked Data Structures.}, - howpublished = "Submitted for publication", - month = Nov, - year = {2005} -} - - -@InProceedings{YiAdveKennedy:PLDI2000, - author = {Qing Yi and Vikram Adve and Ken Kennedy}, - title = {Transforming Loops To Recursion for Multi-Level Memory Hierarchies}, - booktitle = PLDI, - year = {2000}, - OPTaddress = {Vancouver, Canada}, - OPTmonth = {June}, -} - -@InProceedings{SingularityChannels:Eurosys06, - author = {Manuel Fahndrich and Mark Aiken and Chris Hawblitzel and Orion Hodson and Galen C. Hunt and James R. Larus and Steven Levi}, - title = {Language Support for Fast and Reliable Message-based Communication in {S}ingularity {OS}}, - booktitle = {Proceedings of EuroSys}, - year = {2006}, - OPTaddress = {Belgium}, - OPTmonth = {April} -} - - -@inproceedings{ PCC:OSDI96, - author = "George C. 
Necula and Peter Lee", - title = "Safe Kernel Extensions Without Run-Time Checking", - booktitle = OSDI, - publisher-comment = "USENIX", - address-comment = "Berkeley, CA, USA", - editor-comment = "{USENIX}", - pages-comment = "229--243", - year = "1996", - url = "citeseer.ist.psu.edu/necula96safe.html" } - - -@inproceedings{LLVAOS:WIOSCA06, - author = "John Criswell and Brent Monroe and Vikram Adve", - title = "A Virtual Instruction Set Interface for Operating System Kernels", - booktitle = WIOSCA, - year = {2006}, - address = {Boston, MA, USA}, - month = {June}, - pages = {26--33}, -} - -@inproceedings{LLVAOS:Anon06, - author = "Anonymous", - title = " Details omitted for double-blind reviewing", - booktitle = "Workshop paper", - year = {2006}, -} - - -@inproceedings{378846, - author = {Thomas Ball and Rupak Majumdar and Todd Millstein and Sriram K. Rajamani}, - title = {Automatic predicate abstraction of C programs}, - booktitle = PLDI, - year = {2001}, - isbn = {1-58113-414-2}, - pages = {203--213}, - location = {Snowbird, Utah, United States}, - doi = {http://doi.acm.org/10.1145/378795.378846}, - publisher = {ACM Press}, - address = {New York, NY, USA}, -} - -@inproceedings{o'callahan97lackwit, - author = {Robert O'Callahan and Daniel Jackson}, - title = {Lackwit: a program understanding tool based on type inference}, - booktitle = ICSE, - year = {1997}, - isbn = {0-89791-914-9}, - pages = {338--348}, - location = {Boston, Massachusetts, United States}, - doi = {http://doi.acm.org/10.1145/253228.253351}, - publisher = {ACM Press}, - address = {New York, NY, USA}, - } - -@Misc{LLVM:Bossa07, - title = {The {LLVM Compiler} System}, - author = {Chris Lattner}, - address = {Recife, Brazil}, - month-comment = {March}, - year = {2007}, - howpublished1-comment = {{\rm Presentation at the} 2007 Bossa Conference on Open Source, Mobile Internet and Multimedia}, - howpublished = {Bossa Conference on Open Source, Mobile Internet and Multimedia}, - note = 
{http://llvm.org/pubs/2007-03-12-BossaLLVMIntro.html}, - url = {http://llvm.org/pubs/2007-03-12-BossaLLVMIntro.html}, -} - - -@Misc{LLVMLinkTimeOpt:URL, - title = {LLVM Link Time Optimization: Design and Implementation}, - author = {Devang Patel}, - year = {2006}, - howpublished = {LLVM Developer Documentation}, - note = {Available at {\tt http://llvm.org/docs/LinkTimeOptimization.html}}, -} - -@article{TagsAndTypes:SIGPLAN87, - author = {Steenkiste, Peter and Hennessy, John}, - title = {Tags and type checking in LISP: hardware and software approaches}, - journal = {SIGPLAN Not.}, - volume = {22}, - number = {10}, - year = {1987}, - issn = {0362-1340}, - pages = {50--59}, - doi = {http://doi.acm.org/10.1145/36205.36183}, - publisher = {ACM}, - address = {New York, NY, USA}, -} - -@inproceedings{CVC3, - author = "Clark Barrett and Cesare Tinelli", - title = "{CVC3}", - booktitle = "Proceedings of the $19^{th}$ International Conference on Computer Aided Verification (CAV '07)", - series = "Lecture Notes in Computer Science", - volume = 4590, - publisher = "Springer-Verlag", - editor = "Werner Damm and Holger Hermanns", - pages = "298--302", - month = jul, - year = 2007, - note = "Berlin, Germany" -} - -@misc{CVC3Manual, - title = {The {CVC3} User's Manual}, - note = "http://www.cs.nyu.edu/acsys/cvc3/doc/user\_doc.html" -} - -@inproceedings{Z3, - author = "Leonardo de Moura and Nikolaj Bjørner", - title = "{Z3}: An Efficient {SMT} Solver", - booktitle = "Conference on Tools and Algorithms for the Construction and Analysis of Systems (TACAS)", - year = 2008, - note = "Budapest, Hungary" -} - -@inproceedings{Zhang:ICSE03, - author = {Zhang, Xiangyu and Gupta, Rajiv and Zhang, Youtao}, - title = {Precise dynamic slicing algorithms}, - booktitle = {Proceedings of the 25th International Conference on Software Engineering}, - series = {ICSE '03}, - year = {2003}, - isbn = {0-7695-1877-X}, - location = {Portland, Oregon}, - pages = {319--329}, - numpages = {11}, - url = 
{http://dl.acm.org/citation.cfm?id=776816.776855}, - acmid = {776855}, - publisher = {IEEE Computer Society}, - address = {Washington, DC, USA}, -} - -@article{Korel:IPL88, - author = {Korel, B. and Laski, J.}, - title = {Dynamic program slicing}, - journal = {Inf. Process. Lett.}, - volume = {29}, - issue = {3}, - month = {October}, - year = {1988}, - issn = {0020-0190}, - pages = {155--163}, - numpages = {9}, - url = {http://dl.acm.org/citation.cfm?id=56378.56386}, - doi = {10.1016/0020-0190(88)90054-3}, - acmid = {56386}, - publisher = {Elsevier North-Holland, Inc.}, - address = {Amsterdam, The Netherlands, The Netherlands}, -} - -@Book{AllenKennedy, - author = "R. Allen and K. Kennedy", - title = "{Optimizing Compilers for Modern Architectures}", - publisher = "Morgan Kaufmann Publishers, Inc.", - address = "San Francisco, CA", - year = "2002" -} - - -@inproceedings{JoKulkarni:OOPSLA11, - author = {Jo, Youngjoon and Kulkarni, Milind}, - title = {Enhancing locality for recursive traversals of recursive structures}, - booktitle = {Proceedings of the 2011 ACM international conference on Object oriented programming systems languages and applications}, - series = {OOPSLA '11}, - year = {2011}, - isbn = {978-1-4503-0940-0}, - location = {Portland, Oregon, USA}, - pages = {463--482}, - numpages = {20}, - url = {http://doi.acm.org/10.1145/2048066.2048104}, - doi = {http://doi.acm.org/10.1145/2048066.2048104}, - acmid = {2048104}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {irregular programs, locality transformations, tree traversals}, -} - - @InProceedings{PointerComp:MSP05, - author = {Chris Lattner and Vikram Adve}, - title = "{Transparent Pointer Compression for Linked Data Structures}", - booktitle = "{Proceedings of the ACM Workshop on Memory System Performance (MSP'05)}", - address = {Chigago, Illinois}, - month = {June}, - year = {2005} - } diff --git a/paper/pact.bib b/paper/pact.bib deleted file mode 100644 index 
edcea6456e2e5c395c90c5d73638e1b94aa6cd62..0000000000000000000000000000000000000000 --- a/paper/pact.bib +++ /dev/null @@ -1,1311 +0,0 @@ -@misc{SCC, - Howpublished = {\url{http://http://techresearch.intel.com/ProjectDetails.aspx?Id=1}}, - Title = {Intel Research: Single-Chip Cloud Computer}} - -@inproceedings{Albert-gpu, - author = {Albert Sidelnik and I-JUi Sung and Wanmin Wu and Maria J. Garzaran and Wen-mei Hwu and Klara Nahrstedt and David Padua and Sanjay J. Patel}, - title = "{Optimization of Tele-Inmersion Codes. In the second Workshop on General-Purpose Computation on Graphics Processing Units}", - booktitle = {Second Workshop on General-Purpose Computation on Graphics Processing Units, in conjunction with ASPLOS}, - year = {2009} - } -@inproceedings{chapelhotpar, - author = {Bradford L. Chamberlain and Steven J. Deitz and David Iten and Sung-Eun ChoiJ}, - title = "{User-Defined Distributions and Layouts in Chapel: Philosophy and Framework}", - booktitle = {Proc.\ of the USENIX Workshop on Hot Topics in Parallelism}, - year = {2010}, - month = {June}, -} - -@article{bsp1, - author = {Valiant, Leslie G.}, - title = {A bridging model for parallel computation}, - journal = {Commun. ACM}, - volume = {33}, - number = {8}, - year = {1990}, - issn = {0001-0782}, - pages = {103--111}, - } -@ARTICLE{bsp2, - author = {Jonathan M. D. Hill and Bill Mccoll and Dan C. Stefanescu and Mark W. Goudreau and Kevin Lang and Satish B. Rao and Torsten Suel and Thanasis Tsantilas and Rob Bisseling}, - title = {BSPlib - The BSP Programming Library}, - journal = {Parallel Computing}, - year = {1997}, - volume = {24} -} -@book{dh-pc-01, -keyent = {dh-pc-01}, -author = "R. O. Duda and P. E. Hart and D. G. Stork", -title = "Pattern Classification", -edition = {2nd}, -publisher = "Wiley-Interscience", -address = "New York", -year = 2001 -} - -@article{Gustavson, - author = {Gustavson, F. 
G.}, - title = "{High-performance Linear Algebra Algorithms Using New Generalized Data Structures for Matrices}", - journal = {IBM J. Res. Dev.}, - volume = {47}, - number = {1}, - year = {2003}, - issn = {0018-8646}, - pages = {31--55}, - publisher = {IBM Corp.}, - address = {Riverton, NJ, USA}, - } - -@inproceedings{guo:htas:ppopp:08, - author = {Jia Guo and Ganesh Bikshandi and Basilio B.\ Fraguela and Mar{\'i}a J. Garzar{\'a}n and David Padua}, - title = "{Programming with Tiles}", - booktitle = {Proc.\ of the ACM SIGPLAN Symp.\ on Principles and Practice of Parallel Programming}, - year = {2008}, - pages = {111--122}, - month = {Feb}, -} - -@inproceedings{, - author = {Ganesh Bikshandi and Jia Guo and Dan Hoeflinger and Gheorghe Almasi and Basilio B. Fraguela and Mar{\'i}a J. Garzar{\'a}n and David Padua and Christoph von Praun}, - title = "{Programming for Parallelism and Locality with Hierarchically Tiled Arrays}", - booktitle = {Proc.\ of the ACM SIGPLAN Symp.\ on Principles and Practice of Parallel Programming}, - year = {2006}, - pages = {48--57}, -} - -@inproceedings{bikshandi:htas:lcpc:06, - author = {Ganesh Bikshandi and Jia Guo and Christoph von Praun and Gabriel Tanase and Basilio B. Fraguela and Mar{\'i}a J. Garzar{\'a}n and David Padua and Lawrence Rauchwerger}, - title = {Design and Use of htalib - a Library for {Hierarchically Tiled Arrays}}, - booktitle = { Proc. of the Intl.\ Workshop on Languages and Compilers for Parallel Computing}, - year = {2006}, - pages = {17--32}, - month = {Nov}, -} - -@article{Flame, - author = "Paolo Bientinesi and John A. Gunnels and Margaret E. Myers and - Enrique S. Quintana-Ort\'{i} and Robert A. van de Geijn", - title = "{The Science of Deriving Dense Linear Algebra Algorithms}", - journal = "{ACM} Trans. Math. Softw.", - volume = "31", - number = "1", - month = mar, - year = "2005", - pages = "1--26", -} - -@article{flame-api, - author = {Paolo Bientinesi and Enrique S. Quintana-Ort\'{\i} and Robert A. 
van de Geijn}, - title = {Representing linear algebra algorithms in code: the {FLAME} application program interfaces}, - journal = {ACM Trans. Math. Softw.}, - volume = {31}, - number = {1}, - year = {2005}, - issn = {0098-3500}, - pages = {27--59}, - doi = {http://doi.acm.org/10.1145/1055531.1055533}, - publisher = {ACM Press}, - address = {New York, NY, USA}, - } - - -@inproceedings{oblivious, - author = {Matteo Frigo and Charles E. Leiserson and Harald Prokop and Sridhar Ramachandran}, - title = {Cache-Oblivious Algorithms}, - booktitle = {FOCS '99: Proceedings of the 40th Annual Symposium on Foundations of Computer Science}, - year = {1999}, - isbn = {0-7695-0409-4}, - pages = {285}, - } - -@book{lapack, - author = {Anderson, E. and Bai, Z. and Bischof, C. and - Blackford, S. and Demmel, J. and Dongarra, J. and - Du Croz, J. and Greenbaum, A. and Hammarling, S. and - McKenney, A. and Sorensen, D.}, - title = {{LAPACK} Users' Guide}, - edition = {Third}, - publisher = {Society for Industrial and Applied Mathematics}, - year = {1999}, - address = {Philadelphia, PA}, - isbn = {0-89871-447-8 (paperback)} } - -@book{ fox, - author = {Geoffrey C. Fox and Mark A. Johnson and Gregory A. Lyzenga and Steve W. Otto and John K. Salmon and David W. Walker}, - title = "{Solving Problems on Concurrent Processors. Vol. 
1: General Techniques and Regular Problems}", - year = {1988}, - isbn = {0-13-823022-6}, - publisher = {Prentice-Hall, Inc.}, -} - -@book{halstead, - author = {Halstead,Maurice H.}, - year = {1977}, - title = {Elements of Software Science}, - publisher = {Elsevier}, - OPTaddress = {New York} -} - -@Book{reinders:tbb:2007, - author = {James Reinders}, - title = {Intel Threading Building Blocks: Outfitting C++ for Multi-core Processor Parallelism}, - publisher = {O'Reilly}, - year = {2007}, - edition = {1}, - month = {July} -} - -@article{maCabe, - author = {McCabe}, - year = {1976}, - title = "{A Complexity Measure}", - journal = {IEEE Transactions on Software Engineering}, - volume = {2}, - pages = {308-320} -} - -@book{Geist94pvm, - author = "A. Geist and A. Beguelin and J. Dongarra and W. Jiang and R. Manchek and V. S. Sunderamet", - title = "{PVM}: Parallel Virtual Machine: {A} Users' Guide and Tutorial for Networked Parallel Computing", - publisher = "MIT Press", - isbn = "0-262-57108-0 (paperback)", - pages = "xvii + 279", - year = "1994", -} - - -@inproceedings{fortranD, - author = {Seema Hiranandani and Ken Kennedy and Chau-Wen Tseng}, - title = "{Compiler Optimizations for {Fortran D} on {MIMD} Distributed-memory Machines}", - booktitle = {Supercomputing '91: Proceedings of the 1991 ACM/IEEE conference on Supercomputing}, - year = {1991}, - isbn = {0-89791-459-7}, - pages = {86--100}, - } - -@phdthesis{bikshandi:phd, - author = {Ganesh Bikshandi}, - title = "{Parallel Programming with Hierarchically Tiled Arrays}", - school = {UIUC}, - year = 2007, -} - -@book{GrES99, - author = "W. Gropp and E. Lusk and A. Skjellum", - title = "{Using MPI (2nd ed.): Portable Parallel Programming with the Message-Passing Interface}", - year = {1999}, - isbn = {0-262-57132-3}, - publisher = {MIT Press}, - } - -@article{NuRe98, - author = "R. W. Numrich and J. 
Reid", - title = "{Co-array Fortran for Parallel Programming}", - journal = {SIGPLAN Fortran Forum}, - volume = {17}, - number = {2}, - year = {1998}, - pages = {1--31}, - publisher = {ACM Press}, -} - -@TechReport{CDCY99, - author = {W.~Carlson and J.~Draper and D.~Culler and K.~Yelick and E.~Brooks and K.~Warren}, - title = "{Introduction to UPC and Language Specification}", - institution = {IDA Center for Computing Sciences}, - year = {1999}, - number = {CCS-TR-99-157}, -} - -@TechReport{veldhuizen99techniques, - author = {T. Veldhuizen}, - title = {Techniques for Scientific {C++}}, - institution = {Department of Computer Science, Indiana University}, - year = {2000}, - number = {TR542}, -} - -@article{HiKT92, - author = {S. Hiranandani and K. Kennedy and C.-W. Tseng}, - title = "{Compiling Fortran D for MIMD Distributed-memory Machines}", - journal = {Commun. ACM}, - volume = {35}, - number = {8}, - year = {1992}, - issn = {0001-0782}, - pages = {66--80}, - publisher = {ACM Press}, - } - -@article{KoMe92, - author = {C. Koelbel and P. Mehrotra}, - title = "{An Overview of High Performance Fortran}", - journal = {SIGPLAN Fortran Forum}, - volume = {11}, - number = {4}, - year = {1992}, - pages = {9--16}, - publisher = {ACM Press}, - } - -@InProceedings{HTA1, - author = "G. Almasi and L. De Rose and B. B. Fraguela and J. Moreira and D. Padua", - title = "{Programming for Locality and Parallelism with Hierarchically Tiled Arrays}", - booktitle = "Proc.\ of LCPC 2003", - pages = "162--176", - year = "2003", - address = "College Station, Texas", - publisher = "Springer-Verlag", - series = "LCNS", - volume = "2958", - month = "Oct", - isbn = "3-540-21199-3", -} - -@PhdThesis{Cannon, - author = {L.E. Cannon}, - title = "{A Cellular Computer to Implement the Kalman Filter Algorithm}", - school = {Montana State University}, - year = {1969}, -} - - -@Article{Summa, - author = "R. A. Van De Geijn and J. 
Watts", - title = "{SUMMA: Scalable Universal Matrix Multiplication Algorithm}", - journal = "Concurrency: Practice and Experience", - volume = "9", - number = "4", - pages = "255--274", - month = "Apr", - year = "1997", - CODEN = "CPEXEI", - ISSN = "1040-3108", -} - -@article{zpl, -author = "B.L. Chamberlain and S.Choi and E.C. Lewis and C. Lin and L. Synder and W.D. Weathersby", -title = "{The Case for High Level Parallel Programming in {ZPL}}", -journal = "IEEE Computational Science and Engineering", -volume = "5", -number = "3", -pages = "76--86", -month = "July--September", -year = "1998", -} - -@misc{mkl, - title="{I}ntel {M}ath {K}ernel {L}ibrary", - note= "http://www.intel.com/cd/software/products/asmo-na/eng/perflib/mkl/index.htm", -} - -@misc{htawebsite, - title="{H}ierarchically {T}iled {A}rrays", - note= "http://polaris.cs.uiuc.edu/hta/", -} -@misc{nas, - title="{NAS} {P}arallel {B}enchmarks", - howpublished="Website", - note= "http://www.nas.nasa.gov/Software/NPB/", -} - -@misc{hpf, - author = "{High Performance Fortran Forum}", - title = "{H}igh {P}erformance {F}ortran Specification Version 2.0", - month = "January", - year = "1997", -} - -@incollection{reynders96pooma, - author = "John V. W. Reynders and Paul J. Hinker and Julian C. Cummings and Susan R. Atlas and Subhankar Banerjee and William F. Humphrey and Steve R. Karmesin and Katarzyna Keahey and Marikani Srikant and Mary Dell Tholburn", - title = "{{POOMA}: {A} {F}ramework for {S}cientific {S}imulations of {P}aralllel {A}rchitectures}", - booktitle = "Parallel Programming in C++", - publisher = "MIT Press", - OPTeditor = "Gregory V. Wilson and Paul Lu", - pages = "547--588", - year = "1996", -} - -@InProceedings{POET, - author = "R. C. Armstrong and A. 
Cheung", - title = "{POET (Parallel Object-oriented Environment and Toolkit) and Frameworks for Scientific Distributed Computing}", - booktitle = "Proc.\ of 30th Hawaii International Conference on System Sciences (HICSS 1997)", - pages = "54--63", - address = "Maui, Hawai", - year = "1997", -} - -@InProceedings{quicksort, - author = "B. Wager", - title = "{Hyperquicksort: A Fast Algorithm for Hypercubes}", - booktitle = "Hypercube Multiprocessors", - year = "1987", - pages = "292-299", - address = "Philadelphia, PA", - publisher = "SIAM", -} - -@article{McKellar, - author = {A. C. McKellar and E. G. Coffman, Jr.}, - title = "{Organizing Matrices and Matrix Operations for Paged Memory Systems}", - journal = {Communications of the ACM}, - volume = {12}, - number = {3}, - year = {1969}, - issn = {0001-0782}, - pages = {153--165}, - doi = {http://doi.acm.org/10.1145/362875.362879}, - publisher = {ACM Press}, -} - -@inproceedings{Wolf, - author = {Michael E. Wolf and Monica S. Lam}, - title = "{A Data Locality Optimizing Algorithm}", - booktitle = {Proc. of the Conf. on Programming Language Design and Implementation}, - year = {1991}, - isbn = {0-89791-428-7}, - pages = {30--44}, - location = {Toronto, Ontario, Canada}, - doi = {http://doi.acm.org/10.1145/113445.113449}, -} - -@inproceedings{Adve , - author = {Vikram Adve and John Mellor-Crummey}, - title = "{Using Integer Sets for Data-parallel Program Analysis and Optimization}", - booktitle = {Proc. of the Conf. on Programming Language Design and Implementation}, - year = {1998}, - isbn = {0-89791-987-4}, - pages = {186--198}, - location = {Montreal, Quebec, Canada}, - doi = {http://doi.acm.org/10.1145/277650.277721}, - publisher = {ACM Press}, -} - -@article{Sturler, - author = "E. de Sturler and D. 
Loher", - title = "Parallel iterative solvers for irregular sparse matrices in {High Performance Fortran}", - journal = "Future Generation Computer Systems", - volume = "13", - number = "4--5", - pages = "315--325", - year = "1998", - url = "citeseer.nj.nec.com/desturler97parallel.html" -} - -@inproceedings{WYSIWYG, - author = {Chamberlain, B.L. and Lin, C. and Sung-Eun Choi and Snyder, L. and Lewis, E.C. and Weathersby, W.D.}, - title = "{ZPL's WYSIWYG Performance Model}", - booktitle = {Procs. of the High-Level Parallel Programming Models and Supportive Environments}, - year = {1998}, - isbn = {0-8186-8412-7}, - pages = {50--61}, - publisher = {IEEE Computer Society}, - } - -@PhdThesis{NgoThesis, - author = {Ton Anh Ngo}, - title = "{The Role of Performance Models in Parallel Programming and Languages}", - year = {1997}, - school = {Department of Computer Science and Engineering, University of Washington}, -} - -@article{bodin93pcxx, - author = {FRANÇOIS Bodin and Peter Beckman and Dennis Gannon and Srinivas Narayana and Shelby X. Yang}, - title = "{D}istributed p{C}++: {B}asic {I}deas for an object parallel language", - journal = "Scientific Programming", - volume = "2", - number = "3", - pages = "7--22", - year = "1993", -} - -@techreport{trefethen96multimatlab, - author = "A. E. Trefethen and V. S. Menon and C. Chang and G. Czajkowski and C. Myers and L. N. Trefethen", - title = "{Multi{MATLAB}: {MATLAB} on Multiple Processors}", - number = "TR96-1586", - month = "May", - year = "1996", -} - -@inproceedings{matlabSp, - author = "P. Husbands and C. Isbell", - title = "{Matlab*p: A Tool for Interactive Supercomputing}", - booktitle = {Procs. 
of the Ninth SIAM Conference on Parallel Processing for Scientific Computing}, - year = {1999}, -} - - -@ARTICLE{Choy:05, - AUTHOR = {Ron Choy and Alan Edelman}, - TITLE = {Parallel {MATLAB}: Doing it Right}, - JOURNAL = {Proceedings of the IEEE}, - VOLUME = {93}, - NUMBER = 2, - YEAR={2005}, - NOTE = {special issue on "Program Generation, Optimization, and Adaptation"} -} - -@techreport{rstls, - AUTHOR = {E. Barszcz and R. Fathoohi and V. Venkatakrishnan and S. Weeratunga}, - TITLE = "{Solution of Regular, Sparse Triangular Linear Systems on Vector and Distributed-Memory Multiprocessor}", - NUMBER = "RNR-93-007", - MONTH = "April", - YEAR="1993", - INSTITUTION = "NASA Ames Research Center", -} - - -@techreport{nashpf, - AUTHOR = {M. Frumkin and H. Jin and J. Yan}, - TITLE = "{Implementation of NAS parallel benchmarks in High Performance Fortran}", - NUMBER = "NAS-98-009", - MONTH = "September", - YEAR="1998", - INSTITUTION = "NAS Technical Report", -} - -@inproceedings{titanium, - author = {K. A. Yelick and L. Semenzato and G. Pike and C. Miyamoto and B. Liblit and A. Krishnamurthy and P. N. Hilfinger and S. L. Graham and D. Gay and P. Colella and A. Aiken}, - title = "{Titanium: A High-Performance Java Dialect}", - booktitle = {Workshop on Java for High-Performance Network Computing}, - month="February", - year = {1998} - } - -@inproceedings{stapl, - author = {An, Ping and Jula, Alin and Rus, Silvius and Saunders, Steven and Smith, Tim and Tanase, Gabriel and Thomas, Nathan and Amato, Nancy and Rauchwerger, Lawrence}, - title = "{STAPL: An Adaptive, Generic Parallel Programming Library for C++}", - booktitle = {Proc. of LCPC}, - pages="193-208", - month="August", - year = {2001} - } - -@inproceedings{cytron, - author = {R. Cytron}, - title = "{Doacross: Beyond Vectorization for Multiprocessors}", - booktitle = {ICPP}, - pages="836-844", - year = {1986} - } - -@book{wolfe, - author = "M. 
Wolfe", - title = "{High Performance Compilers for Parallel Computing}", - year = {1996}, - publisher = {Addison-Wesley} - } - -@article{matlabsparse, - author = {J.R. Gilber and C. Moler and R. Schreiber}, - title = "{Sparse Matrices in MATLAB: Design and Implementation}", - journal = {SIMAX}, - volume={13}, - number=1, - pages="333-356", - month="January", - year = {1992} - } - -@InProceedings{burns94:_lam, - author = {Greg Burns and Raja Daoud and James Vaigl}, - title = {{LAM}: {A}n {O}pen {C}luster {E}nvironment for {MPI}}, - booktitle = {Proceedings of Supercomputing Symposium}, - pages = {379--386}, - year = {1994}, -} - -@book{knuth, - author = "Donald E. Knuth", - title = "{The Art of Computer Programming}", - volume = {2}, - publisher = {Addison-Wesley} -} - -@article{Hoare78, - author = {C. A. R. Hoare}, - title = {Communicating sequential processes}, - journal = {Commun. ACM}, - volume = {21}, - number = {8}, - year = {1978}, - issn = {0001-0782}, - pages = {666--677}, - publisher = {ACM Press}, - address = {New York, NY, USA}, - } - -@inproceedings{GAsSc94, - author = {J. Nieplocha and R. J. Harrison and R. J. Littlefield}, - title = {Global Arrays: a Portable Shared-memory Programming Model for Distributed Memory Computers}, - booktitle = {Proc. of Supercomputing'94}, - year = {1994}, - pages = {340--ff.}, - } - -@inproceedings{X10OOPSLA05, - author = {P. Charles and Ch. Donawa and K. Ebcioglu and Ch. Grothoff and A. Kielstra and Ch. von Praun and V. Saraswat and V.\ Sarkar}, - title="{X10: An Object-oriented Approach to Non-uniform Cluster Computing}", - booktitle = {Procs.\ of the Conf. on Object-Oriented Programming, -Systems, Languages, and Applications (OOPSLA) -- Onward! Track}, - year = {2005}, - month = {Oct}, -} - -@TechReport{ZPLTaskPar, - author = {S. J. 
Deitz}, - title = "{Renewed Hope for Data Parallelism: Unintegrated Support for Task Parallelism in ZPL}", - institution = {University of Washington}, - year = {2003}, - month = {Dec}, - number = {UW-CSE-03-12-04}, -} - -@article{ViennaFortran, - author = {B.M. Chapman and P. Mehrotra and H. P. Zima}, - title = "{Vienna Fortran\-a Fortran Language Extension for Distributed Memory Multiprocessors}", - journal = {Languages, Compilers and Run-time Environments for Distributed Memory Machines}, - year = {1992}, - isbn = {0-444-88712-1}, - pages = {39--62}, - publisher = {Elsevier Science Publishers B. V.}, - address = {Amsterdam, The Netherlands, The Netherlands}, - } - - -@article{Illiac4, - author = {G. H. Barnes and R. M. Brown and M. Kato and D.J. Kuck and D.L. Slotnick and R.A. Stokes}, - title = "{The ILLIAC IV Computer}", - journal = {IEEE Transactions on Computers}, - volume = {8}, - number = {17}, - year = {1968}, - pages = {746-757} - } - -@article{ATLAS, - author = "R.C. Whaley and A. Petitet and J. Dongarra", - title = "{Automated Empirical Optimizations of Sofware and the ATLAS Project}", - JOURNAL = "Parallel Computing", - VOLUME = {27}, - NUMBER = {1-2}, - PAGES = {3-35}, - year = "2001"} - -@inproceedings{cell, - author = {Pham, D. and Asano, S. and Bolliger, M. and Day, M.N. and Hofstee, H.P. and Johns, C. and Kahle, J. and Kameyama, A. and Keaty, J. and Masubuchi, Y. and Riley, M. and Shippy, D. and Stasiak, D. and Suzuoki, M. and Wang, M. and Warnock, J. and Weitzel, S. and Wendel, D. and Yamazaki, T. 
and Yazawa, K.}, - title="{The Design and Implementation of a First-generation Cell Processor}", - booktitle = {Proceedings of the IEEE Solid-State Circuits Symposium}, - year = {2005}, - month = {February}, -} - -@book{OpenMPBook, - author = {Robit Chandra and Leonardo Dagum and Dave Kohr and Dror Maydan and Jeff McDonald and Ramesh Menon}, - title = {Parallel programming in OpenMP}, - year = {2001}, - isbn = {1-55860-671-8}, - publisher = {Morgan Kaufmann Publishers Inc.}, - address = {San Francisco, CA, USA}, -} - -@inproceedings{Irigoin88, - author = {F. Irigoin and R. Triolet}, - title = "{Supernode Partitioning}", - booktitle = {POPL '88: Proc.\ of the 15th ACM SIGPLAN-SIGACT Symp. on Principles of Programming Languages}, - year = {1988}, - pages = {319--329}, -} - -@book{XueTilingBook00, - author = {Jingling Xue}, - title = {Loop tiling for parallelism}, - year = {2000}, - isbn = {0-7923-7933-0}, - publisher = {Kluwer Academic Publishers}, -} - -@inproceedings{lim01, - author = {Amy W. Lim and Shih-Wei Liao and Monica S. Lam}, - title = "{Blocking and Array Contraction Across Arbitrarily Nested Loops Using Affine Partitioning}", - booktitle = {PPoPP '01: Proc.\ of the 8th ACM SIGPLAN Symp. on Principles and Practice of Parallel Programming}, - year = {2001}, - pages = {103--112}, - } - - @inproceedings{sequoia06, - author = {Kayvon Fatahalian and Daniel Reiter Horn and Timothy J. Knight and Larkhoon Leem and Mike Houston and Ji Young Park and Mattan Erez and Manman Ren and Alex Aiken and William J. Dally and Pat Hanrahan}, - title = {Sequoia: programming the memory hierarchy}, - booktitle = {Supercomputing '06: Proceedings of the 2006 ACM/IEEE Conference on Supercomputing}, - year = {2006}, - pages = {83}, - } - - @inproceedings{Krishnamoorthy07, - author = {Sriram Krishnamoorthy and Muthu Baskaran and Uday Bondhugula and J. Ramanujam and Atanas Rountev and P. 
Sadayappan}, - title = "{Effective Automatic Parallelization of Stencil Computations.}", - booktitle = {Proc. PLDI 2007}, - year = {2007}, - pages = {235-244}, -} - -@article{Hodzic02, - author = {Edin Hodzic and Weijia Shang}, - title = "{On Time Optimal Supernode Shape}", - journal = {IEEE Trans. Parallel Distrib. Syst.}, - volume = {13}, - number = {12}, - year = {2002}, - issn = {1045-9219}, - pages = {1220--1233}, - publisher = {IEEE Press}, - address = {Piscataway, NJ, USA}, - } - - @inproceedings{Ram91, - author = {J. Ramanujam and P. Sadayappan}, - title = "{Tiling Multidimensional Iteration Spaces for Nonshared Memory Machines}", - booktitle = {Supercomputing '91: Proceedings of the 1991 ACM/IEEE conference on Supercomputing}, - year = {1991}, - pages = {111--120}, - } - - @InProceedings{Fraguela05, - author = {Fraguela, B. B. and Carmueja, M. G. and Andrade, D.}, - title = "{Optimal Tile Size Selection Guided by Analytical Models}", - booktitle = {Procs.\ of Parallel Computing 2005 (ParCo 2005).}, - pages = {565--572}, - year = {2005}, - location = {Malaga, Spain}, - month = {Sept}, -} - -@article{AbuSufah, - author = {W. Abu-Sufah and D. J. Kuck and D. H. Lawrie}, - title = "{On the Performance Enhancement of Paging Systems Through Program Analysis and Transformations}", - journal = {IEEE Trans. Comput.}, - volume = {30}, - number = {5}, - year = {1981}, - issn = {0018-9340}, - pages = {341--356}, - doi = {http://dx.doi.org/10.1109/TC.1981.1675792}, - publisher = {IEEE Computer Society}, - address = {Washington, DC, USA}, - } -@inproceedings{WolfeTiling, - author = {M. 
Wolfe}, - title = "{More Iteration Space Tiling}", - booktitle = {Supercomputing '89: Proceedings of the 1989 ACM/IEEE conference on Supercomputing}, - year = {1989}, - isbn = {0-89791-341-8}, - pages = {655--664}, - location = {Reno, Nevada, United States}, - doi = {http://doi.acm.org/10.1145/76263.76337}, - } - -@inproceedings{Barton06, -author = {Christopher Barton and C\'{C}lin Cas\c{c}aval and George Alm\'{a}si and Yili Zheng and Montse Farreras and Siddhartha Chatterje and Jos\'{e} Nelson Amaral}, -title = "{Shared Memory Programming for Large Scale Machines}", -booktitle = {PLDI '06: Proceedings of the 2006 ACM SIGPLAN Conference on Programming Language Design and Implementation}, -year = {2006}, -pages = {108--117}, -} - -@Book{butenhof97:_progr_posix_thread, - author = {David R. Butenhof}, - title = {Programming with {POSIX} Threads}, - publisher = {Addison Wesley}, - year = 1997 -} - -@inproceedings{charm++, -author = {L.V. Kale and Sanjeev Krishnan}, -title = "{CHARM++: A Portable Concurrent Object Oriented System Based on C++}", -booktitle = {Proceedings of the Conference on Object Oriented Programming Systems, Languages and Applications}, -year = {1993}, -pages = {91--108}, -} - -@inproceedings{AMPI, -author = {Milind Bhandarkar and L.V. Kale and Eric de Sturler and Jay Hoeflinger}, -title = "Object-Based Adaptive Load Balancing for {MPI} Programs", -booktitle = {Proceedings of the International Conference on Computational Science}, -year = {2001}, -pages = {108--117}, -} - -@article{Brook4GPUs, - author = {Ian Buck and Tim Foley and Daniel Horn and Jeremy Sugerman and Kayvon Fatahalian and Mike Houston and Pat Hanrahan}, - title = "{B}rook for {GPUs}: stream computing on graphics hardware", - journal = {ACM Trans. 
Graph.}, - volume = {23}, - number = {3}, - year = {2004}, - issn = {0730-0301}, - pages = {777--786}, - doi = {http://doi.acm.org/10.1145/1015706.1015800}, - publisher = {ACM}, - address = {New York, NY, USA}, - } - -@misc{StreamC, - author = {P. Mattson and U. Kapasi and J. Owens and S. Rixner}, - title = {Imagine programming system userÕs guide}, - howpublished = "Imagine internal document", - year = {2001}, -} - -@inproceedings{StreamIt, - author = {William Thies and Michal Karczmarek and Saman P. Amarasinghe}, - title = "{StreamIt}: A Language for Streaming Applications", - booktitle = {CC '02: Proceedings of the 11th International Conference on Compiler Construction}, - year = {2002}, - isbn = {3-540-43369-4}, - pages = {179--196}, - publisher = {Springer-Verlag}, - address = {London, UK}, - } - -@inproceedings{CUDA07, - author = {David Kirk}, - title = "{NVIDIA CUDA} software and {GPU} parallel computing architecture", - booktitle = {ISMM '07: Proceedings of the 6th international symposium on Memory management}, - year = {2007}, - isbn = {978-1-59593-893-0}, - pages = {103--104}, - location = {Montreal, Quebec, Canada}, - doi = {http://doi.acm.org/10.1145/1296907.1296909}, - publisher = {ACM}, - address = {New York, NY, USA}, - } - - @inproceedings{GPGPU, - author = {David Luebke and Mark Harris and Jens Kr\"{u}ger and Tim Purcell and Naga Govindaraju and Ian Buck and Cliff Woolley and Aaron Lefohn}, - title = "{GPGPU: General Purpose Computation on Graphics Hardware}", - booktitle = {ACM SIGGRAPH 2004 Course Notes}, - year = {2004}, - pages = {33}, - doi = {http://doi.acm.org/10.1145/1103900.1103933}, - } - - - -@inproceedings{dean:mapreduce:04, - title = "{MapReduce: Simplified Data Processing on Large Clusters}", - author = {Jeffrey Dean and Sanjay Ghemawat}, - booktitle = {Symposium on Operating System Design and Implementation (OSDI)}, - year = {2004}, -} - -@inproceedings{1cdcstar100, - author = {R. G. Hintz and D. P. 
Tate}, - title = "{Control Data STAR-100 Processor Design}", - booktitle = {Proc. of COMPCON}, - year = {1972}, - pages = {1--4} - } - - @misc{2cdcstar100, - title="{Control Data Star-100 Computer System -- Hardware Reference Manual}", - note="Control Data Corporation Technical Publications Department, Arden Hills, - Minn.", - year= 1972 -} - -@misc{PowerPCISA, - title="Power {ISA}. Version 2.04", - author = {IBM}, - year = 2007, -} - - -@article{ren:IPDPS:2005, -author = {Gang Ren and Peng Wu and David Padua}, -title = {An Empirical Study On the Vectorization of Multimedia Applications for Multimedia Extensions}, -journal = {IPDPS '05: Proceedings of the 19th IEEE International Parallel and Distributed Processing Symposium (IPDPS'05)}, -year = {2005}, -doi = {http://doi.ieeecomputersociety.org/10.1109/IPDPS.2005.94}, -publisher = {IEEE Computer Society}, -address = {Los Alamitos, CA, USA}, -} - -@inproceedings{TI-ASC, - author = {W. Watson}, - title = "{The TI-ASC, A Highly Modular and Flexible Super Computer Architecture}", - booktitle = {Proc. AFIP}, - year = {1972}, - pages = {221-228} - } - -@misc{SSE, - title = "{IA32 Intel Architecture Software Developer's Manual (Volume 1: Basic Architecture)}", - author= "{Intel Corporation}", - year = "2004", -} - -@TechReport{Altivec, - author ={Sam Fuller}, - title = "{Motorola's Altivec Technology.}", - institution={Motorola, Inc}, - year = "1998" -} - -@article{APLdesign, - author = {A.D. Falkoff and K.E. Iverson}, - title = "{The Design of APL}", - journal = {IBM Journal of Research and Development}, - month = {July}, - year = {1973}, - pages = {324--334}, - } - -@misc{Vectranmanual, - author = "G. Paul and M.W. Wilson", - title = "{The VECTRAN Language: An Experimental Language for Vector/Matrix Array Processing}", - note = "IBM Palo Alto Scientific Center Report G320-3334, Palo Alto, California", - year = "1975", - month = "August"} - -@inproceedings{Vectran2, - author = {G. Paul and M. 
Wilson}, - title = "{An Introduction to VECTRAN and Its Use in Scientific Computing}", - booktitle = {Proc. of the 1978 LASL Workshop on Vector and Parallel Processors}, - year = {1978}, - pages = {176--204} - } - -@misc{APLmanual, - author = "A.D. Falkoff and K.E. Iverson", - title = "{APL 360: User's Manual}", - note = "IBM Corporation", - year = "1968", - month = "August" -} - -@inproceedings{LRLTRAN, - author = {R. G. Zwakenberg}, - title = "{Vector Extensions to LRLTRAN}", - booktitle = {Proc. of the conference on Programming Languages and Compilers for Parallel and Vector Machines}, - year = {1975}, - pages = {77--86} - } - -@TechReport{dep1, - author = "J.R. Allen and K. Kennedy", - title = "{PFC: A Program to Convert Fortran to Parallel Form}", - type = "{Technical Report MASC-TR82-6}", - institution = "Rice University, Houston, TX", - month = "March", - year = "1982", -} - -@Book{AllenKennedy, - author = "R. Allen and K. Kennedy", - title = "{Optimizing Compilers for Modern Architectures}", - publisher = "Morgan Kaufmann Publishers, Inc.", - address = "San Francisco, CA", - year = "2002" -} - -@TechReport{dep2, - author = "U. Banerjee", - title = "{Speedup of Ordinary Programs}", - type= "{Ph.D. Thesis, Report 79-989}", - institution = "Department of Computer Science, University of Illinois at Urbana-Champaign", - month = "October", - year = "1979" -} - -@article{dep3, - author = {U. Banerjee and S.C. Chen and D.J. Kuck}, - title = "{Time and Parallel Processor Bounds for Fortran-like Loops}", - journal = {IEEE Transactions on Computers}, - volume = {28}, - number ={9}, - month = {September}, - year = {1979}, - pages = {660--670}, - } - -@TechReport{dep4, - author = "M.J. Wolfe", - title = "{Optimizing Supercompilers for Supercomputers}", - type= "{Ph.D. Thesis, Report 82-1105}", - institution = "Department of Computer Science, University of Illinois at Urbana-Champaign", - month = "October", - year = "1982" -} - -@phdthesis{PaduaPhd, - author = {David A. 
Padua}, - title = {Multiprocessors: discussion of some theoretical and practical problems}, - year = {1980}, - order_no = {AAI8018194}, - publisher = {University of Illinois at Urbana-Champaign}, - address = {Champaign, IL, USA}, - } - -@article{PaduaKL80, - author = {David A. Padua and David J. Kuck and Duncan H. Lawrie}, - title = {High-Speed Multiprocessors and Compilation Techniques}, - journal = {IEEE Trans. Computers}, - volume = {29}, - number = {9}, - year = {1980}, - pages = {763-776}, -} -@article{Chandy91, - author = {Mani Chandy and Carl Kesselman}, - title = {Parallel Programming in 2001}, - journal = {IEEE Software}, - volume = {8}, - number = {6}, - year = {1991}, - pages = {11-20}, -} -@inproceedings{emrath-padua, -author = {Perry A. Emrath and David A. Padua}, -title = "{Automatic Detection of Nondeterminacy in Parallel Programs}", -booktitle = {PADD '88: Proceedings of the 1988 ACM SIGPLAN and SIGOPS Workshop on Parallel and Distributed Debugging}, -year = {1988}, -isbn = {0-89791-296-9}, -pages = {89--99}, -location = {Madison, Wisconsin, United States}, -doi = {http://doi.acm.org/10.1145/68210.69224}, -publisher = {ACM}, -address = {New York, NY, USA}, -} - -@inproceedings{kung76, - author = {H.T. Kung}, - title = "{Synchronized and asynchronous parallel algorithms for multiprocessors}", - booktitle = {Algorithms and Complexity: New Directions and Recent Results.J. F. Traub, ed. Academic Press}, - year ={1976}, -} - -@article{IVTRAN, - author = {R. Millstein and C. Muntz}, - title = "{The Illiac IV Fortran Compiler}", - booktitle = {ACM Sigplan Notices}, - volume={10}, - number ={3}, - pages={1-8}, - month={March}, - year = {1975} - } - -@book{Fortran90, - author = {Jeanne C. Adams and Walter S. Brainerd and Jeanne T. Martin and B -rian T. Smith and Jerrold L. Wagener}, - title = {Fortran 90 Handbook}, - publisher = {McGraw-Hill}, - year = 1992, -} -@misc{SETL, -author ={J.T. 
Schwartz}, -title="{Set Theory as a Language for Program Specification and Programming}", -institution={Courant Institute of Mathematical Sciences, New York University}, -year=1970, -} -@book{HillisCM, - author = {W. Daniel Hillis}, - title = {The Connection Machine}, - publisher = {MIT Press series in artificial intelligence}, - year = 1985, -} - -@article{paduawolfe, -author = {Padua,, David A. and Wolfe,, Michael J.}, -title = "{Advanced Compiler Optimizations for Supercomputers}", -journal = {Commun. ACM}, -volume = {29}, -number = {12}, -year = {1986}, -issn = {0001-0782}, -pages = {1184--1201}, -doi = {http://doi.acm.org/10.1145/7902.7904}, -publisher = {ACM}, -address = {New York, NY, USA}, -} - -@book{lisp, -author = {McCarthy,, John}, -title = {LISP 1.5 Programmer's Manual}, -year = {1962}, -isbn = {0262130114}, -publisher = {The MIT Press}, -} - -@book{apl, -author = {Iverson,, Kenneth E.}, -title = "{A Programming Language}", -year = {1962}, -isbn = {0-471430-14-5}, -source = {Library of Congress Catalog Card Number: 62-15180}, -publisher = {John Wiley \& Sons, Inc.}, -address = {New York, NY, USA}, -} - -@techreport{NESL, -author = {Blelloch,, Guy E.}, -title = "{NESL: A Nested Data-Parallel Language}", -year = {1992}, -source = {http://www.ncstrl.org:8900/ncstrl/servlet/search?formname=detail\&id=oai%3Ancstrlh%3Acmucs%3ACMU%2F%2FCS-92-103}, -publisher = {Carnegie Mellon University}, -address = {Pittsburgh, PA, USA}, -} - -@book{commonlisp, -author = {Steele,, Guy}, -title = {Common Lisp: The Language}, -year = {1990}, -isbn = {0131515071}, -publisher = {Digital Press}, -address = {Newton, MA, USA}, -} - -@article{mapreduce, -author = {Dean,, Jeffrey and Ghemawat,, Sanjay}, -title = "{MapReduce: Simplified Data Processing on Large Clusters}", -journal = {Commun. 
ACM}, -volume = {51}, -number = {1}, -year = {2008}, -issn = {0001-0782}, -pages = {107--113}, -doi = {http://doi.acm.org/10.1145/1327452.1327492}, -publisher = {ACM}, -address = {New York, NY, USA}, -} - -@inproceedings{parallelSETL, -author = {Hummel,, Robert and Kelly,, Rob and Flynn Hummel,, Susan}, -title = "{A Set-based Language for Prototyping Parallel Algorithms}", -booktitle = {Proceedings of the Computer Architecture for Machine Perception '91}, -year = {1991}, -} - - -@article{flamegunnels, -author = {Gunnels,, John A. and Gustavson,, Fred G. and Henry,, Greg M. and van de Geijn,, Robert A.}, -title = "{FLAME: Formal Linear Algebra Methods Environment}", -journal = {ACM Trans. Math. Softw.}, -volume = {27}, -number = {4}, -year = {2001}, -issn = {0098-3500}, -pages = {422--455}, -doi = {http://doi.acm.org/10.1145/504210.504213}, -publisher = {ACM}, -address = {New York, NY, USA}, -} - -@inproceedings{SAMOS, -author={J. Brodman and B. Fraguela and M. J. Garzaran and D. Padua}, -title="{Design Issues in Parallel Array Languages for Shared Memory}", -booktitle={8th Int. Workshop on Systems, Architectures, Modeling, and Simulation}, -year={2008}, -} - -@inproceedings{multiprog08, -author={D. Andrade and J. Brodman and B.B. Fraguela and D. Padua }, -title="{Hierarchically Tiled Arrays Vs. Intel Threading Building Blocks for Programming Multicore Systems}", -booktitle={ Programmability Issues for Multi-Core Computers}, -year={2008}, -} - -@InProceedings{cilk, -author = {Robert D. Blumofe and Christopher F. Joerg and Bradley C. Kuszmaul and Charles E. Leiserson and Keith H. Randall and Yuli Zhou}, -title = {{Cilk}: An Efficient Multithreaded Runtime System}, -booktitle = {Proc. 
of the ACM SIGPLAN Symp.\ on Principles and Practice of Parallel Programming}, -year = {1995}, -pages = {207--216}, -} - -@Book{puzzlebook, - author = {Jerry Slocum and Dic Sonneveld}, - title = {The Fifteen Puzzle}, - publisher = {The Slocum Puzzle Foundation}, - year = 2006 -} - -@article{astar, -author = {P. E. Hart and N. J. Nilsson and B. Raphael}, -title = "{A Formal Basis for the Heuristic Deterministic of Minimum Cost Paths}", -journal = {IEEE Transactions on Systems Science and Cybernetics}, -volume = {4}, -number = {2}, -year = {1968}, -pages = {100--107}, -doi = "{http://dx.doi.org/10.1109\%2FTSSC.1968.300136}", -publisher = {IEEE}, -} - -@book{kumarparbook, - author = {Ananath Grama and Anshul Gupta and George Karypis and Vipin Kumar}, - title = {Introduction to Parallel Computing}, - edition = {Second}, - publisher = {Addison-Wesley}, - year = {2003}, - address = {}, - isbn = {0-201-64865-2} } - -@inproceedings{sidelnikgpgpu, - author = {Albert Sidelnik and I-Jui Sung and Wanmin Wu and Mar{\'i}a J. Garzar{\'a}n and Wen-mei Hwu and Klara Nahrstedt and David Padua and Sanjay J. Patel}, - title = "{Programming with Tiles}", - booktitle = {Proc.\ of the ACM SIGPLAN Symp.\ on Principles and Practice of Parallel Programming}, - year = {2008}, - pages = {111--122}, - month = {Feb}, -} - -@inproceedings{tiledtrees, - author = {Chilimbi,, Trishul M. and Hill,, Mark D. and Larus,, James R.}, - title = {Cache-conscious structure layout}, - booktitle = {PLDI '99: Proceedings of the ACM SIGPLAN 1999 conference on Programming language design and implementation}, - year = {1999}, - isbn = {1-58113-094-5}, - pages = {1--12}, - location = {Atlanta, Georgia, United States}, - doi = {http://doi.acm.org/10.1145/301618.301633}, - publisher = {ACM}, - address = {New York, NY, USA}, - } - -@article{skiplist, - author = {Pugh, William}, - title = {Skip lists: a probabilistic alternative to balanced trees}, - journal = {Commun. 
ACM}, - volume = {33}, - number = {6}, - year = {1990}, - issn = {0001-0782}, - pages = {668--676}, - doi = {http://doi.acm.org/10.1145/78973.78977}, - publisher = {ACM}, - address = {New York, NY, USA}, - } - -@article{costzones, - author = {Singh, Jaswinder Pal and Holt, Chris and Totsuka, Takashi and Gupta, Anoop and Hennessy, John}, - title = {Load balancing and data locality in adaptive hierarchical N-body methods: Barnes-Hut, fast multipole, and radiosity}, - journal = {J. Parallel Distrib. Comput.}, - volume = {27}, - number = {2}, - year = {1995}, - issn = {0743-7315}, - pages = {118--141}, - doi = {http://dx.doi.org/10.1006/jpdc.1995.1077}, - publisher = {Academic Press, Inc.}, - address = {Orlando, FL, USA}, - } - -@inproceedings{lonestar, - author = {Kulkarni, Milind and Burstcher, Martin and Cascaval, Calin and Pingali, Keshav}, - title = {Lonestar: A Suite of Parallel Irregular Programs}, - journal = {International Symposium on Performance Analysis of Software and Systems (ISPASS)}, - year = {2009}, -} - -@article{galois2, - author = {Kulkarni, Milind and Pingali, Keshav and Ramanarayanan, Ganesh and Walter, Bruce and Bala, Kavita and Chew, L. Paul}, - title = {Optimistic parallelism benefits from data partitioning}, - journal = {SIGARCH Comput. Archit. News}, - volume = {36}, - number = {1}, - year = {2008}, - issn = {0163-5964}, - pages = {233--243}, - } -@inproceedings{galois1, - author = {Kulkarni, Milind and Pingali, Keshav and Walter, Bruce and Ramanarayanan, Ganesh and Bala, Kavita and Chew, L. Paul}, - title = {Optimistic parallelism requires abstractions}, - booktitle = {PLDI '07: Proceedings of the 2007 ACM SIGPLAN conference on Programming language design and implementation}, - year = {2007}, - isbn = {978-1-59593-633-2}, - pages = {211--222}, - } -@article{chapel, - author = {Chamberlain, B.L. and Callahan, D. and Zima, H.P.}, - title = {Parallel Programmability and the Chapel Language}, - journal = {Int. J. High Perform. Comput. 
Appl.}, - volume = {21}, - number = {3}, - year = {2007}, - issn = {1094-3420}, - pages = {291--312}, - doi = {http://dx.doi.org/10.1177/1094342007078442}, - publisher = {Sage Publications, Inc.}, - address = {Thousand Oaks, CA, USA}, - } - -@article{KecklerDally2011, - abstract = {{This article discusses the capabilities of state-of-the art GPU-based high-throughput computing systems and considers the challenges to scaling single-chip parallel-computing systems, highlighting high-impact areas that the computing research community can address. Nvidia Research is investigating an architecture for a heterogeneous high-performance computing system that seeks to address these challenges.}}, - author = {Keckler, Stephen W. and Dally, William J. and Khailany, Brucek and Garland, Michael and Glasco, David}, - citeulike-article-id = {9942806}, - citeulike-linkout-0 = {http://dx.doi.org/10.1109/MM.2011.89}, - citeulike-linkout-1 = {http://ieeexplore.ieee.org/xpls/abs\_all.jsp?arnumber=6045685}, - doi = {10.1109/MM.2011.89}, - issn = {0272-1732}, - journal = {Micro, IEEE}, - keywords = {archtiecture, gpu, micro, nvidia}, - number = {5}, - pages = {7--17}, - posted-at = {2011-10-25 08:24:52}, - priority = {2}, - publisher = {IEEE}, - title = {{GPUs and the Future of Parallel Computing}}, - url = {http://dx.doi.org/10.1109/MM.2011.89}, - volume = {31}, - year = {2011} -} - -@inproceedings{BanakarSteinke2002, - author = {Banakar, Rajeshwari and Steinke, Stefan and Lee, Bo-Sik and Balakrishnan, M. 
and Marwedel, Peter}, - title = {Scratchpad memory: design alternative for cache on-chip memory in embedded systems}, - booktitle = {Proceedings of the tenth international symposium on Hardware/software codesign}, - series = {CODES '02}, - year = {2002}, - isbn = {1-58113-542-4}, - location = {Estes Park, Colorado}, - pages = {73--78}, - numpages = {6}, - url = {http://doi.acm.org/10.1145/774789.774805}, - doi = {10.1145/774789.774805}, - acmid = {774805}, - publisher = {ACM}, - address = {New York, NY, USA}, -} - -@article{Snapdragon, - author = {Qualcomm}, - title = {{Snapdragon S4 Processors: System on Chip Solutions for a New Mobile Age}}, - month = {October}, - year = {2011}, -} - -@InProceedings{LiAhn2009, - author = {Sheng Li and Jung Ho Ahn and Strong, R.D. and Brockman, J.B. and Tullsen, D.M. and Jouppi, N.P.}, - title = {McPAT: An Integrated Power, Area, and Timing Modeling Framework for Multicore and -Manycore Architectures}, - OPTbooktitle = {MICRO'09: Proceedings of the 42nd Annual IEEE/ACM International Conference on Microarchitecture}, - OPTyear = {2009} -} - -@incollection{Pillar:LCPC07, - author = {Anderson, Todd and Glew, Neal and Guo, Peng and Lewis, Brian T. and Liu, Wei and Liu, Zhanglin and Petersen, Leaf and Rajagopalan, Mohan and Stichnoth, James M. 
and Wu, Gansha and Zhang, Dan}, - chapter = {Pillar: A Parallel Implementation Language}, - title = {Languages and Compilers for Parallel Computing}, - editor = {Adve, Vikram and Garzar\'{a}n, Mar\'{\i}a Jes\'{u}s and Petersen, Paul}, - year = {2007}, - isbn = {978-3-540-85260-5}, - pages = {141--155}, - numpages = {15}, - url = {http://dx.doi.org/10.1007/978-3-540-85261-2_10}, - doi = {10.1007/978-3-540-85261-2_10}, - acmid = {1433063}, - publisher = {Springer-Verlag}, - address = {Berlin, Heidelberg}, -} - diff --git a/paper/paper.tex b/paper/paper.tex deleted file mode 100644 index 8bb5015abfdee518c9a8b3313f1cdcc34113b9b8..0000000000000000000000000000000000000000 --- a/paper/paper.tex +++ /dev/null @@ -1,202 +0,0 @@ -%----------------------------------------------------------------------------- -% -% Template for sigplanconf LaTeX Class -% -% Name: sigplanconf-template.tex -% -% Purpose: A template for sigplanconf.cls, which is a LaTeX 2e class -% file for SIGPLAN conference proceedings. -% -% Guide: Refer to "Author's Guide to the ACM SIGPLAN Class," -% sigplanconf-guide.pdf -% -% Author: Paul C. Anagnostopoulos -% Windfall Software -% 978 371-2316 -% paul@windfall.com -% -% Created: 15 February 2005 -% -%----------------------------------------------------------------------------- - - -\documentclass{sigplanconf} - -% The following \documentclass options may be useful: - -% preprint Remove this option only once the paper is in final form. -% 10pt To set in 10-point type instead of 9-point. -% 11pt To set in 11-point type instead of 9-point. -% authoryear To obtain author/year citation style instead of numeric. 
- -\usepackage{amsmath} - -\usepackage{textcomp} -\usepackage[disable]{todonotes} -%\usepackage[disable]{todonotes} -\usepackage{listings} - -% For writing pseudocode -\usepackage{algorithm} -\usepackage{algpseudocode} -\usepackage{pifont} - -\usepackage{url} -\usepackage{balance} -% \usepackage{subfig} - -% MACROS USED IN THE TEXT -\newcommand{\NAME}{\textit{hVISC}\/} - -\lstset{ - inputencoding=utf8, -% backgroundcolor=\color{white}, - tabsize=4, - rulecolor=, - upquote=true, -% aboveskip={1.5\baselineskip}, - columns=fixed, - %numbers=left, - showstringspaces=false, - extendedchars=true, - breaklines=true, - prebreak = \raisebox{0ex}[0ex][0ex]{\ensuremath{\hookleftarrow}}, - %frame=single, - frame=none, - captionpos=b, - showtabs=false, - showspaces=false, - showstringspaces=false, - basicstyle=\scriptsize\ttfamily, - identifierstyle=\ttfamily, - keywordstyle=\ttfamily\color[rgb]{0,0,1}, - commentstyle=\ttfamily\color[rgb]{0.133,0.545,0.133}, - stringstyle=\ttfamily\color[rgb]{0.627,0.126,0.941} -} - -%\makeatletter -\lstdefinelanguage{llvm}{ - morecomment = [l]{;}, - morestring=[b]", - sensitive = true, - classoffset=0, - morekeywords={ - %define, - declare, global, constant, - internal, external, private, - linkonce, linkonce_odr, weak, weak_odr, appending, - common, extern_weak, - thread_local, dllimport, dllexport, - hidden, protected, default, - except, deplibs, - volatile, fastcc, coldcc, cc, ccc, - x86_stdcallcc, x86_fastcallcc, - ptx_kernel, ptx_device, - signext, zeroext, inreg, sret, nounwind, noreturn, - nocapture, byval, nest, readnone, readonly, noalias, uwtable, - inlinehint, noinline, alwaysinline, optsize, ssp, sspreq, - noredzone, noimplicitfloat, naked, alignstack, - module, asm, align, tail, to, - addrspace, section, alias, sideeffect, c, gc, - target, datalayout, triple, - blockaddress, - %type - }, - classoffset=1, keywordstyle=\color{purple}, - morekeywords={ - fadd, sub, fsub, mul, fmul, - sdiv, udiv, fdiv, srem, urem, frem, - add, sub, - 
and, or, xor, - icmp, fcmp, - eq, ne, ugt, uge, ult, ule, sgt, sge, slt, sle, - oeq, ogt, oge, olt, ole, one, ord, ueq, ugt, uge, - ult, ule, une, uno, - nuw, nsw, exact, inbounds, - phi, - call, - select, shl, lshr, ashr, va_arg, - trunc, zext, sext, - fptrunc, fpext, fptoui, fptosi, uitofp, sitofp, - ptrtoint, inttoptr, bitcast, - ret, - define, - br, indirectbr, switch, invoke, unwind, unreachable, - malloc, alloca, free, load, store, getelementptr, - extractelement, insertelement, shufflevector, - extractvalue, insertvalue, - type - }, - alsoletter={\%,.}, - keywordsprefix={@}, -} - -\begin{document} - -\special{papersize=8.5in,11in} -\setlength{\pdfpageheight}{\paperheight} -\setlength{\pdfpagewidth}{\paperwidth} - -\conferenceinfo{CONF 'yy}{Month d--d, 20yy, City, ST, Country} -\copyrightyear{20yy} -\copyrightdata{978-1-nnnn-nnnn-n/yy/mm} -\doi{nnnnnnn.nnnnnnn} - -% Uncomment one of the following two, if you are not going for the -% traditional copyright transfer agreement. - -%\exclusivelicense % ACM gets exclusive license to publish, - % you retain copyright - -%\permissiontopublish % ACM gets nonexclusive license to publish - % (paid open-access papers, - % short abstracts) - -\titlebanner{banner above paper title} % These are ignored unless -\preprintfooter{short description of paper} % 'preprint' option specified. 
- -\title{\NAME{}: A Portable Virtual Instruction Set for Heterogeneous Parallel Systems} -%\subtitle{Subtitle Text, if any} - -%\authorinfo{Prakalp Srivastava \and Maria Kotsifakou \and Vikram Adve} - %{University of Illinois at Urbana-Champaign} - %{psrivas2@illinois.edu, kotsifa2@illinois.edu, vadve@illinois.edu} -\authorinfo{Author Name} - {Affiliation} - {Email} - -\maketitle - -\input{Abstract} -%\category{CR-number}{subcategory}{third-level} - -% general terms are not compulsory anymore, -% you may leave them out -%\terms -%term1, term2 - -%\keywords -%keyword1, keyword2 - -\input{Introduction} -\input{DesignGoals} -\input{VirtualISA} -\input{Compilation} -\input{Evaluation} -\input{RelatedWork} -\input{Conclusion} -\listoftodos - -%\acks -%Acknowledgments, if needed. - -% We recommend abbrvnat bibliography style. -\balance -\bibliographystyle{abbrvnat} - -% The bibliography should be embedded for final submission. - -\bibliography{hetero,optimization} - -\end{document} - diff --git a/paper/rutenbar.bib b/paper/rutenbar.bib deleted file mode 100644 index fb5999ceca86713172f9839028afe5fd5f2be66f..0000000000000000000000000000000000000000 --- a/paper/rutenbar.bib +++ /dev/null @@ -1,749 +0,0 @@ -@inproceedings{Ohshima:VECPAR06, - author = {Ohshima, Satoshi and Kise, Kenji and Katagiri, Takahiro and Yuba, Toshitsugu}, - title = {Parallel processing of matrix multiplication in a CPU and GPU heterogeneous environment}, - booktitle = {Proceedings of the 7th international conference on High performance computing for computational science}, - series = {VECPAR'06}, - year = {2007}, - isbn = {978-3-540-71350-0}, - location = {Rio de Janeiro, Brazil}, - pages = {305--318}, - numpages = {14}, - url = {http://dl.acm.org/citation.cfm?id=1761728.1761755}, - acmid = {1761755}, - publisher = {Springer-Verlag}, - address = {Berlin, Heidelberg}, -} - -@inproceedings{Lee:ISCA10, - author = {Lee, Victor W. 
and Kim, Changkyu and Chhugani, Jatin and Deisher, Michael and Kim, Daehyun and Nguyen, Anthony D. and Satish, Nadathur and Smelyanskiy, Mikhail and Chennupaty, Srinivas and Hammarlund, Per and Singhal, Ronak and Dubey, Pradeep}, - title = {Debunking the 100X GPU vs. CPU myth: an evaluation of throughput computing on CPU and GPU}, - booktitle = {Proceedings of the 37th annual international symposium on Computer architecture}, - series = {ISCA '10}, - year = {2010}, - isbn = {978-1-4503-0053-7}, - location = {Saint-Malo, France}, - pages = {451--460}, - numpages = {10}, - url = {http://doi.acm.org/10.1145/1815961.1816021}, - doi = {http://doi.acm.org/10.1145/1815961.1816021}, - acmid = {1816021}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {cpu architecture, gpu architecture, performance analysis, performance measurement, software optimization, throughput computing}, -} - -@inproceedings{Wu:ICS05, - author = {Wu, Peng and Eichenberger, Alexandre E. and Wang, Amy and Zhao, Peng}, - title = {An integrated simdization framework using virtual vectors}, - booktitle = {Proceedings of the 19th annual international conference on Supercomputing}, - series = {ICS '05}, - year = {2005}, - isbn = {1-59593-167-8}, - location = {Cambridge, Massachusetts}, - pages = {169--178}, - numpages = {10}, - url = {http://doi.acm.org/10.1145/1088149.1088172}, - doi = {http://doi.acm.org/10.1145/1088149.1088172}, - acmid = {1088172}, - publisher = {ACM}, - address = {New York, NY, USA}, -} - -@techreport{MLCReport:TR10, -author = { K. Nahrstedt and L. Angrave and M. Caccamo and R. Campbell and B. Godfrey and I. Gupta and K. Karahalios and R. Kravets and S. Kamin and S. Poole and W. 
Sanders }, -title = "Mobile Learning Communities à Are We There Yet?", -institution = "Information Trust Institute, University of Illinois at Urbana-Champaign", -month = {Nov}, -year = {2010} -} - -@article{CogniServe:Micro11, - title = {CogniServe: Heterogeneous Server Architecture for Large-Scale Recognition}, - author = {Ravi Iyer and Sadagopan Srinivasan and Omesh Tickoo and Zhen Fang and Ramesh Illikkal and Steven Zhang and Vineet Chadha and Paul M. Stillwell and Seung Eun Lee}, - year = {2011}, - doi = {http://dx.doi.org/10.1109/MM.2011.37}, - tags = {architecture}, - researchr = {http://researchr.org/publication/IyerSTFIZCSL11}, - cites = {0}, - citedby = {0}, - journal = {IEEE Micro}, - volume = {31}, - number = {3}, - pages = {20-31}, -} - - -@techreport{SARNeeds:TR04, -author = { James Wong and Cassandra Robinson }, -title = "Urban Search and Rescue Technology Needs: Identification of Needs", -number = 207771, -institution = "Savannah River National Laboratory", -month = {Nov}, -year = {2004} -note = "Available at https://www.ncjrs.gov/pdffiles1/nij/grants/207771.pdf" -} - -@ARTICLE{CDSC:IEEEDTC11, -author={Cong, J. and Reinman, G. and Bui, A. and Sarkar, V.}, -journal={Design Test of Computers, IEEE}, -title={Customizable Domain-Specific Computing}, -year={2011}, -month={march-april }, -volume={28}, -number={2}, -pages={6 -15}, -keywords={computing industry;customizable domain specific computing;heat dissipation;parallel general purpose computing system;parallelization era;power density limitation;power performance efficiency;general purpose computers;parallel processing;}, -doi={10.1109/MDT.2010.141}, -ISSN={0740-7475},} - -@INPROCEEDINGS{CUDACUTS, -author={Vineet, V. and Narayanan, P.J.}, -booktitle={Computer Vision and Pattern Recognition Workshops, 2008. CVPRW '08. 
IEEE Computer Society Conference on}, -title={CUDA cuts: Fast graph cuts on the GPU}, -year={2008}, -month={june}, -volume={}, -number={}, -pages={1 -8}, -keywords={CUDA cuts;GPU;Nvidia 8800 GTX;graph cuts;graphics processor unit;image restoration;image segmentation;maxflow algorithm;mincut algorithm;push-relabel algorithm;stereo vision;computer graphic equipment;computer graphics;image restoration;image segmentation;stereo image processing;}, -doi={10.1109/CVPRW.2008.4563095}, -ISSN={},} - -@techreport{CHOI, -author = {J. Choi}, -title={Hardware implementation of MRF MAP estimation on FPGA platform, Ph.D. qualifying examination report}, -institution={ECE, UIUC}, -year = {2011} -} - -@misc{CONVEY, - author = {}, - title = {{Convey HC-1 Family}}, - howpublished = {http://www.conveycomputer.com} -} - -@article{AutoPilot, - address = {Dordrecht}, - author = {Zhang, Zhiru and Fan, Yiping and Jiang, Wei and Han, Guoling and Yang, Changqi and Cong, Jason}, - booktitle = {High-Level Synthesis}, - chapter = {6}, - doi = {10.1007/978-1-4020-8588-8\_6}, - editor = {Coussy, Philippe and Morawiec, Adam}, - isbn = {978-1-4020-8587-1}, - keywords = {autoesl, hls, springer-book}, - pages = {99--112}, - posted-at = {2011-10-03 14:09:35}, - priority = {2}, - publisher = {Springer Netherlands}, - title = {{AutoPilot}: A {Platform-Based} {ESL} Synthesis System}, - url = {http://dx.doi.org/10.1007/978-1-4020-8588-8\_6}, - year = {2008} -} - -@INPROCEEDINGS{GAUT, -author={P. Coussy and G. Lhairech-Lebreton}, -booktitle={DATE}, -title={GAUT: An Open Source High-Level Synthesis Tool}, -year={2009}, -month={}, -volume={}, -number={}, -ISSN={},} - -@INPROCEEDINGS{NISC, -author={B. Gorjiara and D. 
Gajski}, -booktitle={Workshop on Embedded Systems for Real-time Multimedia (ESTIMEDIA)}, -title={Design Space Exploration of C Programs Using NISC: A Case-Study on DCT algorithm}, -year={2005}, -month={}, -volume={}, -number={}, -ISSN={},} - -@article{Diniz2005, -title = "Automatic mapping of C to FPGAs with the DEFACTO compilation and synthesis system", -journal = "Microprocessors and Microsystems", -volume = "29", -number = "2-3", -pages = "51 - 62", -year = "2005", -note = "<ce:title>Special Issue on FPGA Tools and Techniques</ce:title>", -issn = "0141-9331", -doi = "10.1016/j.micpro.2004.06.007", -url = "http://www.sciencedirect.com/science/article/pii/S0141933104000869", -author = "Pedro Diniz and Mary Hall and Joonseok Park and Byoungro So and Heidi Ziegler", -keywords = "Design automation", -keywords = "Parallelizing compiler technology and data dependence analysis", -keywords = "Behavioral synthesis and estimation", -keywords = "Reconfigurable computing", -keywords = "Field-programmable-gate-arrays (FPGAs)" -} - -@BOOK{SPARK, -TITLE = {SPARK: a parallelizing approach to the high-level synthesis of digital circuits}, -AUTHOR = {Sumit Gupta and Rajesh Gupta and Nikil D. Dutt}, -PUBLISHER = {Springer}, -EDITION = {}, -month = {June}, -YEAR = {2009}, -} - -@article{PAPA2009, -author = {Alexandros Papakonstantinou and Karthik Gururaj and John A. Stratton and Deming Chen and Jason Cong and Wen-Mei W. Hwu}, -title = {FCUDA: Enabling efficient compilation of CUDA kernels onto FPGAs}, -journal ={Application Specific Processors, Symposium on}, -volume = {0}, -isbn = {978-1-4244-4939-2}, -year = {2009}, -pages = {35-42}, -doi = {http://doi.ieeecomputersociety.org/10.1109/SASP.2009.5226333}, -publisher = {IEEE Computer Society}, -address = {Los Alamitos, CA, USA}, -} - -@INPROCEEDINGS{PAPA2011, - author={Papakonstantinou, A. and Yun Liang and Stratton, J.A. and Gururaj, K. and Deming Chen and Hwu, W.-M.W. 
and Cong, J.}, - booktitle={2011 IEEE 19th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)}, - title={Multilevel Granularity Parallelism Synthesis on FPGAs}, - year={2011}, - month={may}, - volume={}, - number={}, - pages={178 -185}, - keywords={CUDA kernel mapping;FPGA programming;FPGA-based accelerator;abstraction level;coarse grained parallelism;design layout information;design space search heuristic;hardware spatial parallelism;high-level synthesis technique;lengthy logic synthesis;multigranularity parallelism extraction;multilevel granularity parallelism synthesis;performance evaluation;physical design flow;reconfigurable computing;field programmable gate arrays;integrated circuit layout;logic design;}, - doi={10.1109/FCCM.2011.29}, - ISSN={}, -} - -@INPROCEEDINGS{vu-iShare-globecomm2010, - author={Long Vu and Nahrstedt, K. and Rimac, I. and Hilt, V. and Hofmann, M.}, - booktitle={2010 IEEE GLOBECOM Workshops (GC Wkshps)}, - title={iShare: Exploiting opportunistic ad hoc connections for improving data download of cellular users}, - year={2010}, - month={December}, - volume={}, - number={}, - pages={1475 -1480}, - keywords={ad hoc communication;ad hoc mesh network;cellular broadcast channel;cellular link user;cellular unicast channel;data download;iShare;mobile device;sharing protocol;tit-for-tat incentive mechanism;tree-based protocol;broadcast channels;cellular radio;mobile ad hoc networks;protocols;wireless mesh networks;}, - doi={10.1109/GLOCOMW.2010.5700183}, - ISSN={}, -} - -@ARTICLE{yuan-grace1-ieeeMobComp, - author={Wanghong Yuan and Nahrstedt, K. and Adve, S.V. and Jones, D.L. 
and Kravets, R.H.}, - journal={IEEE Transactions on Mobile Computing}, - title={GRACE-1: cross-layer adaptation for multimedia quality and battery energy}, - year={2006}, - month={July}, - volume={5}, - number={7}, - pages={ 799 - 815}, - keywords={ GRACE-1; battery energy; cross-layer adaptation; mobile devices; multimedia quality; multiple system layers; video quality; mobile computing; multimedia communication; quality of service;}, - doi={10.1109/TMC.2006.98}, - ISSN={1536-1233}, -} - -@inproceedings{yuan-rtMobileSched-sosp03, - author = {Yuan, Wanghong and Nahrstedt, Klara}, - title = {Energy-efficient soft real-time CPU scheduling for mobile multimedia systems}, - booktitle = {Proceedings of the Nineteenth ACM symposium on Operating Systems Principles}, - series = {SOSP '03}, - year = {2003}, - isbn = {1-58113-757-5}, - location = {Bolton Landing, NY, USA}, - pages = {149--163}, - numpages = {15}, - url = {http://doi.acm.org/10.1145/945445.945460}, - doi = {http://doi.acm.org/10.1145/945445.945460}, - acmid = {945460}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {mobile computing, multimedia, power management}, -} - -@article{yuan-multimediaSched-acmTOCS06, - author = {Yuan, Wanghong and Nahrstedt, Klara}, - title = {Energy-efficient CPU scheduling for multimedia applications}, - journal = {ACM Trans. Comput. 
Syst.}, - volume = {24}, - issue = {3}, - month = {August}, - year = {2006}, - issn = {0734-2071}, - pages = {292--331}, - numpages = {40}, - url = {http://doi.acm.org/10.1145/1151690.1151693}, - doi = {http://doi.acm.org/10.1145/1151690.1151693}, - acmid = {1151693}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {Power management, mobile computing, multimedia, soft real-time}, -} - -@INPROCEEDINGS{yuan-recalendar-percom03, - author={Wanghong Yuan and Nahrstedt, K.}, - booktitle={Proceedings of the First IEEE International Conference on Pervasive Computing and Communications, 2003 (PerCom 2003)}, - title={ReCalendar: calendaring and scheduling applications with CPU and energy resource guarantees for mobile devices}, - year={2003}, - month={march}, - volume={}, - number={}, - pages={ 425 - 432}, - keywords={ CPU advance reservation; CPU reservations; CPU resource guarantees; CPUfrequency/voltage adaptation; ReCalendar; admitted reservations; advance reservation scheme; calendaring applications; energy reservations; energy resource guarantees; mobile devices; resource manager; scheduling applications; soft real-time applications; computer power supplies; mobile communication; personal computing; real-time systems; resource allocation; scheduling; telecommunication computing; telecommunication congestion control;}, - doi={10.1109/PERCOM.2003.1192767}, - ISSN={}, -} - -@inproceedings{vu-btWiFi-mswim10, - author = {Vu, Long and Nahrstedt, Klara and Retika, Samuel and Gupta, Indranil}, - title = {Joint bluetooth/wifi scanning framework for characterizing and leveraging people movement in university campus}, - booktitle = {Proceedings of the 13th ACM international conference on Modeling, analysis, and simulation of wireless and mobile systems}, - series = {MSWIM '10}, - year = {2010}, - isbn = {978-1-4503-0274-6}, - location = {Bodrum, Turkey}, - pages = {257--265}, - numpages = {9}, - url = {http://doi.acm.org/10.1145/1868521.1868563}, - doi = 
{http://doi.acm.org/10.1145/1868521.1868563}, - acmid = {1868563}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {android phone, bluetooth trace, people movement characterization, wifi trace}, -} - -@article{vu-jyotish-percom11, - author = {Long Vu and Quang Do and Klara Nahrstedt}, - title = {Jyotish: Constructive approach for context predictions of people movement from joint Wifi/Bluetooth trace}, - journal = {Pervasive and Mobile Computing}, - booktitle = {The Ninth Annual IEEE International Conference on Pervasive Computing and Communications (PerCom 2011)}, - volume = {7}, - number = {6}, - pages = {690 - 704}, - year = {2011}, - issn = {1574-1192}, - doi = {10.1016/j.pmcj.2011.07.004}, - url = {http://www.sciencedirect.com/science/article/pii/S1574119211001167}, - keywords = {People movement prediction, People movement trace, Wifi trace, Bluetooth trace}, -} - -% MDS: I had to fill this in manually, so there may be some slightly incorrect -% information, especially in the volume/number. 
-@article{vu-coada-ijaras11, - author = {Vu, Long and Nahrstedt, Klara and Malik, Rahul and Wang, Qiyan}, - title = {COADA: Leveraging Dynamic Coalition Peer-to-Peer Network for Adaptive Content Download of Cellular Users}, - journal = {International Journal of Adaptive, Resilient, and Autonomic Systems (IJARAS)}, - year = {2011}, - volume = {2}, - number = {2}, - pages = {1 - 22}, - url = {http://www.igi-global.com/viewtitlesample.aspx?id=53463}, - publisher = {IGI Publisher}, -} - -@INPROCEEDINGS{vu-3R-wowmom11, - author={Long Vu and Quang Do and Nahrstedt, K.}, - booktitle={2011 IEEE International Symposium on a World of Wireless, Mobile and Multimedia Networks (WoWMoM 2011)}, - title={3R: Fine-grained encounter-based routing in Delay Tolerant Networks}, - year={2011}, - month={june}, - volume={}, - number={}, - pages={1 -6}, - keywords={3R routing protocol;delay tolerant networks;epidemic routing protocols;fine-grained encounter-based routing;large-scale Bluetooth;message delivery probability;mobile nodes;mobile users;prophet routing protocols;Bluetooth;mobile radio;routing protocols;}, - doi={10.1109/WoWMoM.2011.5986470}, - ISSN={} -} - -%%%%%%%%% -% These come from various sections, but I put them all here for now to have -% them in a centralized place. -%%% motivation.tex - -@inproceedings{DarkSilicon:ISCA11, - author={Hadi Esmaeilzadeh and Emily Blem and Renee St. Amant and Karthikeyan Sankaralingam and Doug Burger}, - title={{Dark Silicon and the End of Multicore Scaling}}, - booktitle="{Proceedings of the 38th International Symposium on Computer Architecture}", - year={2011}, - bib2html_dl_pdf = {http://bit.ly/fmPjY4}, - bib2html_pubtype = {Refereed Conference}, - bib2html_rescat = {Architecture}, - MONTH = {June} -} - -@INPROCEEDINGS{Hameed:ISCA10, - author = {Rehan Hameed and Wajahat Qadeer and Megan Wachs and Omid Azizi and Alex Solomatnikov and Benjamin C. 
Lee and Stephen Richardson and Christos Kozyrakis and Mark Horowitz}, - title = {Understanding sources of inefficiency in general-purpose chips}, - booktitle = {In Proceedings of the 37th Annual International Symposium on Computer Architecture (ISCA 2010)}, - year = {2010} - url = {http://www.duke.edu/~BCL15/documents/hameed2010-isca-h264.pdf}, -} - -@INPROCEEDINGS{FCUDA:SASP09, - author={Papakonstantinou, A. and Gururaj, K. and Stratton, J.A. and Chen, D. and Cong, J. and Hwu, W.-M.W.}, - booktitle={Application Specific Processors, 2009. SASP '09. IEEE 7th Symposium on}, - title={FCUDA: Enabling efficient compilation of CUDA kernels onto FPGAs}, - year={2009}, - month={July}, - volume={}, - number={}, - pages={35 -42}, - keywords={CUDA kernel;FPGA programming;Moores law;application program interface;clock frequency;compute unified device architecture;computing industry;field programmable gate array;graphics processing unit;multicore system;multiprocessor system;parallel processing;performance per watt boosting;power dissipation;application program interfaces;field programmable gate arrays;multiprocessing systems;parallel architectures;}, - doi={10.1109/SASP.2009.5226333}, - ISSN={}, -} - -@inproceedings{LLVA:MICRO03, - author = {Adve, Vikram and Lattner, Chris and Brukman, Michael and Shukla, Anand and Gaeke, Brian}, - title = {LLVA: A Low-level Virtual Instruction Set Architecture}, - booktitle = {Proceedings of the 36th annual IEEE/ACM International Symposium on Microarchitecture}, - series = {MICRO 36}, - year = {2003}, - isbn = {0-7695-2043-X}, - pages = {205--}, - url = {http://dl.acm.org/citation.cfm?id=956417.956545}, - acmid = {956545}, - publisher = {IEEE Computer Society}, - address = {Washington, DC, USA}, -} - -@inproceedings{SVA:SOSP07, - author = {Criswell, John and Lenharth, Andrew and Dhurjati, Dinakar and Adve, Vikram}, - title = {Secure virtual architecture: a safe execution environment for commodity operating systems}, - booktitle = {Proceedings 
of twenty-first ACM SIGOPS symposium on Operating systems principles}, - series = {SOSP '07}, - year = {2007}, - isbn = {978-1-59593-591-5}, - location = {Stevenson, Washington, USA}, - pages = {351--366}, - numpages = {16}, - url = {http://doi.acm.org/10.1145/1294261.1294295}, - doi = {http://doi.acm.org/10.1145/1294261.1294295}, - acmid = {1294295}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {compiler, memory safety, operating systems, security, type safety, typed assembly language, virtual machine}, -} - -@inproceedings{SVA:UsenixSecurity09, - author = {Criswell, John and Geoffray, Nicolas and Adve, Vikram}, - title = {Memory safety for low-level software/hardware interactions}, - booktitle = {Proceedings of the 18th conference on USENIX security symposium}, - series = {SSYM'09}, - year = {2009}, - location = {Montreal, Canada}, - pages = {83--100}, - numpages = {18}, - url = {http://dl.acm.org/citation.cfm?id=1855768.1855774}, - acmid = {1855774}, - publisher = {USENIX Association}, - address = {Berkeley, CA, USA}, -} - -@misc{UPCRC:URL, - howpublished = {http://upcrc.illinois.edu/}, - author = {Universal Parallel Computing Research Center}, -} - -@misc{UPCRCSummerSchool2011:URL, - howpublished = {http://upcrc.illinois.edu/summer/2011/index.html}, - author = {UPCRC Illinois Summer School on Multicore Programming}, - month = {July}, - year = {2011}, -} - -@misc{CUDASummerSchool09:URL, - howpublished = {http://www.greatlakesconsortium.org/events/GPUMulticore/agenda.html}, - author = {Summer School 2008: Accelerators for Science and Engineering Applications: GPUs and Multicores}, - month = {August}, - year = {2008}, -} - -%%% programmability.tex -@misc{OpenCL:URL, - howpublished = {http://www.khronos.org/opencl/}, - author = {{Khronos Group -- OpenCL}}, -} - -@misc{Renderscript:URL, - title = {Renderscript}, - howpublished = {http://developer.android.com/reference/android/renderscript/package-summary.html}, - author = {{Android 
Developers}}, -} - -@article{CnC:WHERE, - title = {{Concurrent Collections}}, - author = {Zoran Budimlic and Michael Burke and Vincent Cavé and Kathleen Knobe and Geoff Lowney and Ryan Newton and Jens Palsberg and David Peixotto and Vivek Sarkar and Frank Schlimbach and Sagnak Tasirlar}, - journal = {Scientific Programming}, - year = {2010}, - pages = {203-217}, - keywords = {Computer & Communication Sciences}, - volume = {18}, - number = {3-4}, - url = {http://iospress.metapress.com/content/83w0360mk786443n/}, -} - -@inproceedings{DPJ:HotPar09, - author = {Robert Bocchino and Vikram Adve and Sarita Adve and Marc -Snir}, - title = {{Parallel programming must be deterministic by default}}, - booktitle = {First USENIX Workshop on Hot Topics in Parallelism -(HotPar)}, - year = {2009}, -} - -@inproceedings{DPJ:POPL11, - author = {Bocchino, Robert and Stephen Heumann and Nima Honarmand and -Sarita Adve and Vikram Adve and Adam Welc and Tatiana Shpeisman}, - title = {{Safe nondeterminism in a deterministic-by-default parallel -language}}, - booktitle = {POPL}, - year = {2011}, -} - -@inproceedings{Bocchino:ECOOP11, - author = {Bocchino, Robert L. and Adve, Vikram S.}, - title = {Types, regions, and effects for safe programming with -object-oriented parallel frameworks}, - booktitle = {Proceedings of the 25th European conference on Object-oriented -programming}, - series = {ECOOP'11}, - year = {2011}, - isbn = {978-3-642-22654-0}, - location = {Lancaster, UK}, - pages = {306--332}, - numpages = {27}, - url = {http://dl.acm.org/citation.cfm?id=2032497.2032519}, - acmid = {2032519}, - publisher = {Springer-Verlag}, - address = {Berlin, Heidelberg}, -} - -@InProceedings{KimNarayanan2011, - author = {E. P. Kim and S. P. Narayanan and N. R. Shanbhag and D. L. 
Jones}, - title = {Low-power and error-resilient PN code acquisition filter via statistical error compensation}, - OPTcrossref = {}, - OPTkey = {}, - OPTbooktitle = {IEEE Custom Integrated Circuits Conference}, - OPTpages = {}, - OPTyear = {2011}, - OPTeditor = {}, - OPTvolume = {}, - OPTnumber = {}, - OPTseries = {}, - OPTaddress = {}, - OPTmonth = {}, - OPTorganization = {}, - OPTpublisher = {}, - OPTnote = {}, - OPTannote = {} -} - - - -@Article{HegdeShanbhag2001, - author = {R. Hegde and N. R. Shanbhag}, - title = {Soft digital signal processing}, - journal = {IEEE Trans. VLSI Systems}, - year = {2001}, - OPTkey = {}, - OPTvolume = {9}, - OPTnumber = {6}, - OPTpages = {813--823}, - OPTmonth = {}, - OPTnote = {}, - OPTannote = {} -} - - - - -@Article{KimShanbhag2010, - author = {E. Kim and Naresh R. Shanbhag}, - title = {Soft N-modular redundancy}, - journal = {IEEE Computer}, - year = {2010}, - OPTkey = {}, - OPTvolume = {}, - OPTnumber = {}, - OPTpages = {}, - OPTmonth = {dec}, - OPTnote = {}, - OPTannote = {} -} - - - -@Article{NarayananVaratkar2010, - author = {S. Narayanan and G. Varatkar and D. L. Jones and N. R. Shanbhag}, - title = {Computation as estimation: A general framework for robustness and energy-efficiency in SoCs}, - journal = {IEEE Transactions on Signal Processing}, - year = {2010}, - OPTkey = {}, - OPTvolume = {58}, - OPTnumber = {8}, - OPTpages = {4416--4421}, - OPTmonth = {}, - OPTnote = {}, - OPTannote = {} -} - - - -@InProceedings{AbdallahShanbhag2010, - author = {R. Abdallah and N. Shanbhag}, - title = {Robust energy-efficient DSP systems via output probability processing}, - OPTcrossref = {}, - OPTkey = {}, - OPTbooktitle = {Proc. of Int. Conf. 
on Computer Design}, - OPTpages = {}, - OPTyear = {2010}, - OPTeditor = {}, - OPTvolume = {}, - OPTnumber = {}, - OPTseries = {}, - OPTaddress = {}, - OPTmonth = {oct}, - OPTorganization = {}, - OPTpublisher = {}, - OPTnote = {}, - OPTannote = {} -} - - -@InProceedings{Ben-Asher:CODES2010, - Author = {Ben-Asher, Yosi and Rotem, Nadav}, - Title = {Automatic memory partitioning: increasing memory parallelism via -data structure partitioning}, - Booktitle = {Proceedings of the eighth IEEE/ACM/IFIP international -conference on Hardware/software codesign and system synthesis}, - Year = {2010}, - Pages = {155--162}, - url = {http://doi.acm.org.proxy2.library.illinois.edu/10.1145/1878961.1878989}, - Annote = { - "Uses run-time tracing to partition memory between different banks within - an accelerator or FPGA. Partitioning is based on identifying linear - memory access patterns. Uses variation of graph coloring, solved via - ILP, to do the assignment to memory banks. - See references for other relevant papers from the embedded world."} -} - - -@InProceedings{SPEECH1, - Author = {E.C. Lin and R.A. Rutenbar}, - Title = {A Multi-FPGA 10x-Real-Time High-Speed Search Engine for a 5000-Word - Vocabulary Speech Recognizer}, - Booktitle = {Proc. 2009 ACM International Symposium on FPGAs (ISFPGA)}, - Month = {February}, - Year = {2009} -} - -@PhdThesis{SPEECH2, - Author = {Edward C. 
Lin}, - Title = {A High Performance Custom Hardware Backend Search Engine for a Speech Recognition System}, - School = {Dept of Electrical and Computer Engineering, Carnegie Mellon University}, - Month = {December}, - Year = {2007} -} - -@PhdThesis{SPEECH3, - Author = {Kai Yu}, - Title = {Hardware Optimization and Exploration of Feature Extraction - and Feature Scoring for Speech Recognition}, - School = {Dept of Electrical and Computer Engineering, Carnegie Mellon University}, - Month = {August}, - Year = {2009} -} - -@PhdThesis{SPEECH4, - Author = {Patrick Bourke}, - Title = {A Low-Power Hardware Architecture for Speech Recognition Search}, - School = {Dept of Electrical and Computer Engineering, Carnegie Mellon University}, - Month = {April}, - Year = {2011} -} - -@incollection{SPEECH5, - author = {Patrick Bourke, Kai Yu and Rob A. Rutenbar}, - title = {Mobile Speech Hardware: The Case for Custom Silicon}, - Chapter = {2}, - booktitle = {Speech in Mobile and Pervasive Environments}, - editor = {Nitendra Rajput and Amit Anil Nanavati}, - publisher = {Wiley}, - pages = {7-56}, - year = {2012}, - isbn = {0470694351} -} - -@InProceedings{SPEECH6, - Author = {J. Johnston and R.A. Rutenbar}, - Title = {A High-Rate, Low-Power, ASIC Speech Decoder Using Finite State Transducers}, - Booktitle = {Proc. 23rd IEEE International Conference on Application-Specific Systems, - Architectures and Processors (ASAP12)}, - Month = {to appear July}, - Year = {2012} -} - -@BOOK{BIS06, -TITLE = {Pattern Recognition and Machine Learning}, -AUTHOR = {C. M. Bishop}, -PUBLISHER = {Springer}, -YEAR = {2006} -} - -@BOOK{DAR09, -TITLE = {Modeling and Reasoning with Bayesian Networks}, -AUTHOR = {A. Darwiche}, -PUBLISHER = {Cambridge University Press}, -YEAR = {2009} -} - -@article{CHOI12, - Author = {J. Choi and R.A. 
Rutenbar}, - title = {Hardware Implementation of MRF MAP Inference on an FPGA Platform}, - journal = {submitted to 2012 International Conference of Field Programmable Logic and Applications (FPL'12)}, - Month = {to be held August}, - Year = {2012} -} - -@Article{CONVEY2, - Author = {Convey Computer Corporation}, - Title = {Convey Reference Manual}, - note = {{http://www.conveycomputer.com}}, - month = {September}, - year = {2009} -} - -@Article{VOCI, - author = {Voci Technologies Inc.}, - note = {\url{http://www.vocitech.com}} -} - -@ARTICLE{VOCI2, -title={Office of Naval Research Selects Voci}, -journal={BusinessWire}, -month = {October 17}, -year = {2011}, -note={\url{http://www.businesswire.com/news/home/20111018005498/en/Office-Naval-Research-Selects-Voci}} -} -%journal={Enhanced Online News (EON), BusinessWire}, -%http://eon.businesswire.com/news/eon/20111018005499/en/speech-recognition/text-analytics/national-security}} - -@Article{paris1, - author = "P. Smaragdis P and B. Raj", - title = "The Markov selection model for concurrent speech recognition", - journal = "Neurocomputing", - volume = "80", - number = "15", - year = 2012 - month = march, - pages = "64–72" -} - -@InProceedings{paris2, - author = "Mysore, G. and P. Smaragdis and B. 
Raj", - year = 2010, - title = "Non-negative hidden Markov modeling of audio with application to source separation", - booktitle = "9th Int'l Conference on Latent Variable Analysis and Signal Separation (LCA/ICA)", - year = 2010 -} diff --git a/paper/safecode.bib b/paper/safecode.bib deleted file mode 100644 index 08231be64f42133104c1d07f8e1b386fa296474e..0000000000000000000000000000000000000000 --- a/paper/safecode.bib +++ /dev/null @@ -1,1150 +0,0 @@ -%%===----------------------------------------------------------------------===%% -%% -%% LLVM Research Group Bibtex File -%% -%%===----------------------------------------------------------------------===%% -%% -%% When adding an entry to this file, please keep the following in mind: -%% -%% -- Please make all entries consistent :-) -%% -- Include the authors names spelled out, first and last with an optional -%% middle initial. e.g. Chris A. Lattner or Chris Lattner, but NOT C. -%% Lattner. -%% -- Please use the abbreviations at the top of the file for publications, -%% e.g. booktitle = PLDI, instead of booktitle = "Proceedings of -%% ...". This makes it much easier to shorten the citations when trying -%% to cram a paper in (make a local change to change the substitution -%% strings at the top of the file). It also keeps all of the cites for a -%% conference identical. -%% -- Please spell things right! :) -%% -- Include city and date in all InProceedings entries. -%% -- Also, please use a consistent naming scheme for cite keys, where -%% possible. If 1-3 authors: Lastname1Lastname2Lastname3:WHEREYY -%% e.g., LattnerAdve:MSP05. If 4 or more authors: InitialsList:WHEREYY -%% with the initials of the last names of the first 4 authors, e.g, -%% DKAL:TECS05. For more weird cases, there's not much hope but most -%% cases work as above. -%% -- Please make all entries consistent! :-) -%% -- Did I say, please make all entries consistent? 
-%% -%% When the final version of a paper is done, please *COPY* the current -%% version of this file to the paper directory and check it into CVS for -%% that paper. -%% -%% TO-DO: -%% -- Replace full conference names with abbrevs -%% -- Make all entries consistent -%% -- Replace ...WHEREYY with ...WHERE:YYYY in all cite keys -%% -%%===----------------------------------------------------------------------===%% - - -%%% -%%% Substitutions: Replace these with shorter string to shrink refs. -%%% - - -%% Journals - -@string{CACM = "Communications of the ACM"} -@string{JACM = "Journal of the ACM"} -@string{SPE = "Software--Practice and Experience"} - -%% Conferences - -@string{PLDI = "ACM SIGPLAN Conference on Programming Language Design and Implementation"} -@string{ICSE = "International Conference on Software Engineering"} -@string{ISSTA = "Proc. ACM SIGSOFT Int'l Symp. on Software Testing and Analysis"} -@string{CASES = "Proc. Int'l Conf. on Compilers, Architecture and Synthesis for Embedded Systems (CASES)"} -@string{SAS = "Proc. Int'l Symp. on Static Analysis (SAS)"} -@string{ISCA = "Proc. Int'l Conf. on Computer Architecture (ISCA)"} -@string{ISCA = "ISCA"} -@string{CC = "Proc. Int'l Conf. on Compiler Construction (CC)"} -@string{PACT = "Proc. Int'l Conf. on Parallel Architectures and Compilation Techniques (PACT)"} -@string{ISMM = "Proc. Int'l Symp. On Memory Management (ISMM)"} -@string{ESEC = "Proc. European Software Engineering Conf. (ESEC)"} -@string{SIGMOD = "Proc. ACM SIGMOD Int'l Conf. on Management of Data (SIGMOD)"} -@string{ICS = "Proc. Int'l Conf. on Supercomputing (ICS)"} - -%% Workshops - -@string{LCPC = "Proc. Int'l Workshop on Languages and Compilers for - Parallel Computing (LCPC)"}, -@string{MSP = "Proc. ACM Workshop on Memory System Performance"} -%%@string{MSP = "MSP"} -@string{PASTE = "Proc. ACM SIGPLAN-SIGSOFT Workshop on Program Analysis for Software Tools and Engineering (PASTE)"} -@string{IWMM = "Proc. 
Int'l Workshop on Memory Management"} - -@string{WCRE = "Proc. Working Conf. on Reverse Engineering (WCRE)"} -@string{FMPC = "Proc. Symp. on the Frontiers of Massively Parallel Computation"} -@string{ICCD = "Proc. Int'l Conf. on Computer Design (CDES)"} -@string{CC = "Proc. Int'l Conf. on Compiler Construction (CC)"} -@string{IPDPS = "Proc. Int'l Parallel and Distributed Processing Symp."} - - -@article{BurkeTorczon:TOPLAS93, - author = {Michael Burke and Linda Torczon}, - title= {Interprocedural optimization: eliminating unnecessary recompilation}, - journal = TOPLAS, - volume = {15}, - number = {3}, - year = {1993}, - issn = {0164-0925}, - pages = {367--399}, - doi = {http://doi.acm.org/10.1145/169683.169678}, - publisher = {ACM Press}, - } - -@inproceedings{ sirer96writing, - author = "Emin G{\"u}n Sirer and Stefan Savage and Przemyslaw Pardyak and Greg P. DeFouw and Brian N. Bershad", - title = "Writing an Operating System Using {M}odula-3", - booktitle = "Workshop on Compiler Support for Systems Software", - year = "1996", - url = "citeseer.nj.nec.com/article/sirer96writing.html" } - - -@inproceedings{ghiya96is, - author = "Rakesh Ghiya and Laurie J. Hendren", - title = "Is it a Tree, a {DAG}, or a Cyclic Graph? A Shape Analysis for Heap-Directed Pointers in C", - booktitle = POPL, - pages = "1-15", - year = "1996", - url = "citeseer.nj.nec.com/ghiya96is.html" } - -@InProceedings{HPCMO:PLDI98, - Author = {A. Ayers and S. de Jong and J. Peyton and R. Schooler}, - Title = {Scalable Cross-Module Optimization}, - Booktitle = PLDI, - Address = "Montreal", - Month = Jun, - Year = 1998 -} - - -@Article{ConvexAppsCompiler:Sigplan94, - Title = {Developing an Interprocedural Optimizing Compiler}, - Author = {J. Loeliger and R. 
Metzger}, - Journal = "ACM Sigplan Notices", - Month = Apr, - Year = 1994, - Volume = 29, - Number = 4 -} - -@Article{LimLeeSha:PDCP, - Title = {Ensuring Integrity and Serivce Availability in a Web Based - Control Laboratory}, - Author = {Sungsoo Lim and Kihwal Lee and Lui Sha}, - Journal = "To appear in Journal of Parallel and - Distributed Computing Practices" -} - - - -@misc{franz97communications, - author = "M. Franz and T. Kistler", - title = "Communications of the ACM", - text = "Michael Franz and Thomas Kistler. Slim binaries. Communications of the - ACM, 40(12):8794, 1997.", - year = "1997" -} - -@book{JVM, - author = "Tim Lindholm and Frank Yellin", - title = "The {Java} Virtual Machine Specification", - publisher = "Ad{\-d}i{\-s}on-Wes{\-l}ey", - address = "Reading, MA", - isbn = "0-201-63452-X", - year = "1997" -} - -@manual{Ada:Manual, -note = {International Standard ISO/IEC 8652:1995}, -organization = {International Organisation for Standardisation}, -title = {Ada95 Reference Manual}, -year = {1995} -} - - -@misc{CLR, - author = "Microsoft", - title = "", - text = "The .NET Common Language Runtime, See web site at: http://msdn.microsoft.com/net" - } - -@misc{DougLea, - author = "Doug Lea", - title = "A memory allocator, see website at \\ - http://gee.cs.oswego.edu/dl/html/malloc.html" - } - -@misc{TinyOS, - title = "TinyOS, a component based OS for the networked sensor regime, See web site at: http://webs.cs.berkeley.edu/tos/" - } - -@inproceedings{Self, - author = "David Ungar and Randall B. Smith", - title = "Self: The Power of Simplicity", - booktitle = "Proceedings of the Conference on Object-Oriented Programming Systems, Languages, and Applications ({OOPSLA})", - journal = "SIGPLAN Notices", - volume = "22", - number = "12", - alt_publisher = "ACM Press", - alt_address = "New York, NY", - editor = "Norman Meyrowitz", - pages = "227--242", - year = "1987" -} - -@InProceedings{EMachine:PLDI02, - author = "Thomas A. Henzinger and Christoph M. 
Kirsch", - title = "The Embedded Machine: Predictable, Portable Real-Time Code", - booktitle = PLDI, - address = "Berlin, Germany", - month = Jun, - year = 2002 -} - -@InProceedings{GayAiken:PLDI01, - author = "David Gay and Alex Aiken", - title = "Language Support for Regions", - booktitle = PLDI, - address = "Snowbird, UT", - pages = "70-80", - month = Jun, - year = 2001 -} - -@InProceedings{BarrettZorn:PLDI93, - author = "D. A. Barret and B. G. Zorn", - title = "Using Lifetime Predictors to Improve Memory Allocation Performance", - booktitle = PLDI, - address = "Albuquerque, New Mexixo", - pages = "187-196", - month = Jun, - year = 1993 -} - -@InProceedings{ XuMillerReps:PLDI01, - author = "Zhichen Xu and Barton P. Miller and Thomas Reps", - title = "Safety checking of machine code", - booktitle = PLDI, - address = "Vancouver B.C., Canada", - pages = "70--82", - year = "2000", - } - -@inproceedings{CraryWalkerMorrisett:POPL99, - author = "Karl Crary and David Walker and Greg Morrisett", - title = "Typed Memory Management in a Calculus of Capabilities", - booktitle = POPL, - address = "New York, NY", - pages = "262--275", - year = "1999", -} - -@inproceedings{Wilhelm:CC00, - author = "R. Wilhelm and M. Sagiv and T. Reps", - title = "Shape analysis", - booktitle = "Proceedings of CC 2000: 9th Int. Conf. on Compiler Construction", - address = "Berlin, Ger.", - month = "Mar-Apr", - year = 2000 -} - -@inproceedings{Hallenberg:PLDI02, - author = "N. Hallenberg and M. Elsman and M. Tofte", - title = "Combining region inference and garbage collection", - booktitle = PLDI, - address = "Berlin, Germany", - month = Jun, - year = 2002 -} - -@inproceedings{AikenFL:PLDI95, - author = "A. Aiken and M. Fahndrich and R. 
Levien", - title = "Better static memory management: Improving region-based analysis -of higher-order languages", - booktitle = PLDI, - month = Jun, - year = 1995 -} - -@InProceedings{CCured:PLDI03, - author = "Jeremy Condit and Mathew Harren and Scott McPeak and George C. Necula and Westley Weimer", - title = "{CC}ured in the Real World", - booktitle = PLDI, - month = Jun, - year = 2003 -} - -@InProceedings{realtimeGarbage:POPL03, - author = "D.F. Bacon and P. Cheng and V.T. Rajan", - title = "A real-time garbage collector with low overhead and consisitent utilization", - booktitle = POPL, - month = Jan, - year = 2003 -} - -@Article{Pugh:CACM, - Author = {W. Pugh}, - Title = {A practical algorithm for exact array dependence analysis}, - Journal = CACM, - Volume = {35}, - Number = 8, - Pages = {102--114}, - Month = Aug, - Year = 1992} - -@InProceedings{Sha:Simplex1, - author = "Lui Sha", - title = "Dependable System Upgrades", - booktitle = "Proceedings of IEEE Real Time System Symposium", - year = 1998 -} - -@article{Sha:Simplex, - author = "Lui Sha", - title = "Using Simplicity to Control Complexity", - journal = "IEEE Software", - month = "July/August", - year = 2001 -} - -@misc{AlephOne:StackSmash, - author = "AlephOne", - title = "Smashing the Stack for Fun and Profit", - url = "http://www.fc.net/phrack/files/p49/p49-14", - note = "http://www.fc.net/phrack/files/p49/p49-14", -} - -@Inproceedings{ShankarFMT, - author = "Umesh Shankar and Kunal Talwar and Jeffrey S. Foster and David Wagner", - title = "Detecting Format String Vulnerabilities with Type Qualifiers", - booktitle = "10th USENIX Security Symposium", - address = "Washington,D.C.", - month = Aug, - year = 2001 -} - -@InProceedings{Mibench02, - author = "Matthew R. Guthaus and Jeffrey S. Ringenberg and Dan Ernst -and Todd M. Austin and Trevor Mudge and Richard B. 
Brown", - title = "MiBench: A free, commercially representative embedded benchmark suite ", - booktitle = "IEEE 4th Annual Workshop on Workload Characterization", - address = "Austin, TX", - month = Dec, - year = 2001 -} - -@inproceedings{ lee97mediabench, - author = "Chunho Lee and Miodrag Potkonjak and William H. Mangione-Smith", - title = "MediaBench: A Tool for Evaluating and Synthesizing Multimedia and Communicatons Systems", - booktitle = "International Symposium on Microarchitecture", - pages = "330-335", - year = "1997", -} - -@inproceedings{ Mate, - author = "P. Levis and D. Culler", - title = "Mate: A tiny virtual machine for sensor networks", - booktitle = "International Conference on Architectural Support for Programming Languages - and Operating Systems, San Jose, CA, USA", - month = "Oct.", - year = "2002", - url = "citeseer.nj.nec.com/levis02mate.html" } - -@inproceedings{ TAL:POPL99, - author = "Karl Crary and David Walker and Greg Morrisett", - title = "Typed Memory Management in a Calculus of Capabilities", - booktitle = POPL, - address = "New York, NY", - pages = "262--275", - year = "1999", - url = "citeseer.nj.nec.com/article/crary99typed.html" } - - -@article{walker01alias, - author = "David Walker and Greg Morrisett", - title = "Alias Types for Recursive Data Structures", - journal = "Lecture Notes in Comp. Sci.", - volume = "vol. 2071", - year = "2001", - url = "citeseer.nj.nec.com/walker00alias.html" } - -%% pages = "177", - -@TechReport{LattnerAdve:DSA, - Author = {Chris Lattner and Vikram Adve}, - Title = {Data Structure Analysis: An Efficient Context-Sensitive Heap -Analysis}, - Institution = {Computer Science Dept., - Univ. of Illinois at Urbana-Champaign}, - Number = {UIUCDCS-R-2003-2340}, - Type = {Tech. 
Report}, - Month = {Apr}, - Year = {2003}, - Keyword = {} -} - - -@InProceedings{CSSV:PLDI03, - author = "Nurit Dor and Michael Rodeh and Mooly Sagiv", - title = "CSSV: Towards a Realistic Tool for Statically Detecting All Buffer Overflows in C", - booktitle = PLDI, - year = "2003", - address = "San Diego", - month = Jun -} - -@inproceedings{ david94lclint, - author = "Evans, David and Guttag, John and Horning, James and Tan, Yang Meng", - title = "{LCL}int: A Tool for Using Specifications to Check Code", - booktitle = "Proceedings of the {ACM} {SIGSOFT} '94 Symposium on the Foundations of Software Engineering", - pages = "87-96", - year = "1994", - url = "citeseer.nj.nec.com/evans94lclint.html" -} - -@inproceedings{ jones97backwardscompatible, - author = "Richard W. M. Jones and Paul H. J. Kelly", - title = "Backwards-Compatible Bounds Checking for Arrays and Pointers in C Programs", - booktitle = "Automated and Algorithmic Debugging", - pages = "13-26", - year = "1997", - url = "citeseer.ist.psu.edu/jones97backwardscompatible.html" } - -@inproceedings{ruwase04practical, - author = "O. Ruwase and M. Lam", - title = "A Practical Dynamic Buffer Overflow Detector", - booktitle = "In Proceedings of the Network and Distributed System Security (NDSS) Symposium", - pages = "159--169", - month = February, - year = 2004, - address = {San Diego, CA, USA}, - url = "citeseer.ist.psu.edu/ruwase04practical.html"} - -@inproceedings{Xu:FSE04, - author = {Wei Xu and Daniel C. DuVarney and R. Sekar}, - title = {An efficient and backwards-compatible transformation to ensure memory safety of {C} programs}, - booktitle = {Proc. 
12th ACM SIGSOFT Symposium on Foundations of Software Engineering}, - year = {2004}, - pages = {117--126}, - location = {Newport Beach, CA, USA}, -} - -@inproceedings{YongHorwitz:FSE03, - author = {Suan Hsi Yong and Susan Horwitz}, - title = {Protecting {C} Programs from Attacks via Invalid Pointer - Dereferences}, - booktitle = "Foundations of Software Engineering", - year = {2003}, - location = {Helsinki, Finland} -} - -@inproceedings{ mccanne93bsd, - author = "Steven McCanne and Van Jacobson", - title = "The {BSD} Packet Filter: A New Architecture for User-level Packet Capture", - booktitle = "{USENIX} Winter", - pages = "259-270", - year = "1993", - url = "citeseer.nj.nec.com/mccanne92bsd.html" } - - -@article{SFI:SOSP93, - author = "Robert Wahbe and Steven Lucco and Thomas E. Anderson and Susan L. Graham", - title = "Efficient Software-Based Fault Isolation", - journal = "ACM SIGOPS Operating Systems Review", - volume = "27", - number = "5", - pages = "203--216", - year = "1993", - url = "citeseer.nj.nec.com/wahbe93efficient.html" } - -@article{CCured:TOPLAS05, - author = {George C. Necula and - Jeremy Condit and - Matthew Harren and - Scott McPeak and - Westley Weimer}, - title = {CCured: type-safe retrofitting of legacy software.}, - journal = TOPLAS, - volume-comment = {27}, - number-comment = {3}, - year = {2005}, - pages-comment = {477-526}, - ee = {http://doi.acm.org/10.1145/1065892}, - bibsource = {DBLP, http://dblp.uni-trier.de} -} - - - - -@inproceedings{DADY:ISSTA04, - author = {Nurit Dor and Stephen Adams and Manuvir Das and Zhe Yang}, - title = {Software validation via scalable path-sensitive value flow analysis}, - booktitle = {Proc. of ACM SIGSOFT international symposium on Software testing and analysis}, - year = {2004}, - isbn = {1-58113-820-2}, - location = {Boston, Massachusetts, USA}, - doi = {http://doi.acm.org/10.1145/1007512.1007515}, - } - - @inproceedings{BCC, - author = {Samuel C. 
Kendall}, - title = {BCC: Runtime Checking for C programs}, - booktitle = {In Proceedings of the USENIX}, - year = {1983}, - } - - @inproceedings{RTCC, - author = {Joseph L. Steffen}, - title = {Adding Run-Time Checking to the Portable C Compiler}, - booktitle = {Software: Practice and Experience}, - Month = April, - year = {1992}, - } - - @TechReport{Omega, - Author = {Wayne Kelly and Vadim Maslov and William Pugh and - Evan Rosser and Tatiana Shpeisman and David Wonnacott}, - Title = {{T}he {O}mega {L}ibrary {I}nterface {G}uide}, - Institution = {Computer Science Dept., U. Maryland, College Park}, - Month = Apr, - Year = 1996 - } - - @TechReport{SAFECode:TR04, - Author = {Dinakar Dhurjati and Sumant Kowshik and Vikram Adve}, - Title = {{SAFEC}ode: Enabling Sound Static Analysis for Weakly Typed Languages}, - Institution = {Computer Science Dept., Univ of Illinois, Urbana-Champaign}, - Year = 2004 - } - -@InProceedings{SafeC:PLDI94, - author = "Todd M. Austin and Scott E. Breach and Gurindar S. Sohi", - title = "Efficient Detection of All Pointer and Array Access Errors", - booktitle = PLDI, - month = Jun, - year = 1994 -} - - -@InProceedings{leroy04, - author = "X. Leroy", - title = " Exploiting type systems and static analyses for smart card security", - booktitle = "Cassis International Workshop", - address = "Marseille", - month = Mar, - year = 2004 -} - -@TechReport{LattnerAdve:LLVMTR1, - Author = {Chris Lattner and Vikram Adve}, - Title = {The {LLVM} {I}nstruction {S}et and {C}ompilation {S}trategy}, - Institution = {Computer Science Dept., - Univ. of Illinois at Urbana-Champaign}, - Number = {UIUCDCS-R-2002-2292}, - Type = {Tech. 
Report}, - Month = {Aug}, - Year = {2002}, - URL = {\tt http://llvm.cs.uiuc.edu/pubs/LLVMCompilationStrategy.pdf}, - Annote = { CATEGORY: COMPILERS: SCALAR: SYSTEMS } -} - -@TechReport{LattnerAdve:LLVMTR1, - Author = {Chris Lattner and Vikram Adve}, - Title = {The {LLVM} {I}nstruction {S}et and {C}ompilation {S}trategy}, - Institution = {Computer Science Dept., - Univ. of Illinois at Urbana-Champaign}, - Number = {UIUCDCS-R-2002-2292}, - Type = {Tech. Report}, - Month = {Aug}, - Year = {2002}, - URL = {\tt http://llvm.cs.uiuc.edu/pubs/LLVMCompilationStrategy.pdf}, - Annote = { CATEGORY: COMPILERS: SCALAR: SYSTEMS } -} - -@TechReport{LattnerAdve:DSGTR031, - Author = {Chris Lattner and Vikram Adve}, - Title = {{D}ata {S}tructure {A}nalysis: {A} {F}ast and {S}calable {C}ontext-{S}ensitive {H}eap {A}nalysis}, - Institution = {Computer Science Dept., - Univ. of Illinois at Urbana-Champaign}, - Number = {UIUCDCS-R-2003-2340}, - Type = {Tech. Report}, - URL = {\tt http://llvm.cs.uiuc.edu/pubs/2003-04-29-DataStructureAnalysisTR.html}, - Annote = { CATEGORY: COMPILERS: SCALAR: POINTER, HEAP, AND SHAPE ANALYSIS } -} - -@TechReport{LattnerAdve:PoolAllocTR041, - Author = {Chris Lattner and Vikram Adve}, - Title = {Automatic Pool Allocation: Compile-Time Control of - Data Structure Layout in the Heap}, - Institution = {Computer Science Dept., - Univ. of Illinois at Urbana-Champaign}, - Number = {UIUCDCS-R-2004-2465}, - Type = {Tech. 
Report}, - Annote = { CATEGORY: COMPILERS: SCALAR: POINTER, HEAP, AND SHAPE ANALYSIS } -} - -@inproceedings{Ghiya:PLDI01, - author = {Rakesh Ghiya and Daniel Lavery and David Sehr}, - title = {On the importance of points-to analysis and other memory disambiguation methods for C programs}, - booktitle = PLDI, - year = {2001}, - isbn = {1-58113-414-2}, - pages = {47--58}, - location = {Snowbird, Utah, United States}, - doi = {http://doi.acm.org/10.1145/378795.378806}, - alt_publisher = {ACM Press}, - alt_address = {New York, NY, USA}, - } - - @article{WrightSyntactic, - author = {Andrew K. Wright and Matthias Felleisen}, - title = {A syntactic approach to type soundness}, - journal = {Inf. Comput.}, - volume = {115}, - number = {1}, - year = {1994}, - issn = {0890-5401}, - pages = {38--94}, - doi = {http://dx.doi.org/10.1006/inco.1994.1093}, - alt_publisher = {Academic Press, Inc.}, - alt_address = {Duluth, MN, USA}, - } - - - - @phdthesis{Olden, - author = {Martin Christopher Carlisle}, - title = {Olden: parallelizing programs with dynamic data structures on distributed-memory machines}, - year = {1996}, - order_no = {UMI Order No. GAX96-27387}, - publisher = {Princeton University}, - } - - @TechReport{Omega, - Author = {Wayne Kelly and Vadim Maslov and William Pugh and - Evan Rosser and Tatiana Shpeisman and David Wonnacott}, - Title = {{T}he {O}mega {L}ibrary {I}nterface {G}uide}, - Institution = {Computer Science Dept., U. 
Maryland, College Park}, - Month = Apr, - Year = 1996 - } - -@article{Loginov:FASE01, - author = "Alexey Loginov and Suan Hsi Yong and Susan Horwitz and Thomas Reps", - title = "Debugging via Run-Time Type Checking", - journal = "Lecture Notes in Computer Science", - year = "2001", - url = "citeseer.ist.psu.edu/loginov01debugging.html" } - - -@Article{PatilFisher:Debugging97, - author = "Harish Patil and Charles Fischer", - title = "Low-cost, Concurrent Checking of Pointer and Array Accesses - in C Programs", - journal = "Software--Practice and Experience", - volume = "27", - number = "1", - pages = "87-110", - year = "1997", - url = "citeseer.ist.psu.edu/64347.html" -} - - -@InProceedings{OORegions:PLDI04, - author = {Wei-Ngan Chin and Florin Craciun and Shengchao Qin and - Martin Rinard}, - title = {Region Inference for an Object-Oriented Language}, - booktitle = PLDI, - year = {2004}, - month = {June}, -} - - - -@inproceedings{PatilFisher:Debugging95, - author = "Harish Patil and Charles N. Fischer", - title = "Efficient Run-time Monitoring Using Shadow Processing", - booktitle = "Automated and Algorithmic Debugging", - pages = "119-132", - year = "1995", - url = "citeseer.ist.psu.edu/patil95efficient.html" } - -@InProceedings{purify, - author = "Reed Hastings and Bob Joyce", - title = "Purify: Fast detection of memory leaks and access errors", - booktitle = "{Winter USENIX}", - year = 1992 -} - -@misc{valgrind, - author = "J. Seward", - title = "Valgrind, an open-source memory debugger for x86-GNU/Linux", - url = "http://developer.kde.org/~sewardj/", -} - -@InProceedings{leroy04, - author = "X. Leroy", - title = " Exploiting type systems and static analyses for smart card security", - booktitle = "Cassis International Workshop", - address = "Marseille", - month = Mar, - year = 2004 -} - -@InProceedings{ Henzinger:BLAST03, - author = "T. Henzinger and R. Jhala and R. Majumdar and G. 
Sutre", - title = "Software verification with {B}last", - booktitle = "Tenth International Workshop on Model Checking of Software (SPIN)", - pages = "235--239", - year = "2003", - url = "citeseer.ist.psu.edu/henzinger03software.html" -} - -@TechReport{LattnerAdve:LLVMTR1, - Author = {Chris Lattner and Vikram Adve}, - Title = {The {LLVM} {I}nstruction {S}et and {C}ompilation {S}trategy}, - Institution = {Computer Science Dept., - Univ. of Illinois at Urbana-Champaign}, - Number = {UIUCDCS-R-2002-2292}, - Type = {Tech. Report}, - Month = {Aug}, - Year = {2002}, - URL = {\tt http://llvm.cs.uiuc.edu/pubs/LLVMCompilationStrategy.pdf}, - Annote = { CATEGORY: COMPILERS: SCALAR: SYSTEMS } -} - -@TechReport{LattnerAdve:LLVMTR1, - Author = {Chris Lattner and Vikram Adve}, - Title = {The {LLVM} {I}nstruction {S}et and {C}ompilation {S}trategy}, - Institution = {Computer Science Dept., - Univ. of Illinois at Urbana-Champaign}, - Number = {UIUCDCS-R-2002-2292}, - Type = {Tech. Report}, - Month = {Aug}, - Year = {2002}, - URL = {\tt http://llvm.cs.uiuc.edu/pubs/LLVMCompilationStrategy.pdf}, - Annote = { CATEGORY: COMPILERS: SCALAR: SYSTEMS } -} - -@TechReport{LattnerAdve:DSGTR031, - Author = {Chris Lattner and Vikram Adve}, - Title = {{D}ata {S}tructure {A}nalysis: {A} {F}ast and {S}calable {C}ontext-{S}ensitive {H}eap {A}nalysis}, - Institution = {Computer Science Dept., - Univ. of Illinois at Urbana-Champaign}, - Number = {UIUCDCS-R-2003-2340}, - Type = {Tech. Report}, - URL = {\tt http://llvm.cs.uiuc.edu/pubs/2003-04-29-DataStructureAnalysisTR.html}, - Annote = { CATEGORY: COMPILERS: SCALAR: POINTER, HEAP, AND SHAPE ANALYSIS } -} - -@TechReport{LattnerAdve:PoolAllocTR041, - Author = {Chris Lattner and Vikram Adve}, - Title = {Automatic Pool Allocation: Compile-Time Control of - Data Structure Layout in the Heap}, - Institution = {Computer Science Dept., - Univ. 
of Illinois at Urbana-Champaign}, - Number = {UIUCDCS-R-2004-2465}, - Type = {Tech Report}, - Annote = { CATEGORY: COMPILERS: SCALAR: POINTER, HEAP, AND SHAPE ANALYSIS } -} - -@inproceedings{zitser, - author = {Misha Zitser and Richard Lippmann and Tim Leek}, - title = {Testing static analysis tools using exploitable buffer overflows from open source code}, - booktitle = {Proceedings of the 12th ACM SIGSOFT symposium on Foundations of software engineering}, - year = {2004}, - isbn = {1-58113-855-5}, - location = {Newport Beach, CA, USA}, - doi = {http://doi.acm.org/10.1145/1029894.1029911}, - } - - @inproceedings{zhivich, - author = { Michael Zhivich and Tim Leek and Richard Lippmann}, - title = { Dynamic Buffer Overflow Detection }, - booktitle = {BUGS : Workshop on the Evaluation of Software Defect Detection Tools}, - year = {2005}, - location = {Chicago, IL, USA}, - } - - - -@inproceedings{SAFECode:PLDI06, - author = { Dinakar Dhurjati and Sumant Kowshik and Vikram Adve}, - title = { {SAFEC}ode: Enforcing Alias Analysis for Weakly Typed Languages}, - booktitle = PLDI, - year = {2006}, - month = {June}, - address = {Ottawa, Canada}, - pages = {144--157}, -} - -@inproceedings{DA:ICSE06, - author = {Dinakar Dhurjati and Vikram Adve}, - title = {Backwards-Compatible Array Bounds Checking for {C} with Very Low Overhead}, - booktitle = ICSE, - month = May, - year = 2006, - address = {Shanghai, China}, - pages = {162--171}, -} - - -@inproceedings{Jha:CCS, - author = {Vinod Ganapathy and Somesh Jha and David Chandler and David Melski and David Vitek}, - title = {Buffer overrun detection using linear programming and static analysis}, - booktitle = {Proceedings of the 10th ACM conference on Computer and communications security}, - year = {2003}, - isbn = {1-58113-738-9}, - location = {Washington D.C., USA}, - doi = {http://doi.acm.org/10.1145/948109.948155}, - alt_address = {New York, NY, USA} - } - - @article{ARCHER, - author = {Yichen Xie and Andy Chou and Dawson 
Engler}, - title = {ARCHER: using symbolic, path-sensitive analysis to detect memory access errors}, - journal = {SIGSOFT Softw. Eng. Notes}, - volume = {28}, - number = {5}, - year = {2003}, - issn = {0163-5948}, - pages = {327--336}, - doi = {http://doi.acm.org/10.1145/949952.940115}, - alt_publisher = {ACM Press}, - alt_address = {New York, NY, USA}, - } - - @inproceedings{Rugina:PLDI00, - author = {Radu Rugina and Martin Rinard}, - title = {Symbolic bounds analysis of pointers, array indices, and accessed memory regions}, - booktitle = PLDI, - year = {2000}, - isbn = {1-58113-199-2}, - pages = {182--195}, - location = {Vancouver, British Columbia, Canada}, - doi = {http://doi.acm.org/10.1145/349299.349325}, - alt_publisher = {ACM Press}, - alt_address = {New York, NY, USA}, - } - -@inproceedings{Parafait:FSE10, - author = {Li, Lian and Cifuentes, Cristina and Keynes, Nathan}, - title = {Practical and effective symbolic analysis for buffer overflow detection}, - booktitle = {Proceedings of the eighteenth ACM SIGSOFT international symposium on Foundations of software engineering}, - series = {FSE '10}, - year = {2010}, - isbn = {978-1-60558-791-2}, - location = {Santa Fe, New Mexico, USA}, - pages = {317--326}, - numpages = {10}, - url = {http://doi.acm.org/10.1145/1882291.1882338}, - doi = {http://doi.acm.org/10.1145/1882291.1882338}, - acmid = {1882338}, - publisher = {ACM}, - address = {New York, NY, USA}, - keywords = {demand-driven, scalability, static program analysis}, -} - -@TechReport{DKA:SAFECodeTR05, - Author = {Dinakar Dhurjati and Sumant Kowshik and Vikram Adve}, - Title = {Enforcing Alias Analysis for Weakly Typed Languages}, - Institution = {Computer Science Dept., - Univ. 
of Illinois at Urbana-Champaign}, - Number = {UIUCDCS-R-2005-2657}, - Type = {Tech Report}, - Month = {Oct}, - Year = {2005}, - Keyword = {}, - note = {See {\tt http://safecode.cs.uiuc.edu/}} -} -@inproceedings{Cyclone:ISMM04, - author = {Michael Hicks and Greg Morrisett and Dan Grossman and Trevor Jim}, - title = {Experience with safe manual memory-management in {C}yclone}, - booktitle = {Proc. of the 4th international symposium on Memory management (ISMM)}, - year = {2004}, - isbn = {1-58113-945-4}, - location = {Vancouver, BC, Canada}, - doi = {http://doi.acm.org/10.1145/1029873.1029883}, - } - -@INPROCEEDINGS{MemSafe:SCAM10, - author={Simpson, M.S. and Barua, R.K.}, - booktitle={Source Code Analysis and Manipulation (SCAM), 2010 10th IEEE Working Conference on}, - title={MemSafe: Ensuring the Spatial and Temporal Memory Safety of C at Runtime}, - year={2010}, - month={sept.}, - volume={}, - number={}, - pages={199 -208}, - keywords={C programs;MemSafe;manual code modifications;memory access violations;memory error detection;metadata representation;spatial memory safety;temporal memory safety;C language;meta data;}, - doi={10.1109/SCAM.2010.15}, - ISSN={} -} - - @techreport{TofteMLkit, - author = "Mads Tofte and Lars Birkedal and Martin Elsman and Niels Hallenberg and Tommy H{\o}jfeld Olesen and Peter Sestoft and Peter Bertelsen", - title = "Programming with {R}egions in the {ML} {K}it", - number = "DIKU-TR-97/12", - year = "1997", - url = "citeseer.ist.psu.edu/tofte97programming.html" } - -@inproceedings{REAPS:OOPSLA02, - author = "E. Berger and B. Zorn and K. McKinley", - title = "Reconsidering custom memory allocation", - booktitle = {Proc. 
Conference on Object-Oriented - Programming: Systems, Languages, and Applications }, - year = {2002}, - url = {citeseer.ist.psu.edu/article/berger04reconsidering.html}, - } - -@misc{LinuxMRemap:Torvalds, - author = "Linus Torvalds", - title = "mremap feature discussion, See {\tt http://lkml.org/lkml/2004/1/12/265}" -} - -@misc{CVSdoublefree, - author = "Igor Dobrovitski", - title = "Exploit for CVS double free() for linux pserver. {\tt http://seclists.org/lists/bugtraq/ - 2003/Feb/0042.html}", - year = {2003}, - month = {Feb} -} - -@misc{Mysqldoublefree, - title = "{M}y{SQL} Double Free Heap Corruption Vulnerability. {\tt http: - //www.securityfocus.com/bid/6718/info}", - year = {2003}, - month = {Jan} -} - -@misc{Kerberosdoublefree, - title = "{MITKRB5-SA}: double free vulnerabilities. {\tt http: - //seclists.org/lists/bugtraq/2004/Sep/ - 0015.html}", - year = {2004}, - month = {Aug} -} - -@misc{EFENCE, - author = "Bruce Perens", - title = "Electric fence \textsl{malloc} Debugger", - HowPublished = "{\tt http:// - perens.com/FreeSoftware/ElectricFence/ }" -} - -@misc{PageHeap, - author = "Microsoft", - title = "How to use {P}ageheap.exe in {W}indows {XP} and {W}indows 2000", - HowPublished = "{\tt http://support. - microsoft.com/?kbid=286470 }" -} - -@MastersThesis{Younan:2003:OCP, - author = "Yves Younan", - title = "An overview of common programming security - vulnerabilities and possible solutions", - school = "Vrije Universiteit Brussel", - year = "2003" -} - -@InProceedings{DA:DSN06, - author = "Dinakar Dhurjati and Vikram Adve", - title = "Efficiently Detecting All Dangling Pointer Uses in Production Servers", - booktitle = "Proc. Int'l Conf. 
on Dependable Systems and Networks (DSN)", - month = Jun, - year = 2006, - address = {Philadelphia, USA}, -} - - -@inproceedings{HR:POPL05, - author = {Brian Hackett and Radu Rugina}, - title = {Region-based shape analysis with tracked locations}, - booktitle = {POPL '05: Proceedings of the 32nd ACM SIGPLAN-SIGACT symposium on Principles of programming languages}, - year = {2005}, - isbn = {1-58113-830-X}, - pages = {310--323}, - location = {Long Beach, California, USA}, - doi = {http://doi.acm.org/10.1145/1040305.1040331}, - alt_publisher = {ACM Press}, - alt_address = {New York, NY, USA}, - } - - @inproceedings{HDWY:ICSE06, - author = {Brian Hackett and Manuvir Das and Daniel Wang and Zhe Yang}, - title = {Modular Checking for Buffer Overflows in the Large}, - booktitle = ICSE, - address = {Shanghai, China}, - year = {2006}, -} - - @inproceedings{DieHard:PLDI06, - author = {Emery Berger and Ben Zorn}, - title = {DieHard: Probabilistic Memory Safety for Unsafe Languages}, - booktitle = PLDI, - year = {2006}, - location = {Ottawa, Canada}, - } - - - -@InProceedings{SPIN:SOSP95, - author = {Brian Bershad and Stefan Savage and Przemyslaw Pardyak and Emin Gun Sirer and David Becker and Marc Fiuczynski and Craig Chambers and Susan Eggers}, - title = {Extensibility, {S}afety and {P}erformance in the {SPIN} {O}perating {S}ystem}, - booktitle = SOSP, - pages = {267--284}, - year = {1995}, - address = {Copper Mountain, CO, USA}, -} - - -@InProceedings{PacketFilter:SOSP87, - author = {Jeffrey C. Mogul and Richard F. Rashid and Michael J. Accetta}, - title = {The packet filter: An efficient mechanism for user-level network code}, - booktitle = SOSP, - pages = {39-51,}, - year = {1987}, - address = {Austin, TX}, -} - - -@article{Nooks:TOCS05, - author = {Michael M. Swift and Brian N. Bershad and Henry M. 
Levy}, - title = {Improving the reliability of commodity operating systems}, - journal = TOCS, - volume = {23}, - number = {1}, - year = {2005}, - issn = {0734-2071}, - pages = {77--110}, - doi = {http://doi.acm.org/10.1145/1047915.1047919}, - alt_publisher-comment = {ACM Press}, - alt_address-comment = {New York, NY, USA}, -} - -@InProceedings{NooksRecovery:OSDI04, - author = "Michael M. Swift and Muthukaruppan Annamalai and Brian N. Bershad and Henry M. Levy", - title = "Recovering device drivers", - booktitle = OSDI, - address-comment = "San Francisco, CA, USA", - month = Dec, - year = "2004", - url = "citeseer.ist.psu.edu/swift04recovering.html" -} - -@InProceedings{OKE:OpenArch02, - author = {Herbert Bos and Bart Samwel}, - title = {Safe {K}ernel {P}rogramming in the {OKE}}, - booktitle = {Proceedings of OpenArch'2002}, - year = {2002}, - month = {June}, - address = {New York, NY}, -} - -@InProceedings{XTP:SOSP03, - author = {Parveen Patel and Andrew Whitaker and David Wetherall and - Jay Lepreau and Tim Stack}, - title = {Upgrading Transport Protocols using Untrusted Mobile Code}, - booktitle = SOSP, - year = {2003}, - address = {Bolton Landing, USA}, - month = {October}, -} - -@InProceedings{Mondrix:SOSP05, - author = {Emmett Witchel and Junghwan Rhee and Krste Asanovic}, - title = {Mondrix: Memory Isolation for Linux using Mondriaan Memory Protection}, - booktitle = SOSP, - year = {2005}, - address = {Brighton, UK}, - month = {October}, - pages = {31--44} -} - -@InProceedings{Mondrian:ASPLOS02, - author = {Emmett Witchel and Josh Cates and Krste Asanovic.}, - title = {Mondrian memory protection}, - booktitle = ASPLOS, - year = {2002}, - address = {San Jose, CA, USA}, - month = {October}, - pages = {304--316} -} - - -@phdthesis{ PCC:NeculaThesis98, - author = "George C. 
Necula", - title = "Compiling with Proofs", - year = "1998", - school = "Carnegie Mellon University", - url = "citeseer.ist.psu.edu/necula98compiling.html" } - -@inproceedings{TALx86:99, - author = "Greg Morrisett and Karl Crary and Neal Glew and Dan Grossman and Richard Samuels and Frederick Smith and David Walker and Stephanie Weirich and Steve Zdancewic", - title = "{TALx86}: {A} Realistic Typed Assembly Language", - booktitle = WCSSS, - pages = "25--35", - year = 1999, - month = may, - address = "Atlanta, GA, USA", -} - -@inproceedings{AppelFelty:POPL00, - author = {Andrew W. Appel and Amy P. Felty}, - title = "{A Semantic Model of Types and Machine Instructions for Proof-Carrying Code}", - booktitle = POPL, - pages = "243--253", - alt_address-comment = {Boston, MA}, - alt_publisher-comment = {ACM Press}, - year = {2000}, - month = {Jan}, - url = {citeseer.ist.psu.edu/appel00semantic.html} } - - -@inproceedings{SplayTrees:STOC83, - author = {Daniel Dominic Sleator and Robert Endre Tarjan}, - title = {Self-adjusting binary trees}, - booktitle = STOC, - year = {1983}, - isbn = {0-89791-099-0}, - pages-comment = {235--245}, - doi-comment = {http://doi.acm.org/10.1145/800061.808752}, - alt_publishe-comment = {ACM Press}, - alt_address-comment = {New York, NY, USA}, -} - -@INPROCEEDINGS{RTL:DAC02, -author={Brinkmann, R. and Drechsler, R.}, -booktitle={Design Automation Conference, 2002. Proceedings of ASP-DAC 2002. 7th Asia and South Pacific and the 15th International Conference on VLSI Design. 
Proceedings.}, -title={RTL-datapath verification using integer linear programming}, -year={2002}, -volume={}, -number={}, -pages={741 -746}, -keywords={HDL operators;RTL-datapath verification;VHDL;Verilog-HDL designs;arithmetic properties;bitvector arithmetic;decision procedure;formal verification;hardware description language;integer linear arithmetic constraints;integer linear programming;modulo semantics;register transfer level;digital arithmetic;formal verification;hardware description languages;integer programming;linear programming;logic CAD;}, -doi={10.1109/ASPDAC.2002.995022}, -ISSN={} -} - -@inproceedings{BaggyBoundsChecking:UsenixSec09, - author = {Akritidis, Periklis and Costa, Manuel and Castro, Miguel and -Hand, Steven}, - title = {Baggy bounds checking: an efficient and backwards-compatible -defense against out-of-bounds errors}, - booktitle = {Proceedings of the 18th conference on USENIX security -symposium}, - series = {SSYM'09}, - year = {2009}, - location = {Montreal, Canada}, - pages = {51--66}, - numpages = {16}, - url = {http://dl.acm.org/citation.cfm?id=1855768.1855772}, - acmid = {1855772}, - publisher = {USENIX Association}, - address = {Berkeley, CA, USA}, -} - diff --git a/paper/sigplanconf.cls b/paper/sigplanconf.cls deleted file mode 100644 index 2dcfeb64efa69aec516eeac9ac2138483136ad77..0000000000000000000000000000000000000000 --- a/paper/sigplanconf.cls +++ /dev/null @@ -1,1311 +0,0 @@ -%----------------------------------------------------------------------------- -% -% LaTeX Class/Style File -% -% Name: sigplanconf.cls -% -% Purpose: A LaTeX 2e class file for SIGPLAN conference proceedings. -% This class file supercedes acm_proc_article-sp, -% sig-alternate, and sigplan-proc. -% -% Author: Paul C. Anagnostopoulos -% Windfall Software -% 978 371-2316 -% paul [atsign] windfall.com -% -% Created: 12 September 2004 -% -% Revisions: See end of file. -% -% This work is licensed under the Creative Commons Attribution License. 
-% To view a copy of this license, visit -% http://creativecommons.org/licenses/by/3.0/ -% or send a letter to Creative Commons, 171 2nd Street, Suite 300, -% San Francisco, California, 94105, U.S.A. -% -%----------------------------------------------------------------------------- - - -\NeedsTeXFormat{LaTeX2e}[1995/12/01] -\ProvidesClass{sigplanconf}[2013/07/02 v2.8 ACM SIGPLAN Proceedings] - -% The following few pages contain LaTeX programming extensions adapted -% from the ZzTeX macro package. - -% Token Hackery -% ----- ------- - - -\def \@expandaftertwice {\expandafter\expandafter\expandafter} -\def \@expandafterthrice {\expandafter\expandafter\expandafter\expandafter - \expandafter\expandafter\expandafter} - -% This macro discards the next token. - -\def \@discardtok #1{}% token - -% This macro removes the `pt' following a dimension. - -{\catcode `\p = 12 \catcode `\t = 12 - -\gdef \@remover #1pt{#1} - -} % \catcode - -% This macro extracts the contents of a macro and returns it as plain text. -% Usage: \expandafter\@defof \meaning\macro\@mark - -\def \@defof #1:->#2\@mark{#2} - -% Control Sequence Names -% ------- -------- ----- - - -\def \@name #1{% {\tokens} - \csname \expandafter\@discardtok \string#1\endcsname} - -\def \@withname #1#2{% {\command}{\tokens} - \expandafter#1\csname \expandafter\@discardtok \string#2\endcsname} - -% Flags (Booleans) -% ----- ---------- - -% The boolean literals \@true and \@false are appropriate for use with -% the \if command, which tests the codes of the next two characters. - -\def \@true {TT} -\def \@false {FL} - -\def \@setflag #1=#2{\edef #1{#2}}% \flag = boolean - -% IF and Predicates -% -- --- ---------- - -% A "predicate" is a macro that returns \@true or \@false as its value. -% Such values are suitable for use with the \if conditional. 
For example: -% -% \if \@oddp{\x} <then-clause> \else <else-clause> \fi - -% A predicate can be used with \@setflag as follows: -% -% \@setflag \flag = {<predicate>} - -% Here are the predicates for TeX's repertoire of conditional -% commands. These might be more appropriately interspersed with -% other definitions in this module, but what the heck. -% Some additional "obvious" predicates are defined. - -\def \@eqlp #1#2{\ifnum #1 = #2\@true \else \@false \fi} -\def \@neqlp #1#2{\ifnum #1 = #2\@false \else \@true \fi} -\def \@lssp #1#2{\ifnum #1 < #2\@true \else \@false \fi} -\def \@gtrp #1#2{\ifnum #1 > #2\@true \else \@false \fi} -\def \@zerop #1{\ifnum #1 = 0\@true \else \@false \fi} -\def \@onep #1{\ifnum #1 = 1\@true \else \@false \fi} -\def \@posp #1{\ifnum #1 > 0\@true \else \@false \fi} -\def \@negp #1{\ifnum #1 < 0\@true \else \@false \fi} -\def \@oddp #1{\ifodd #1\@true \else \@false \fi} -\def \@evenp #1{\ifodd #1\@false \else \@true \fi} -\def \@rangep #1#2#3{\if \@orp{\@lssp{#1}{#2}}{\@gtrp{#1}{#3}}\@false \else - \@true \fi} -\def \@tensp #1{\@rangep{#1}{10}{19}} - -\def \@dimeqlp #1#2{\ifdim #1 = #2\@true \else \@false \fi} -\def \@dimneqlp #1#2{\ifdim #1 = #2\@false \else \@true \fi} -\def \@dimlssp #1#2{\ifdim #1 < #2\@true \else \@false \fi} -\def \@dimgtrp #1#2{\ifdim #1 > #2\@true \else \@false \fi} -\def \@dimzerop #1{\ifdim #1 = 0pt\@true \else \@false \fi} -\def \@dimposp #1{\ifdim #1 > 0pt\@true \else \@false \fi} -\def \@dimnegp #1{\ifdim #1 < 0pt\@true \else \@false \fi} - -\def \@vmodep {\ifvmode \@true \else \@false \fi} -\def \@hmodep {\ifhmode \@true \else \@false \fi} -\def \@mathmodep {\ifmmode \@true \else \@false \fi} -\def \@textmodep {\ifmmode \@false \else \@true \fi} -\def \@innermodep {\ifinner \@true \else \@false \fi} - -\long\def \@codeeqlp #1#2{\if #1#2\@true \else \@false \fi} - -\long\def \@cateqlp #1#2{\ifcat #1#2\@true \else \@false \fi} - -\long\def \@tokeqlp #1#2{\ifx #1#2\@true \else \@false \fi} -\long\def 
\@xtokeqlp #1#2{\expandafter\ifx #1#2\@true \else \@false \fi} - -\long\def \@definedp #1{% - \expandafter\ifx \csname \expandafter\@discardtok \string#1\endcsname - \relax \@false \else \@true \fi} - -\long\def \@undefinedp #1{% - \expandafter\ifx \csname \expandafter\@discardtok \string#1\endcsname - \relax \@true \else \@false \fi} - -\def \@emptydefp #1{\ifx #1\@empty \@true \else \@false \fi}% {\name} - -\let \@emptylistp = \@emptydefp - -\long\def \@emptyargp #1{% {#n} - \@empargp #1\@empargq\@mark} -\long\def \@empargp #1#2\@mark{% - \ifx #1\@empargq \@true \else \@false \fi} -\def \@empargq {\@empargq} - -\def \@emptytoksp #1{% {\tokenreg} - \expandafter\@emptoksp \the#1\@mark} - -\long\def \@emptoksp #1\@mark{\@emptyargp{#1}} - -\def \@voidboxp #1{\ifvoid #1\@true \else \@false \fi} -\def \@hboxp #1{\ifhbox #1\@true \else \@false \fi} -\def \@vboxp #1{\ifvbox #1\@true \else \@false \fi} - -\def \@eofp #1{\ifeof #1\@true \else \@false \fi} - - -% Flags can also be used as predicates, as in: -% -% \if \flaga <then-clause> \else <else-clause> \fi - - -% Now here we have predicates for the common logical operators. 
- -\def \@notp #1{\if #1\@false \else \@true \fi} - -\def \@andp #1#2{\if #1% - \if #2\@true \else \@false \fi - \else - \@false - \fi} - -\def \@orp #1#2{\if #1% - \@true - \else - \if #2\@true \else \@false \fi - \fi} - -\def \@xorp #1#2{\if #1% - \if #2\@false \else \@true \fi - \else - \if #2\@true \else \@false \fi - \fi} - -% Arithmetic -% ---------- - -\def \@increment #1{\advance #1 by 1\relax}% {\count} - -\def \@decrement #1{\advance #1 by -1\relax}% {\count} - -% Options -% ------- - - -\@setflag \@authoryear = \@false -\@setflag \@blockstyle = \@false -\@setflag \@copyrightwanted = \@true -\@setflag \@explicitsize = \@false -\@setflag \@mathtime = \@false -\@setflag \@natbib = \@true -\@setflag \@ninepoint = \@true -\newcount{\@numheaddepth} \@numheaddepth = 3 -\@setflag \@onecolumn = \@false -\@setflag \@preprint = \@false -\@setflag \@reprint = \@false -\@setflag \@tenpoint = \@false -\@setflag \@times = \@false - -% Note that all the dangerous article class options are trapped. 
- -\DeclareOption{9pt}{\@setflag \@ninepoint = \@true - \@setflag \@explicitsize = \@true} - -\DeclareOption{10pt}{\PassOptionsToClass{10pt}{article}% - \@setflag \@ninepoint = \@false - \@setflag \@tenpoint = \@true - \@setflag \@explicitsize = \@true} - -\DeclareOption{11pt}{\PassOptionsToClass{11pt}{article}% - \@setflag \@ninepoint = \@false - \@setflag \@explicitsize = \@true} - -\DeclareOption{12pt}{\@unsupportedoption{12pt}} - -\DeclareOption{a4paper}{\@unsupportedoption{a4paper}} - -\DeclareOption{a5paper}{\@unsupportedoption{a5paper}} - -\DeclareOption{authoryear}{\@setflag \@authoryear = \@true} - -\DeclareOption{b5paper}{\@unsupportedoption{b5paper}} - -\DeclareOption{blockstyle}{\@setflag \@blockstyle = \@true} - -\DeclareOption{cm}{\@setflag \@times = \@false} - -\DeclareOption{computermodern}{\@setflag \@times = \@false} - -\DeclareOption{executivepaper}{\@unsupportedoption{executivepaper}} - -\DeclareOption{indentedstyle}{\@setflag \@blockstyle = \@false} - -\DeclareOption{landscape}{\@unsupportedoption{landscape}} - -\DeclareOption{legalpaper}{\@unsupportedoption{legalpaper}} - -\DeclareOption{letterpaper}{\@unsupportedoption{letterpaper}} - -\DeclareOption{mathtime}{\@setflag \@mathtime = \@true} - -\DeclareOption{natbib}{\@setflag \@natbib = \@true} - -\DeclareOption{nonatbib}{\@setflag \@natbib = \@false} - -\DeclareOption{nocopyrightspace}{\@setflag \@copyrightwanted = \@false} - -\DeclareOption{notitlepage}{\@unsupportedoption{notitlepage}} - -\DeclareOption{numberedpars}{\@numheaddepth = 4} - -\DeclareOption{numbers}{\@setflag \@authoryear = \@false} - -%%%\DeclareOption{onecolumn}{\@setflag \@onecolumn = \@true} - -\DeclareOption{preprint}{\@setflag \@preprint = \@true} - -\DeclareOption{reprint}{\@setflag \@reprint = \@true} - -\DeclareOption{times}{\@setflag \@times = \@true} - -\DeclareOption{titlepage}{\@unsupportedoption{titlepage}} - -\DeclareOption{twocolumn}{\@setflag \@onecolumn = \@false} - 
-\DeclareOption*{\PassOptionsToClass{\CurrentOption}{article}} - -\ExecuteOptions{9pt,indentedstyle,times} -\@setflag \@explicitsize = \@false -\ProcessOptions - -\if \@onecolumn - \if \@notp{\@explicitsize}% - \@setflag \@ninepoint = \@false - \PassOptionsToClass{11pt}{article}% - \fi - \PassOptionsToClass{twoside,onecolumn}{article} -\else - \PassOptionsToClass{twoside,twocolumn}{article} -\fi -\LoadClass{article} - -\def \@unsupportedoption #1{% - \ClassError{proc}{The standard '#1' option is not supported.}} - -% This can be used with the 'reprint' option to get the final folios. - -\def \setpagenumber #1{% - \setcounter{page}{#1}} - -\AtEndDocument{\label{sigplanconf@finalpage}} - -% Utilities -% --------- - - -\newcommand{\setvspace}[2]{% - #1 = #2 - \advance #1 by -1\parskip} - -% Document Parameters -% -------- ---------- - - -% Page: - -\setlength{\hoffset}{-1in} -\setlength{\voffset}{-1in} - -\setlength{\topmargin}{1in} -\setlength{\headheight}{0pt} -\setlength{\headsep}{0pt} - -\if \@onecolumn - \setlength{\evensidemargin}{.75in} - \setlength{\oddsidemargin}{.75in} -\else - \setlength{\evensidemargin}{.75in} - \setlength{\oddsidemargin}{.75in} -\fi - -% Text area: - -\newdimen{\standardtextwidth} -\setlength{\standardtextwidth}{42pc} - -\if \@onecolumn - \setlength{\textwidth}{40.5pc} -\else - \setlength{\textwidth}{\standardtextwidth} -\fi - -\setlength{\topskip}{8pt} -\setlength{\columnsep}{2pc} -\setlength{\textheight}{54.5pc} - -% Running foot: - -\setlength{\footskip}{30pt} - -% Paragraphs: - -\if \@blockstyle - \setlength{\parskip}{5pt plus .1pt minus .5pt} - \setlength{\parindent}{0pt} -\else - \setlength{\parskip}{0pt} - \setlength{\parindent}{12pt} -\fi - -\setlength{\lineskip}{.5pt} -\setlength{\lineskiplimit}{\lineskip} - -\frenchspacing -\pretolerance = 400 -\tolerance = \pretolerance -\setlength{\emergencystretch}{5pt} -\clubpenalty = 10000 -\widowpenalty = 10000 -\setlength{\hfuzz}{.5pt} - -% Standard vertical spaces: - 
-\newskip{\standardvspace} -\setvspace{\standardvspace}{5pt plus 1pt minus .5pt} - -% Margin paragraphs: - -\setlength{\marginparwidth}{36pt} -\setlength{\marginparsep}{2pt} -\setlength{\marginparpush}{8pt} - - -\setlength{\skip\footins}{8pt plus 3pt minus 1pt} -\setlength{\footnotesep}{9pt} - -\renewcommand{\footnoterule}{% - \hrule width .5\columnwidth height .33pt depth 0pt} - -\renewcommand{\@makefntext}[1]{% - \noindent \@makefnmark \hspace{1pt}#1} - -% Floats: - -\setcounter{topnumber}{4} -\setcounter{bottomnumber}{1} -\setcounter{totalnumber}{4} - -\renewcommand{\fps@figure}{tp} -\renewcommand{\fps@table}{tp} -\renewcommand{\topfraction}{0.90} -\renewcommand{\bottomfraction}{0.30} -\renewcommand{\textfraction}{0.10} -\renewcommand{\floatpagefraction}{0.75} - -\setcounter{dbltopnumber}{4} - -\renewcommand{\dbltopfraction}{\topfraction} -\renewcommand{\dblfloatpagefraction}{\floatpagefraction} - -\setlength{\floatsep}{18pt plus 4pt minus 2pt} -\setlength{\textfloatsep}{18pt plus 4pt minus 3pt} -\setlength{\intextsep}{10pt plus 4pt minus 3pt} - -\setlength{\dblfloatsep}{18pt plus 4pt minus 2pt} -\setlength{\dbltextfloatsep}{20pt plus 4pt minus 3pt} - -% Miscellaneous: - -\errorcontextlines = 5 - -% Fonts -% ----- - - -\if \@times - \renewcommand{\rmdefault}{ptm}% - \if \@mathtime - \usepackage[mtbold,noTS1]{mathtime}% - \else -%%% \usepackage{mathptm}% - \fi -\else - \relax -\fi - -\if \@ninepoint - -\renewcommand{\normalsize}{% - \@setfontsize{\normalsize}{9pt}{10pt}% - \setlength{\abovedisplayskip}{5pt plus 1pt minus .5pt}% - \setlength{\belowdisplayskip}{\abovedisplayskip}% - \setlength{\abovedisplayshortskip}{3pt plus 1pt minus 2pt}% - \setlength{\belowdisplayshortskip}{\abovedisplayshortskip}} - -\renewcommand{\tiny}{\@setfontsize{\tiny}{5pt}{6pt}} - -\renewcommand{\scriptsize}{\@setfontsize{\scriptsize}{7pt}{8pt}} - -\renewcommand{\small}{% - \@setfontsize{\small}{8pt}{9pt}% - \setlength{\abovedisplayskip}{4pt plus 1pt minus 1pt}% - 
\setlength{\belowdisplayskip}{\abovedisplayskip}% - \setlength{\abovedisplayshortskip}{2pt plus 1pt}% - \setlength{\belowdisplayshortskip}{\abovedisplayshortskip}} - -\renewcommand{\footnotesize}{% - \@setfontsize{\footnotesize}{8pt}{9pt}% - \setlength{\abovedisplayskip}{4pt plus 1pt minus .5pt}% - \setlength{\belowdisplayskip}{\abovedisplayskip}% - \setlength{\abovedisplayshortskip}{2pt plus 1pt}% - \setlength{\belowdisplayshortskip}{\abovedisplayshortskip}} - -\renewcommand{\large}{\@setfontsize{\large}{11pt}{13pt}} - -\renewcommand{\Large}{\@setfontsize{\Large}{14pt}{18pt}} - -\renewcommand{\LARGE}{\@setfontsize{\LARGE}{18pt}{20pt}} - -\renewcommand{\huge}{\@setfontsize{\huge}{20pt}{25pt}} - -\renewcommand{\Huge}{\@setfontsize{\Huge}{25pt}{30pt}} - -\else\if \@tenpoint - -\relax - -\else - -\relax - -\fi\fi - -% Abstract -% -------- - - -\renewenvironment{abstract}{% - \section*{Abstract}% - \normalsize}{% - } - -% Bibliography -% ------------ - - -\renewenvironment{thebibliography}[1] - {\section*{\refname - \@mkboth{\MakeUppercase\refname}{\MakeUppercase\refname}}% - \list{\@biblabel{\@arabic\c@enumiv}}% - {\settowidth\labelwidth{\@biblabel{#1}}% - \leftmargin\labelwidth - \advance\leftmargin\labelsep - \@openbib@code - \usecounter{enumiv}% - \let\p@enumiv\@empty - \renewcommand\theenumiv{\@arabic\c@enumiv}}% - \bibfont - \clubpenalty4000 - \@clubpenalty \clubpenalty - \widowpenalty4000% - \sfcode`\.\@m} - {\def\@noitemerr - {\@latex@warning{Empty `thebibliography' environment}}% - \endlist} - -\if \@natbib - -\if \@authoryear - \typeout{Using natbib package with 'authoryear' citation style.} - \usepackage[authoryear,square]{natbib} - \bibpunct{(}{)}{;}{a}{}{,} % Change fences to parentheses; - % citation separator to semicolon; - % eliminate comma between author and year. 
- \let \cite = \citep -\else - \typeout{Using natbib package with 'numbers' citation style.} - \usepackage[numbers,sort&compress,square]{natbib} -\fi -\setlength{\bibsep}{3pt plus .5pt minus .25pt} - -\fi - -\def \bibfont {\small} - -% Categories -% ---------- - - -\@setflag \@firstcategory = \@true - -\newcommand{\category}[3]{% - \if \@firstcategory - \paragraph*{Categories and Subject Descriptors}% - \@setflag \@firstcategory = \@false - \else - \unskip ;\hspace{.75em}% - \fi - \@ifnextchar [{\@category{#1}{#2}{#3}}{\@category{#1}{#2}{#3}[]}} - -\def \@category #1#2#3[#4]{% - {\let \and = \relax - #1 [\textit{#2}]% - \if \@emptyargp{#4}% - \if \@notp{\@emptyargp{#3}}: #3\fi - \else - :\space - \if \@notp{\@emptyargp{#3}}#3---\fi - \textrm{#4}% - \fi}} - -% Copyright Notice -% --------- ------ - - -\def \ftype@copyrightbox {8} -\def \@toappear {} -\def \@permission {} -\def \@reprintprice {} - -\def \@copyrightspace {% - \@float{copyrightbox}[b]% - \vbox to 1.2in{% - \vfill - \parbox[b]{20pc}{% - \scriptsize - \if \@preprint - [Copyright notice will appear here - once 'preprint' option is removed.]\par - \else - \@toappear - \fi - \if \@reprint - \noindent Reprinted from \@conferencename, - \@proceedings, - \@conferenceinfo, - pp.~\number\thepage--\pageref{sigplanconf@finalpage}.\par - \fi}}% - \end@float} - -\newcommand{\reprintprice}[1]{% - \gdef \@reprintprice {#1}} - -\reprintprice{\$15.00} - -\long\def \toappear #1{% - \def \@toappear {#1}} - -\toappear{% - \noindent \@permission \par - \vspace{2pt} - \noindent \textsl{\@conferencename}, \quad \@conferenceinfo. 
\par - \noindent Copyright \copyright\ \@copyrightyear\ ACM \@copyrightdata - \dots \@reprintprice.\par - \noindent http://dx.doi.org/10.1145/\@doi } - -\newcommand{\permission}[1]{% - \gdef \@permission {#1}} - -\permission{% - Permission to make digital or hard copies of all or part of this work for - personal or classroom use is granted without fee provided that copies are - not made or distributed for profit or commercial advantage and that copies - bear this notice and the full citation on the first page. Copyrights for - components of this work owned by others than ACM must be honored. - Abstracting with credit is permitted. To copy otherwise, or republish, to - post on servers or to redistribute to lists, requires prior specific - permission and/or a fee. Request permissions from permissions@acm.org.} - -% These are two new rights management and bibstrip text blocks. - -\newcommand{\exclusivelicense}{% - \permission{% - Permission to make digital or hard copies of all or part of this work for - personal or classroom use is granted without fee provided that copies are - not made or distributed for profit or commercial advantage and that copies - bear this notice and the full citation on the first page. Copyrights for - components of this work owned by others than the author(s) must be honored. - Abstracting with credit is permitted. To copy otherwise, or republish, to - post on servers or to redistribute to lists, requires prior specific - permission and/or a fee. Request permissions from permissions@acm.org.} - \toappear{% - \noindent \@permission \par - \vspace{2pt} - \noindent \textsl{\@conferencename}, \quad \@conferenceinfo. \par - \noindent Copyright is held by the owner/author(s). Publication rights licensed to ACM. 
\par - \noindent ACM \@copyrightdata \dots \@reprintprice.\par - \noindent http://dx.doi.org/10.1145/\@doi}} - -\newcommand{\permissiontopublish}{% - \permission{% - Permission to make digital or hard copies of part or all of this work for - personal or classroom use is granted without fee provided that copies are - not made or distributed for profit or commercial advantage and that copies - bear this notice and the full citation on the first page. Copyrights for - third-party components of this work must be honored. - For all other uses, contact the owner/author(s).}% - \toappear{% - \noindent \@permission \par - \vspace{2pt} - \noindent \textsl{\@conferencename}, \quad \@conferenceinfo. \par - \noindent Copyright is held by the owner/author(s). \par - \noindent ACM \@copyrightdata.\par - \noindent http://dx.doi.org/10.1145/\@doi}} - -% The following permission notices are -% for the traditional copyright transfer agreement option. - -% Exclusive license and permission-to-publish -% give more complicated permission notices. -% These are not covered here. - -\newcommand{\ACMCanadapermission}{% - \permission{% - ACM acknowledges that this contribution was authored or - co-authored by an affiliate of the Canadian National - Government. As such, the Crown in Right of Canada retains an equal - interest in the copyright. Reprint requests should be forwarded to - ACM.}} - -\newcommand{\ACMUSpermission}{% - \permission{% - ACM acknowledges that this contribution was authored or - co-authored by a contractor or affiliate of the United States - Government. As such, the United States Government retains a - nonexclusive, royalty-free right to publish or reproduce this - article, or to allow others to do so, for Government purposes - only.}} - -\newcommand{\USpublicpermission}{% - \permission{% - This paper is authored by an employee(s) of the United States - Government and is in the public domain. 
Non-exclusive copying or - redistribution is allowed, provided that the article citation is - given and the authors and the agency are clearly identified as its - source.}% - \toappear{% - \noindent \@permission \par - \vspace{2pt} - \noindent \textsl{\@conferencename}, \quad \@conferenceinfo. \par - \noindent ACM \@copyrightdata.\par - \noindent http://dx.doi.org/10.1145/\@doi}} - -\newcommand{\authorversion}[4]{% - \permission{% - Copyright \copyright\ ACM, #1. This is the author's version of the work. - It is posted here by permission of ACM for your personal use. - Not for redistribution. The definitive version was published in - #2, #3, http://dx.doi.org/10.1145/#4.}} - -% Enunciations -% ------------ - - -\def \@begintheorem #1#2{% {name}{number} - \trivlist - \item[\hskip \labelsep \textsc{#1 #2.}]% - \itshape\selectfont - \ignorespaces} - -\def \@opargbegintheorem #1#2#3{% {name}{number}{title} - \trivlist - \item[% - \hskip\labelsep \textsc{#1\ #2}% - \if \@notp{\@emptyargp{#3}}\nut (#3).\fi]% - \itshape\selectfont - \ignorespaces} - -% Figures -% ------- - - -\@setflag \@caprule = \@true - -\long\def \@makecaption #1#2{% - \addvspace{4pt} - \if \@caprule - \hrule width \hsize height .33pt - \vspace{4pt} - \fi - \setbox \@tempboxa = \hbox{\@setfigurenumber{#1.}\nut #2}% - \if \@dimgtrp{\wd\@tempboxa}{\hsize}% - \noindent \@setfigurenumber{#1.}\nut #2\par - \else - \centerline{\box\@tempboxa}% - \fi} - -\newcommand{\nocaptionrule}{% - \@setflag \@caprule = \@false} - -\def \@setfigurenumber #1{% - {\rmfamily \bfseries \selectfont #1}} - -% Hierarchy -% --------- - - -\setcounter{secnumdepth}{\@numheaddepth} - -\newskip{\@sectionaboveskip} -\setvspace{\@sectionaboveskip}{10pt plus 3pt minus 2pt} - -\newskip{\@sectionbelowskip} -\if \@blockstyle - \setlength{\@sectionbelowskip}{0.1pt}% -\else - \setlength{\@sectionbelowskip}{4pt}% -\fi - -\renewcommand{\section}{% - \@startsection - {section}% - {1}% - {0pt}% - {-\@sectionaboveskip}% - {\@sectionbelowskip}% - 
{\large \bfseries \raggedright}} - -\newskip{\@subsectionaboveskip} -\setvspace{\@subsectionaboveskip}{8pt plus 2pt minus 2pt} - -\newskip{\@subsectionbelowskip} -\if \@blockstyle - \setlength{\@subsectionbelowskip}{0.1pt}% -\else - \setlength{\@subsectionbelowskip}{4pt}% -\fi - -\renewcommand{\subsection}{% - \@startsection% - {subsection}% - {2}% - {0pt}% - {-\@subsectionaboveskip}% - {\@subsectionbelowskip}% - {\normalsize \bfseries \raggedright}} - -\renewcommand{\subsubsection}{% - \@startsection% - {subsubsection}% - {3}% - {0pt}% - {-\@subsectionaboveskip} - {\@subsectionbelowskip}% - {\normalsize \bfseries \raggedright}} - -\newskip{\@paragraphaboveskip} -\setvspace{\@paragraphaboveskip}{6pt plus 2pt minus 2pt} - -\renewcommand{\paragraph}{% - \@startsection% - {paragraph}% - {4}% - {0pt}% - {\@paragraphaboveskip} - {-1em}% - {\normalsize \bfseries \if \@times \itshape \fi}} - -\renewcommand{\subparagraph}{% - \@startsection% - {subparagraph}% - {4}% - {0pt}% - {\@paragraphaboveskip} - {-1em}% - {\normalsize \itshape}} - -% Standard headings: - -\newcommand{\acks}{\section*{Acknowledgments}} - -\newcommand{\keywords}{\paragraph*{Keywords}} - -\newcommand{\terms}{\paragraph*{General Terms}} - -% Identification -% -------------- - - -\def \@conferencename {} -\def \@conferenceinfo {} -\def \@copyrightyear {} -\def \@copyrightdata {[to be supplied]} -\def \@proceedings {[Unknown Proceedings]} - - -\newcommand{\conferenceinfo}[2]{% - \gdef \@conferencename {#1}% - \gdef \@conferenceinfo {#2}} - -\newcommand{\copyrightyear}[1]{% - \gdef \@copyrightyear {#1}} - -\let \CopyrightYear = \copyrightyear - -\newcommand{\copyrightdata}[1]{% - \gdef \@copyrightdata {#1}} - -\let \crdata = \copyrightdata - -\newcommand{\doi}[1]{% - \gdef \@doi {#1}} - -\newcommand{\proceedings}[1]{% - \gdef \@proceedings {#1}} - -% Lists -% ----- - - -\setlength{\leftmargini}{13pt} -\setlength\leftmarginii{13pt} -\setlength\leftmarginiii{13pt} -\setlength\leftmarginiv{13pt} 
-\setlength{\labelsep}{3.5pt} - -\setlength{\topsep}{\standardvspace} -\if \@blockstyle - \setlength{\itemsep}{1pt} - \setlength{\parsep}{3pt} -\else - \setlength{\itemsep}{1pt} - \setlength{\parsep}{3pt} -\fi - -\renewcommand{\labelitemi}{{\small \centeroncapheight{\textbullet}}} -\renewcommand{\labelitemii}{\centeroncapheight{\rule{2.5pt}{2.5pt}}} -\renewcommand{\labelitemiii}{$-$} -\renewcommand{\labelitemiv}{{\Large \textperiodcentered}} - -\renewcommand{\@listi}{% - \leftmargin = \leftmargini - \listparindent = 0pt} -%%% \itemsep = 1pt -%%% \parsep = 3pt} -%%% \listparindent = \parindent} - -\let \@listI = \@listi - -\renewcommand{\@listii}{% - \leftmargin = \leftmarginii - \topsep = 1pt - \labelwidth = \leftmarginii - \advance \labelwidth by -\labelsep - \listparindent = \parindent} - -\renewcommand{\@listiii}{% - \leftmargin = \leftmarginiii - \labelwidth = \leftmarginiii - \advance \labelwidth by -\labelsep - \listparindent = \parindent} - -\renewcommand{\@listiv}{% - \leftmargin = \leftmarginiv - \labelwidth = \leftmarginiv - \advance \labelwidth by -\labelsep - \listparindent = \parindent} - -% Mathematics -% ----------- - - -\def \theequation {\arabic{equation}} - -% Miscellaneous -% ------------- - - -\newcommand{\balancecolumns}{% - \vfill\eject - \global\@colht = \textheight - \global\ht\@cclv = \textheight} - -\newcommand{\nut}{\hspace{.5em}} - -\newcommand{\softraggedright}{% - \let \\ = \@centercr - \leftskip = 0pt - \rightskip = 0pt plus 10pt} - -% Program Code -% ------- ---- - - -\newcommand{\mono}[1]{% - {\@tempdima = \fontdimen2\font - \texttt{\spaceskip = 1.1\@tempdima #1}}} - -% Running Heads and Feet -% ------- ----- --- ---- - - -\def \@preprintfooter {} - -\newcommand{\preprintfooter}[1]{% - \gdef \@preprintfooter {#1}} - -\if \@preprint - -\def \ps@plain {% - \let \@mkboth = \@gobbletwo - \let \@evenhead = \@empty - \def \@evenfoot {\scriptsize - \rlap{\textit{\@preprintfooter}}\hfil - \thepage \hfil - \llap{\textit{\@formatyear}}}% - 
\let \@oddhead = \@empty - \let \@oddfoot = \@evenfoot} - -\else\if \@reprint - -\def \ps@plain {% - \let \@mkboth = \@gobbletwo - \let \@evenhead = \@empty - \def \@evenfoot {\scriptsize \hfil \thepage \hfil}% - \let \@oddhead = \@empty - \let \@oddfoot = \@evenfoot} - -\else - -\let \ps@plain = \ps@empty -\let \ps@headings = \ps@empty -\let \ps@myheadings = \ps@empty - -\fi\fi - -\def \@formatyear {% - \number\year/\number\month/\number\day} - -% Special Characters -% ------- ---------- - - -\DeclareRobustCommand{\euro}{% - \protect{\rlap{=}}{\sf \kern .1em C}} - -% Title Page -% ----- ---- - - -\@setflag \@addauthorsdone = \@false - -\def \@titletext {\@latex@error{No title was provided}{}} -\def \@subtitletext {} - -\newcount{\@authorcount} - -\newcount{\@titlenotecount} -\newtoks{\@titlenotetext} - -\def \@titlebanner {} - -\renewcommand{\title}[1]{% - \gdef \@titletext {#1}} - -\newcommand{\subtitle}[1]{% - \gdef \@subtitletext {#1}} - -\newcommand{\authorinfo}[3]{% {names}{affiliation}{email/URL} - \global\@increment \@authorcount - \@withname\gdef {\@authorname\romannumeral\@authorcount}{#1}% - \@withname\gdef {\@authoraffil\romannumeral\@authorcount}{#2}% - \@withname\gdef {\@authoremail\romannumeral\@authorcount}{#3}} - -\renewcommand{\author}[1]{% - \@latex@error{The \string\author\space command is obsolete; - use \string\authorinfo}{}} - -\newcommand{\titlebanner}[1]{% - \gdef \@titlebanner {#1}} - -\renewcommand{\maketitle}{% - \pagestyle{plain}% - \if \@onecolumn - {\hsize = \standardtextwidth - \@maketitle}% - \else - \twocolumn[\@maketitle]% - \fi - \@placetitlenotes - \if \@copyrightwanted \@copyrightspace \fi} - -\def \@maketitle {% - \begin{center} - \@settitlebanner - \let \thanks = \titlenote - {\leftskip = 0pt plus 0.25\linewidth - \rightskip = 0pt plus 0.25 \linewidth - \parfillskip = 0pt - \spaceskip = .7em - \noindent \LARGE \bfseries \@titletext \par} - \vskip 6pt - \noindent \Large \@subtitletext \par - \vskip 12pt - \ifcase \@authorcount 
- \@latex@error{No authors were specified for this paper}{}\or - \@titleauthors{i}{}{}\or - \@titleauthors{i}{ii}{}\or - \@titleauthors{i}{ii}{iii}\or - \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{}{}\or - \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{}\or - \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{vi}\or - \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{vi}% - \@titleauthors{vii}{}{}\or - \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{vi}% - \@titleauthors{vii}{viii}{}\or - \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{vi}% - \@titleauthors{vii}{viii}{ix}\or - \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{vi}% - \@titleauthors{vii}{viii}{ix}\@titleauthors{x}{}{}\or - \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{vi}% - \@titleauthors{vii}{viii}{ix}\@titleauthors{x}{xi}{}\or - \@titleauthors{i}{ii}{iii}\@titleauthors{iv}{v}{vi}% - \@titleauthors{vii}{viii}{ix}\@titleauthors{x}{xi}{xii}% - \else - \@latex@error{Cannot handle more than 12 authors}{}% - \fi - \vspace{1.75pc} - \end{center}} - -\def \@settitlebanner {% - \if \@andp{\@preprint}{\@notp{\@emptydefp{\@titlebanner}}}% - \vbox to 0pt{% - \vskip -32pt - \noindent \textbf{\@titlebanner}\par - \vss}% - \nointerlineskip - \fi} - -\def \@titleauthors #1#2#3{% - \if \@andp{\@emptyargp{#2}}{\@emptyargp{#3}}% - \noindent \@setauthor{40pc}{#1}{\@false}\par - \else\if \@emptyargp{#3}% - \noindent \@setauthor{17pc}{#1}{\@false}\hspace{3pc}% - \@setauthor{17pc}{#2}{\@false}\par - \else - \noindent \@setauthor{12.5pc}{#1}{\@false}\hspace{2pc}% - \@setauthor{12.5pc}{#2}{\@false}\hspace{2pc}% - \@setauthor{12.5pc}{#3}{\@true}\par - \relax - \fi\fi - \vspace{20pt}} - -\def \@setauthor #1#2#3{% {width}{text}{unused} - \vtop{% - \def \and {% - \hspace{16pt}} - \hsize = #1 - \normalfont - \centering - \large \@name{\@authorname#2}\par - \vspace{5pt} - \normalsize \@name{\@authoraffil#2}\par - \vspace{2pt} - \textsf{\@name{\@authoremail#2}}\par}} - -\def \@maybetitlenote #1{% - \if 
\@andp{#1}{\@gtrp{\@authorcount}{3}}% - \titlenote{See page~\pageref{@addauthors} for additional authors.}% - \fi} - -\newtoks{\@fnmark} - -\newcommand{\titlenote}[1]{% - \global\@increment \@titlenotecount - \ifcase \@titlenotecount \relax \or - \@fnmark = {\ast}\or - \@fnmark = {\dagger}\or - \@fnmark = {\ddagger}\or - \@fnmark = {\S}\or - \@fnmark = {\P}\or - \@fnmark = {\ast\ast}% - \fi - \,$^{\the\@fnmark}$% - \edef \reserved@a {\noexpand\@appendtotext{% - \noexpand\@titlefootnote{\the\@fnmark}}}% - \reserved@a{#1}} - -\def \@appendtotext #1#2{% - \global\@titlenotetext = \expandafter{\the\@titlenotetext #1{#2}}} - -\newcount{\@authori} - -\iffalse -\def \additionalauthors {% - \if \@gtrp{\@authorcount}{3}% - \section{Additional Authors}% - \label{@addauthors}% - \noindent - \@authori = 4 - {\let \\ = ,% - \loop - \textbf{\@name{\@authorname\romannumeral\@authori}}, - \@name{\@authoraffil\romannumeral\@authori}, - email: \@name{\@authoremail\romannumeral\@authori}.% - \@increment \@authori - \if \@notp{\@gtrp{\@authori}{\@authorcount}} \repeat}% - \par - \fi - \global\@setflag \@addauthorsdone = \@true} -\fi - -\let \addauthorsection = \additionalauthors - -\def \@placetitlenotes { - \the\@titlenotetext} - -% Utilities -% --------- - - -\newcommand{\centeroncapheight}[1]{% - {\setbox\@tempboxa = \hbox{#1}% - \@measurecapheight{\@tempdima}% % Calculate ht(CAP) - ht(text) - \advance \@tempdima by -\ht\@tempboxa % ------------------ - \divide \@tempdima by 2 % 2 - \raise \@tempdima \box\@tempboxa}} - -\newbox{\@measbox} - -\def \@measurecapheight #1{% {\dimen} - \setbox\@measbox = \hbox{ABCDEFGHIJKLMNOPQRSTUVWXYZ}% - #1 = \ht\@measbox} - -\long\def \@titlefootnote #1#2{% - \insert\footins{% - \reset@font\footnotesize - \interlinepenalty\interfootnotelinepenalty - \splittopskip\footnotesep - \splitmaxdepth \dp\strutbox \floatingpenalty \@MM - \hsize\columnwidth \@parboxrestore -%%% \protected@edef\@currentlabel{% -%%% \csname p@footnote\endcsname\@thefnmark}% - 
\color@begingroup - \def \@makefnmark {$^{#1}$}% - \@makefntext{% - \rule\z@\footnotesep\ignorespaces#2\@finalstrut\strutbox}% - \color@endgroup}} - -% LaTeX Modifications -% ----- ------------- - -\def \@seccntformat #1{% - \@name{\the#1}% - \@expandaftertwice\@seccntformata \csname the#1\endcsname.\@mark - \quad} - -\def \@seccntformata #1.#2\@mark{% - \if \@emptyargp{#2}.\fi} - -% Revision History -% -------- ------- - - -% Date Person Ver. Change -% ---- ------ ---- ------ - -% 2004.09.12 PCA 0.1--4 Preliminary development. - -% 2004.11.18 PCA 0.5 Start beta testing. - -% 2004.11.19 PCA 0.6 Obsolete \author and replace with -% \authorinfo. -% Add 'nocopyrightspace' option. -% Compress article opener spacing. -% Add 'mathtime' option. -% Increase text height by 6 points. - -% 2004.11.28 PCA 0.7 Add 'cm/computermodern' options. -% Change default to Times text. - -% 2004.12.14 PCA 0.8 Remove use of mathptm.sty; it cannot -% coexist with latexsym or amssymb. - -% 2005.01.20 PCA 0.9 Rename class file to sigplanconf.cls. - -% 2005.03.05 PCA 0.91 Change default copyright data. - -% 2005.03.06 PCA 0.92 Add at-signs to some macro names. - -% 2005.03.07 PCA 0.93 The 'onecolumn' option defaults to '11pt', -% and it uses the full type width. - -% 2005.03.15 PCA 0.94 Add at-signs to more macro names. -% Allow margin paragraphs during review. - -% 2005.03.22 PCA 0.95 Implement \euro. -% Remove proof and newdef environments. - -% 2005.05.06 PCA 1.0 Eliminate 'onecolumn' option. -% Change footer to small italic and eliminate -% left portion if no \preprintfooter. -% Eliminate copyright notice if preprint. -% Clean up and shrink copyright box. - -% 2005.05.30 PCA 1.1 Add alternate permission statements. - -% 2005.06.29 PCA 1.1 Publish final first edition of guide. - -% 2005.07.14 PCA 1.2 Add \subparagraph. -% Use block paragraphs in lists, and adjust -% spacing between items and paragraphs. - -% 2006.06.22 PCA 1.3 Add 'reprint' option and associated -% commands. 
- -% 2006.08.24 PCA 1.4 Fix bug in \maketitle case command. - -% 2007.03.13 PCA 1.5 The title banner only displays with the -% 'preprint' option. - -% 2007.06.06 PCA 1.6 Use \bibfont in \thebibliography. -% Add 'natbib' option to load and configure -% the natbib package. - -% 2007.11.20 PCA 1.7 Balance line lengths in centered article -% title (thanks to Norman Ramsey). - -% 2009.01.26 PCA 1.8 Change natbib \bibpunct values. - -% 2009.03.24 PCA 1.9 Change natbib to use the 'numbers' option. -% Change templates to use 'natbib' option. - -% 2009.09.01 PCA 2.0 Add \reprintprice command (suggested by -% Stephen Chong). - -% 2009.09.08 PCA 2.1 Make 'natbib' the default; add 'nonatbib'. -% SB Add 'authoryear' and 'numbers' (default) to -% control citation style when using natbib. -% Add \bibpunct to change punctuation for -% 'authoryear' style. - -% 2009.09.21 PCA 2.2 Add \softraggedright to the thebibliography -% environment. Also add to template so it will -% happen with natbib. - -% 2009.09.30 PCA 2.3 Remove \softraggedright from thebibliography. -% Just include in the template. - -% 2010.05.24 PCA 2.4 Obfuscate class author's email address. - -% 2011.11.08 PCA 2.5 Add copyright notice to this file. -% Remove 'sort' option from natbib when using -% 'authoryear' style. -% Add the \authorversion command. - -% 2013.02.22 PCA 2.6 Change natbib fences to parentheses when -% using 'authoryear' style. - -% 2013.05.17 PCA 2.7 Change standard and author copyright text. - -% 2013.07.02 TU 2.8 More changes to permission/copyright notes. -% Replaced ambiguous \authorpermission with -% \exclusivelicense and \permissiontopublish - -