diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu
index df06503d8c50d8dddf82bc4647bca5b98624289e..fe11c6a73b369782db0e8095d315ff24e7d9392f 100644
--- a/src/ComputeForce.cu
+++ b/src/ComputeForce.cu
@@ -118,23 +118,83 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) :
 		const int maxPairs = 1<<25;
 		gpuErrchk(cudaMalloc(&numPairs_d,       sizeof(int)));
 		gpuErrchk(cudaMalloc(&pairLists_d,      sizeof(int2)*maxPairs));
-                gpuErrchk(cudaBindTexture(0, pairListsTex, pairLists_d, sizeof(int2)*maxPairs)); //Han-Yi
+                // gpuErrchk(cudaBindTexture(0, pairListsTex, pairLists_d, sizeof(int2)*maxPairs)); //Han-Yi
 		gpuErrchk(cudaMalloc(&pairTabPotType_d, sizeof(int)*maxPairs));
-                gpuErrchk(cudaBindTexture(0, pairTabPotTypeTex, pairTabPotType_d, sizeof(int)*maxPairs)); //Han-Yi
+
+		// create texture object
+		{
+		    cudaResourceDesc resDesc;
+		    memset(&resDesc, 0, sizeof(resDesc));
+		    resDesc.resType = cudaResourceTypeLinear;
+		    resDesc.res.linear.devPtr = pairLists_d;
+		    resDesc.res.linear.desc.f = cudaChannelFormatKindSigned;
+		    resDesc.res.linear.desc.x = 32; // bits per channel
+		    resDesc.res.linear.desc.y = 32; // bits per channel
+		    resDesc.res.linear.sizeInBytes = maxPairs*sizeof(int2);
+
+		    cudaTextureDesc texDesc;
+		    memset(&texDesc, 0, sizeof(texDesc));
+		    texDesc.readMode = cudaReadModeElementType;
+
+		    // create texture object: we only have to do this once!
+		    pairLists_tex=0;
+		    cudaCreateTextureObject(&pairLists_tex, &resDesc, &texDesc, NULL);
+
+		}
+
+		// create texture object
+		{
+		    cudaResourceDesc resDesc;
+		    memset(&resDesc, 0, sizeof(resDesc));
+		    resDesc.resType = cudaResourceTypeLinear;
+		    resDesc.res.linear.devPtr = pairTabPotType_d;
+		    resDesc.res.linear.desc.f = cudaChannelFormatKindSigned;
+		    resDesc.res.linear.desc.x = 32; // bits per channel
+		    resDesc.res.linear.sizeInBytes = maxPairs*sizeof(int);
+
+		    cudaTextureDesc texDesc;
+		    memset(&texDesc, 0, sizeof(texDesc));
+		    texDesc.readMode = cudaReadModeElementType;
+
+		    // create texture object: we only have to do this once!
+		    pairTabPotType_tex = 0;
+		    cudaCreateTextureObject(&pairTabPotType_tex, &resDesc, &texDesc, NULL);
+
+		}
+
 
                 //Han-Yi Chou
                 int nCells = decomp.nCells.x * decomp.nCells.y * decomp.nCells.z;
                 //int* nCells_dev;
 		if (nCells < MAX_CELLS_FOR_CELLNEIGHBORLIST) {
 		    int3 *Cells_dev;
-		    gpuErrchk(cudaMalloc(&CellNeighborsList,sizeof(int)*27*nCells));
+		    size_t sz = 27*nCells*sizeof(int);
+		    gpuErrchk(cudaMalloc(&CellNeighborsList, sz));
 		    //gpuErrchk(cudaMalloc(&nCells_dev,sizeof(int)));
 		    gpuErrchk(cudaMalloc(&Cells_dev,sizeof(int3)));
 		    //gpuErrchk(cudaMemcpy(nCells_dev,&nCells,1,cudaMemcpyHostToDevice);
 		    gpuErrchk(cudaMemcpy(Cells_dev,&(decomp.nCells),sizeof(int3),cudaMemcpyHostToDevice));
 		    createNeighborsList<<<256,256>>>(Cells_dev,CellNeighborsList);
 		    gpuErrchk(cudaFree(Cells_dev));
-		    cudaBindTexture(0, NeighborsTex, CellNeighborsList, 27*nCells*sizeof(int));
+
+		    // create texture object
+		    {
+			cudaResourceDesc resDesc;
+			memset(&resDesc, 0, sizeof(resDesc));
+			resDesc.resType = cudaResourceTypeLinear;
+			resDesc.res.linear.devPtr = CellNeighborsList;
+			resDesc.res.linear.desc.f = cudaChannelFormatKindSigned;
+			resDesc.res.linear.desc.x = 32; // bits per channel
+			resDesc.res.linear.sizeInBytes = sz;
+
+			cudaTextureDesc texDesc;
+			memset(&texDesc, 0, sizeof(texDesc));
+			texDesc.readMode = cudaReadModeElementType;
+
+			// create texture object: we only have to do this once!
+			neighbors_tex=0;
+			cudaCreateTextureObject(&neighbors_tex, &resDesc, &texDesc, NULL);
+		    }
 		}
 	}
 	
@@ -185,7 +245,7 @@ ComputeForce::~ComputeForce() {
 
 		gpuErrchk(cudaFree(sys_d));
                 //Han-Yi Unbind the texture
-                gpuErrchk(cudaUnbindTexture(PosTex));
+                gpuErrchk(cudaDestroyTextureObject(pos_tex));
 		gpuErrchk( cudaFree(pos_d) );
 		gpuErrchk( cudaFree(forceInternal_d) );
 		gpuErrchk( cudaFree(type_d) );
@@ -215,11 +275,11 @@ ComputeForce::~ComputeForce() {
 	}
 
 	gpuErrchk(cudaFree(numPairs_d));
-        gpuErrchk(cudaUnbindTexture(pairListsTex)); //Han-Yi 
+	gpuErrchk(cudaDestroyTextureObject(pairLists_tex));
 	gpuErrchk(cudaFree(pairLists_d));
-        gpuErrchk(cudaUnbindTexture(pairTabPotTypeTex));
+        gpuErrchk(cudaDestroyTextureObject(pairTabPotType_tex));
 	gpuErrchk(cudaFree(pairTabPotType_d));
-        gpuErrchk(cudaUnbindTexture(NeighborsTex));
+        gpuErrchk(cudaDestroyTextureObject(neighbors_tex));
         gpuErrchk(cudaFree( CellNeighborsList));
 
 }
@@ -521,11 +581,11 @@ void ComputeForce::decompose() {
       #if __CUDA_ARCH__ >= 520
       createPairlists<64,64,8><<<dim3(128,128,numReplicas),dim3(64,1,1)>>>(pos_d, num, numReplicas, sys_d, decomp_d, nCells, numPairs_d,
                                                                              pairLists_d, numParts, type_d, pairTabPotType_d, excludes_d,
-                                                                             excludeMap_d, numExcludes, pairlistdist2);
+									   excludeMap_d, numExcludes, pairlistdist2, pos_tex, neighbors_tex);
       #else //__CUDA_ARCH__ == 300
       createPairlists<64,64,8><<<dim3(256,256,numReplicas),dim3(64,1,1)>>>(pos_d, num, numReplicas, sys_d, decomp_d, nCells, numPairs_d,
                                                                            pairLists_d, numParts, type_d, pairTabPotType_d, excludes_d, 
-                                                                           excludeMap_d, numExcludes, pairlistdist2);
+                                                                           excludeMap_d, numExcludes, pairlistdist2, pos_tex, neighbors_tex);
       #endif
        
       gpuKernelCheck();
@@ -736,8 +796,8 @@ float ComputeForce::computeTabulated(bool get_energy) {
 	{
                 //gpuErrchk(cudaBindTexture(0,  PosTex, pos_d,sizeof(Vector3)*num*numReplicas));
 		//computeTabulatedKernel<<< nb, numThreads >>>(forceInternal_d, pos_d, sys_d,
-	    computeTabulatedKernel<64><<< dim3(2048,1,1), dim3(64,1,1), 0, gpuman.get_next_stream() >>>(forceInternal_d, pos_d, sys_d,
-													cutoff2, numPairs_d, pairLists_d, pairTabPotType_d, tablePot_d);
+	    computeTabulatedKernel<64><<< dim3(2048,1,1), dim3(64,1,1), 0, gpuman.get_next_stream() >>>(forceInternal_d, sys_d,
+													cutoff2, numPairs_d, pairLists_d, pairTabPotType_d, tablePot_d, pairLists_tex, pos_tex, pairTabPotType_tex);
                   gpuKernelCheck();
                 //gpuErrchk(cudaUnbindTexture(PosTex));
 	}
@@ -843,8 +903,28 @@ void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos)
 	const size_t tot_num = num * numReplicas;
 
 	gpuErrchk(cudaMalloc(&pos_d, sizeof(Vector3) * tot_num));
+	{
         //Han-Yi bind to the texture
-        gpuErrchk(cudaBindTexture(0,  PosTex, pos_d,sizeof(Vector3)*tot_num));
+	    cudaResourceDesc resDesc;
+	    memset(&resDesc, 0, sizeof(resDesc));
+	    resDesc.resType = cudaResourceTypeLinear;
+	    resDesc.res.linear.devPtr = pos_d;
+	    resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
+	    resDesc.res.linear.desc.x = 32; // bits per channel
+	    resDesc.res.linear.desc.y = 32; // bits per channel
+	    resDesc.res.linear.desc.z = 32; // bits per channel
+	    resDesc.res.linear.desc.w = 32; // bits per channel
+	    resDesc.res.linear.sizeInBytes = tot_num*sizeof(float4);
+	    
+	    cudaTextureDesc texDesc;
+	    memset(&texDesc, 0, sizeof(texDesc));
+	    texDesc.readMode = cudaReadModeElementType;
+	    
+	    // create texture object: we only have to do this once!
+	    pos_tex=0;
+	    cudaCreateTextureObject(&pos_tex, &resDesc, &texDesc, NULL);
+	}
+
         gpuErrchk(cudaDeviceSynchronize());
 
 	gpuErrchk(cudaMemcpyAsync(pos_d, pos, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice));
@@ -858,43 +938,21 @@ void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom
 {
         const size_t tot_num = num * numReplicas;
 
-        gpuErrchk(cudaMalloc(&pos_d, sizeof(Vector3) * tot_num));
         gpuErrchk(cudaMalloc(&mom_d, sizeof(Vector3) * tot_num));
-        //Han-Yi Chou bind to the texture
-                  gpuErrchk(cudaBindTexture(0, PosTex, pos_d,sizeof(Vector3)*tot_num));
-        //gpuErrchk(cudaBindTexture(0, MomTex, mom_d,sizeof(Vector3)*tot_num));
-                  gpuErrchk(cudaDeviceSynchronize());
-
-        gpuErrchk(cudaMemcpyAsync(pos_d, pos, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice));
         gpuErrchk(cudaMemcpyAsync(mom_d, mom, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice));
 
-        gpuErrchk(cudaMalloc(&forceInternal_d, sizeof(Vector3) * num * numReplicas));
-        gpuErrchk(cudaMemcpyAsync(forceInternal_d, forceInternal, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice));
-
+	copyToCUDA(forceInternal,pos);
         gpuErrchk(cudaDeviceSynchronize());
 }
 void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom, float* random)
 {
         const size_t tot_num = num * numReplicas;
 
-        gpuErrchk(cudaMalloc(&pos_d, sizeof(Vector3) * tot_num));
-        gpuErrchk(cudaMalloc(&mom_d, sizeof(Vector3) * tot_num));
         gpuErrchk(cudaMalloc(&ran_d, sizeof(float) * tot_num));
-
-        //Han-Yi Chou bind to the texture
-                  gpuErrchk(cudaBindTexture(0, PosTex, pos_d,sizeof(Vector3)*tot_num));
-        //gpuErrchk(cudaBindTexture(0, MomTex, mom_d,sizeof(Vector3)*tot_num));
-                  gpuErrchk(cudaDeviceSynchronize());
-
-        gpuErrchk(cudaMemcpyAsync(pos_d, pos, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice));
-        gpuErrchk(cudaMemcpyAsync(mom_d, mom, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice));
         gpuErrchk(cudaMemcpyAsync(ran_d, random, sizeof(float) * tot_num, cudaMemcpyHostToDevice));
 
-        gpuErrchk(cudaMalloc(&forceInternal_d, sizeof(Vector3) * num * numReplicas));
-        gpuErrchk(cudaMemcpyAsync(forceInternal_d, forceInternal, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice));
-
+	copyToCUDA(forceInternal, pos, mom);
         gpuErrchk(cudaDeviceSynchronize());
-
 }
 
 void ComputeForce::setForceInternalOnDevice(Vector3* f) {
diff --git a/src/ComputeForce.cuh b/src/ComputeForce.cuh
index 012931969c94c0183245d96dd9c8aa7ae2808f4d..4f7dfef108e16939c6f9c1ffdc5d82ad81190c4a 100644
--- a/src/ComputeForce.cuh
+++ b/src/ComputeForce.cuh
@@ -13,10 +13,10 @@
 
 #define MAX_CELLS_FOR_CELLNEIGHBORLIST 1<<25
 
-texture<int,    1, cudaReadModeElementType>      NeighborsTex;
-texture<int,    1, cudaReadModeElementType> pairTabPotTypeTex;
-texture<int2,   1, cudaReadModeElementType>      pairListsTex;
-texture<float4, 1, cudaReadModeElementType>            PosTex;
+// texture<int,    1, cudaReadModeElementType>      NeighborsTex;
+// texture<int,    1, cudaReadModeElementType> pairTabPotTypeTex;
+//texture<int2,   1, cudaReadModeElementType>      pairListsTex;
+// texture<float4, 1, cudaReadModeElementType>            PosTex;
 
 __host__ __device__
 EnergyForce ComputeForce::coulombForce(Vector3 r, float alpha,
@@ -303,7 +303,7 @@ __global__ void createPairlists(Vector3* __restrict__ pos, const int num, const
                                 const BaseGrid* __restrict__ sys, const CellDecomposition* __restrict__ decomp,
                                 const int nCells,int* g_numPairs, int2* g_pair, int numParts, const int* __restrict__ type,
                                 int* __restrict__ g_pairTabPotType, const Exclude* __restrict__ excludes,
-                                const int2* __restrict__ excludeMap, const int numExcludes, float pairlistdist2)
+                                const int2* __restrict__ excludeMap, const int numExcludes, float pairlistdist2, cudaTextureObject_t PosTex, cudaTextureObject_t NeighborsTex)
 {
     __shared__ float4 __align__(16) particle[N];
     __shared__ int     Index_i[N];
@@ -336,7 +336,7 @@ __global__ void createPairlists(Vector3* __restrict__ pos, const int num, const
                 if(tid + pid_i < Ni && tid < N)
                 {
                     Index_i [tid] = cellInfo[rangeI.first+pid_i+tid].particle;
-                    particle[tid] = tex1Dfetch(PosTex,Index_i[tid]);
+                    particle[tid] = tex1Dfetch<float4>(PosTex,Index_i[tid]);
                 }
                 __syncthreads();
 
@@ -360,7 +360,7 @@ __global__ void createPairlists(Vector3* __restrict__ pos, const int num, const
 
 			int neighbor_cell;
 			if (nCells < MAX_CELLS_FOR_CELLNEIGHBORLIST) {
-			    neighbor_cell = tex1Dfetch(NeighborsTex,idx+27*cellid_i);
+			    neighbor_cell = tex1Dfetch<int>(NeighborsTex,idx+27*cellid_i);
 			} else {
 			    int3 cells = decomp->nCells;
 			    int3 cell_idx = make_int3(cellid_i %  cells.z,
@@ -405,7 +405,7 @@ __global__ void createPairlists(Vector3* __restrict__ pos, const int num, const
                                 continue;
                             }
 
-                            float4 b = tex1Dfetch(PosTex,aj);
+                            float4 b = tex1Dfetch<float4>(PosTex,aj);
                             Vector3 B(b.x,b.y,b.z);
 
                             float dr = (sys->wrapDiff(A-B)).length2();
@@ -453,7 +453,7 @@ __global__ void createPairlists(Vector3* __restrict__ pos, const int num, const
         {
             int ai      = cellInfo[rangeI.first+pid_i].particle;
 
-            float4 a = tex1Dfetch(PosTex,ai);
+            float4 a = tex1Dfetch<float4>(PosTex,ai);
             Vector3 A(a.x,a.y,a.z);
 
             int2 ex_pair = make_int2(-1,-1); 
@@ -812,9 +812,11 @@ __global__ void computeTabulatedKernel(
 #endif
 //#if 0
 template<const int BlockSize>
-__global__ void computeTabulatedKernel(Vector3* force, const Vector3* __restrict__ pos, const BaseGrid* __restrict__ sys, 
+__global__ void computeTabulatedKernel(Vector3* force, const BaseGrid* __restrict__ sys, 
                                        float cutoff2, const int* __restrict__ g_numPairs, const int2* __restrict__ g_pair, 
-                                       const int* __restrict__ g_pairTabPotType, TabulatedPotential** __restrict__ tablePot)
+                                       const int* __restrict__ g_pairTabPotType, TabulatedPotential** __restrict__ tablePot,
+				       cudaTextureObject_t pairListsTex, cudaTextureObject_t PosTex, cudaTextureObject_t pairTabPotTypeTex
+    )
 {
     const int numPairs = *g_numPairs;
     const int tid = threadIdx.x + blockDim.x * threadIdx.y
@@ -826,7 +828,7 @@ __global__ void computeTabulatedKernel(Vector3* force, const Vector3* __restrict
     for (int i = tid; i < numPairs; i += TotalThreads) 
     {
         //int2 pair = g_pair[i];
-        int2 pair = tex1Dfetch(pairListsTex,i);
+        int2 pair = tex1Dfetch<int2>(pairListsTex,i);
         //int  ind  = tex1Dfetch(pairTabPotTypeTex,i); 
 
         int ai = pair.x;
@@ -834,12 +836,12 @@ __global__ void computeTabulatedKernel(Vector3* force, const Vector3* __restrict
                         
         //int ind = g_pairTabPotType[i];
 
-        Vector3 a(tex1Dfetch(PosTex, ai));
-        Vector3 b(tex1Dfetch(PosTex, aj));
+        Vector3 a(tex1Dfetch<float4>(PosTex, ai));
+        Vector3 b(tex1Dfetch<float4>(PosTex, aj));
         Vector3 dr = sys->wrapDiff(b-a);
         
         float d2 = dr.length2();
-        int  ind  = tex1Dfetch(pairTabPotTypeTex,i);
+        int  ind  = tex1Dfetch<int>(pairTabPotTypeTex,i);
         if (tablePot[ind] != NULL && d2 <= cutoff2) 
         {
             Vector3 f = tablePot[ind]->computef(dr,d2);
diff --git a/src/ComputeForce.h b/src/ComputeForce.h
index a523404f3cd5abf04af60cb35df4bd3ea6eed1b1..c58b997ab5d58b2ca413343f15bb57269a782618 100644
--- a/src/ComputeForce.h
+++ b/src/ComputeForce.h
@@ -218,17 +218,23 @@ private:
 	// Pairlists
 	float pairlistdist2;
 	int2 *pairLists_d;
+	cudaTextureObject_t pairLists_tex;
+
 	int *pairTabPotType_d;
+	cudaTextureObject_t pairTabPotType_tex;
 
 	int *numPairs_d;
 
         //Han-Yi Chou
         int *CellNeighborsList;	
 	//MLog: List of variables that need to be moved over to ComputeForce class. Members of this list will be set to static to avoid large alterations in working code, thereby allowing us to access these variables easily.
+	cudaTextureObject_t neighbors_tex;
+
 	//BrownianParticleType* part;
 	//float electricConst;
 	//int fullLongRange;
 	Vector3* pos_d;
+	cudaTextureObject_t pos_tex;
         Vector3* mom_d;
         float*   ran_d;
 	Vector3* forceInternal_d;
diff --git a/src/Makefile b/src/Makefile
index 2ada08a70a232651b7218a14c144816534448e0a..3c94c5370bc7cc3a064e8a260a2d577a994d537e 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -4,7 +4,7 @@ INCLUDE = $(CUDA_PATH)/include
 
 
 ifeq ($(dbg),1)
-	NV_FLAGS += -g -G 
+	NV_FLAGS += -g -G -O0
 	EX_FLAGS = -g -m$(OS_SIZE)
 else
 	NV_FLAGS = -Xptxas -O3
@@ -31,7 +31,7 @@ CC_FLAGS += -DVERSION="\"$(VERSION)\"" -DSIGNAL
 
 
 CC_FLAGS += -I$(CUDA_PATH)/include
-CC_FLAGS += -Wall -Wno-write-strings -std=c++0x -pedantic# TODO: test on Mac OSX and other architectures
+CC_FLAGS += -Wall -Wno-write-strings -std=c++14 -pedantic# TODO: test on Mac OSX and other architectures
 ifeq ($(dbg),1)
 NV_FLAGS += -lineinfo
 else
diff --git a/src/TabulatedPotential.h b/src/TabulatedPotential.h
index 807db9e95adc3cc08a400d2ff3d3bc955344189e..e409bdb0283228a6bc5ba8485c06535a7dcc986c 100644
--- a/src/TabulatedPotential.h
+++ b/src/TabulatedPotential.h
@@ -282,20 +282,21 @@ public:
     template <bool is_periodic>
 	HOST DEVICE inline float2 linearly_interpolate(float x, float start=0.0f) const {
 	float w = (x - start) * step_inv;
-	int home = int( floorf(w) );
-	w = w - home;
+	int signed_home = int( floorf(w) );
+	w = w - signed_home;
 	// if (home < 0) return EnergyForce(v0[0], Vector3(0.0f));
-	if (home < 0) {
-	    if (is_periodic) home += size;
+	if (signed_home < 0) {
+	    if (is_periodic) signed_home += size;
 	    else return make_float2(pot[0],0.0f);
 	}
-	else if (home >= size) {
+	unsigned int home = signed_home;
+	if (home >= size) {
 	    if (is_periodic) home -= size;
 	    else return make_float2(pot[size-1],0.0f);
 	}
 
 	float u0 = pot[home];
-	float du = home+1 < size ? pot[home+1]-u0 : is_periodic ? pot[0]-u0 : 0;
+	float du = ((unsigned int) home+1) < size ? pot[home+1]-u0 : is_periodic ? pot[0]-u0 : 0;
 
 	return make_float2(du*w+u0, du*step_inv);
     }