diff --git a/.gitignore b/.gitignore index b2f9571aa2b37baf9283524aacdb0cfb962d921b..e627a39e6ee07c986b54975c9346902de6dc2f4e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,12 @@ /*.txt /*.psf /*.pdb -/*.bd \ No newline at end of file +/*.exb +/*.bd +/*.namd + +output +potentials + +__pycache__ +.backup diff --git a/CanonicalNucleotideAtoms.py b/CanonicalNucleotideAtoms.py index 29ffd4810f499ebf615e9838656a288d5aebd897..fb31fa8de77098a4a5ef68a7c53a877e4508bc81 100644 --- a/CanonicalNucleotideAtoms.py +++ b/CanonicalNucleotideAtoms.py @@ -17,7 +17,8 @@ def stringToIntTuples(string, tupleLen, offset): return ret class CanonicalNucleotide(): - DefaultOrientation = rotationAboutAxis([0,0,1], 68) + # DefaultOrientation = rotationAboutAxis([0,0,1], 68) + DefaultOrientation = rotationAboutAxis([0,0,1], 68+180) def __init__(self, prefix): d = np.loadtxt("%s.dat" % prefix, dtype='i4,S4,S4,f4,f4,f4,f4,f4') @@ -60,10 +61,18 @@ class CanonicalNucleotide(): ## Apply transformation for i in range(len(c)): rvec = np.array([c.props[k][i] for k in ('x','y','z')]) - rvec = (scale*rvec).dot( R ) + rvec = rvec.dot( R ) for k,r,o in zip(('x','y','z'), rvec, translation): c.props[k][i] = r + o + if scale != 1: + i0 = self.index("C3'") + r0 = np.array([c.props[k][i0] for k in ('x','y','z')]) + for i in range(len(c)): + rvec = np.array([c.props[k][i] for k in ('x','y','z')]) + rvec = scale*(rvec-r0) + r0 + for k,r in zip(('x','y','z'), rvec): + c.props[k][i] = r return c # def __iter__(self): diff --git a/atomicModel.py b/atomicModel.py index 243002dd72bd5bd133acfdca3e4a636b62ec01c0..63542339ca01b4a29f4a20fd83c933f33c2c98ea 100644 --- a/atomicModel.py +++ b/atomicModel.py @@ -220,8 +220,8 @@ class atomicModel(): random.seed(randomSeed) + self.latticeType = atomicModel._determineLatticeType(part) self._buildModel(part) - self.latticeType = self._determineLatticeType(part) def _helixStrandsToEnds(self, helixStrands): @@ -307,7 +307,8 @@ class atomicModel(): ## Loop over oligos/segments segments = [] ntCount = 0 - for oligo in part.oligos(): + keyFn = lambda o: (o.length(), o._strand5p.idNum(), o._strand5p.idx5Prime()) + for oligo in reversed(sorted(part.oligos(), key=keyFn)): # if oligo.isLoop(): # # print("A loop exists") # raise Exception("Loop strands are not supported") @@ -334,6 +335,13 @@ class atomicModel(): bpr,tpr,eulerZ,z = [vh_props[k][hid] for k in keys] twist_per_base = tpr*360./bpr + ## override twist_per_base if square lattice + if self.latticeType == "square": + # assert(twist_per_base == 360.0*3/32) + twist_per_base == 360.0*3/32 + else: + assert(twist_per_base == 360.0/10.5) + zIdxToPos = lambda idx: (x*10,y*10,z-3.4*idx) zIdxToAngle = lambda idx: idx*twist_per_base + eulerZ @@ -448,13 +456,18 @@ class atomicModel(): node1.addXover(node2, isFwd1) node2.addXover(node1, isFwd2) - def _determineLatticeType(self, part): + def _getNeighborHelixDict(part): props, origins = part.helixPropertiesAndOrigins() - origins = [np.array(o) for o in origins] + neighborHelices = dict() + for i in range(len(origins)): + neighborHelices[i] = [int(j.strip('[,]')) for j in props['neighbors'][i].split() if j != '[]'] + return neighborHelices + def _determineLatticeType(part): + props, origins = part.helixPropertiesAndOrigins() + origins = [np.array(o) for o in origins] neighborVecs = [] - for i in range(len(props['neighbors'])): - js = [int(j.strip('[,]')) for j in props['neighbors'][i].split()] + for i,js in atomicModel._getNeighborHelixDict(part).items(): for j in js: neighborVecs.append( origins[j]-origins[i] ) @@ -591,10 +604,12 @@ class atomicModel(): def setScaffoldSeq(self,sequence): ## get longest segment - segs = sort( [(len(s),s) for s in self.segments], key=lambda x: x[0] ) + segs = sorted( [(len(s),s) for s in self.segments], key=lambda x: x[0] ) seg = segs[-1][1] assert( len(seg) > len(segs[-2][1]) ) - assert( len(sequence) >= len(seg) ) + if len(seg) < len(sequence): + raise Exception("Sequence (%d) too short for scaffold (%d)!" % (len(sequence),len(seg))) + for nt,seq in zip(seg,sequence): assert(seq in ("A","T","C","G")) nt.sequence = seq @@ -925,14 +940,12 @@ class atomicModel(): raise Exception("Not implemented") else: props, origins = self.part.helixPropertiesAndOrigins() - neighborHelices = dict() - for hid in helixIds: - neighborHelices[hid] = [int(j.strip('[,]')) for j in props['neighbors'][hid].split()] + neighborHelices = atomicModel._getNeighborHelixDict(self.part) if self.latticeType == 'honeycomb': - minDist = 7 + minDist = 8 # TODO: test against server elif self.latticeType == 'square': - minDist = 10 + minDist = 11 for seg in self.segments: for nt in seg: diff --git a/resources/cadnano2pdb.seq.m13mp18.dat b/resources/cadnano2pdb.seq.m13mp18.dat new file mode 100644 index 0000000000000000000000000000000000000000..edfc67497262cd16c2ffc711859b0231deb96549 --- /dev/null +++ b/resources/cadnano2pdb.seq.m13mp18.dat @@ -0,0 +1,122 @@ +;m13mp18 from http://genome-www.stanford.edu/vectordb/vector_descrip/COMPLETE/M13MP18.SEQ.html +aatgctacta ctattagtag aattgatgcc accttttcag ctcgcgcccc aaatgaaaat +atagctaaac aggttattga ccatttgcga aatgtatcta atggtcaaac taaatctact +cgttcgcaga attgggaatc aactgttaca tggaatgaaa cttccagaca ccgtacttta +gttgcatatt taaaacatgt tgagctacag caccagattc agcaattaag ctctaagcca +tccgcaaaaa tgacctctta tcaaaaggag caattaaagg tactctctaa tcctgacctg +ttggagtttg cttccggtct ggttcgcttt gaagctcgaa ttaaaacgcg atatttgaag +tctttcgggc ttcctcttaa tctttttgat gcaatccgct ttgcttctga ctataatagt +cagggtaaag acctgatttt tgatttatgg tcattctcgt tttctgaact gtttaaagca +tttgaggggg attcaatgaa tatttatgac gattccgcag tattggacgc tatccagtct +aaacatttta ctattacccc ctctggcaaa acttcttttg caaaagcctc tcgctatttt +ggtttttatc gtcgtctggt aaacgagggt tatgatagtg ttgctcttac tatgcctcgt +aattcctttt ggcgttatgt atctgcatta gttgaatgtg gtattcctaa atctcaactg +atgaatcttt ctacctgtaa taatgttgtt ccgttagttc gttttattaa cgtagatttt +tcttcccaac gtcctgactg gtataatgag ccagttctta aaatcgcata aggtaattca +caatgattaa agttgaaatt aaaccatctc aagcccaatt tactactcgt tctggtgttc +tcgtcagggc aagccttatt cactgaatga gcagctttgt tacgttgatt tgggtaatga +atatccggtt cttgtcaaga ttactcttga tgaaggtcag ccagcctatg cgcctggtct +gtacaccgtt catctgtcct ctttcaaagt tggtcagttc ggttccctta tgattgaccg +tctgcgcctc gttccggcta agtaacatgg agcaggtcgc ggatttcgac acaatttatc +aggcgatgat acaaatctcc gttgtacttt gtttcgcgct tggtataatc gctgggggtc +aaagatgagt gttttagtgt attctttcgc ctctttcgtt ttaggttggt gccttcgtag +tggcattacg tattttaccc gtttaatgga aacttcctca tgaaaaagtc tttagtcctc +aaagcctctg tagccgttgc taccctcgtt ccgatgctgt ctttcgctgc tgagggtgac +gatcccgcaa aagcggcctt taactccctg caagcctcag cgaccgaata tatcggttat +gcgtgggcga tggttgttgt cattgtcggc gcaactatcg gtatcaagct gtttaagaaa +ttcacctcga aagcaagctg ataaaccgat acaattaaag gctccttttg gagccttttt +ttttggagat tttcaacgtg aaaaaattat tattcgcaat tcctttagtt gttcctttct +attctcactc cgctgaaact gttgaaagtt gtttagcaaa accccataca gaaaattcat +ttactaacgt ctggaaagac gacaaaactt tagatcgtta cgctaactat gagggttgtc +tgtggaatgc tacaggcgtt gtagtttgta ctggtgacga aactcagtgt tacggtacat +gggttcctat tgggcttgct atccctgaaa atgagggtgg tggctctgag ggtggcggtt +ctgagggtgg cggttctgag ggtggcggta ctaaacctcc tgagtacggt gatacaccta +ttccgggcta tacttatatc aaccctctcg acggcactta tccgcctggt actgagcaaa +accccgctaa tcctaatcct tctcttgagg agtctcagcc tcttaatact ttcatgtttc +agaataatag gttccgaaat aggcaggggg cattaactgt ttatacgggc actgttactc +aaggcactga ccccgttaaa acttattacc agtacactcc tgtatcatca aaagccatgt +atgacgctta ctggaacggt aaattcagag actgcgcttt ccattctggc tttaatgaag +atccattcgt ttgtgaatat caaggccaat cgtctgacct gcctcaacct cctgtcaatg +ctggcggcgg ctctggtggt ggttctggtg gcggctctga gggtggtggc tctgagggtg +gcggttctga gggtggcggc tctgagggag gcggttccgg tggtggctct ggttccggtg +attttgatta tgaaaagatg gcaaacgcta ataagggggc tatgaccgaa aatgccgatg +aaaacgcgct acagtctgac gctaaaggca aacttgattc tgtcgctact gattacggtg +ctgctatcga tggtttcatt ggtgacgttt ccggccttgc taatggtaat ggtgctactg +gtgattttgc tggctctaat tcccaaatgg ctcaagtcgg tgacggtgat aattcacctt +taatgaataa tttccgtcaa tatttacctt ccctccctca atcggttgaa tgtcgccctt +ttgtctttag cgctggtaaa ccatatgaat tttctattga ttgtgacaaa ataaacttat +tccgtggtgt ctttgcgttt cttttatatg ttgccacctt tatgtatgta ttttctacgt +ttgctaacat actgcgtaat aaggagtctt aatcatgcca gttcttttgg gtattccgtt +attattgcgt ttcctcggtt tccttctggt aactttgttc ggctatctgc ttacttttct +taaaaagggc ttcggtaaga tagctattgc tatttcattg tttcttgctc ttattattgg +gcttaactca attcttgtgg gttatctctc tgatattagc gctcaattac cctctgactt +tgttcagggt gttcagttaa ttctcccgtc taatgcgctt ccctgttttt atgttattct +ctctgtaaag gctgctattt tcatttttga cgttaaacaa aaaatcgttt cttatttgga +ttgggataaa taatatggct gtttattttg taactggcaa attaggctct ggaaagacgc +tcgttagcgt tggtaagatt caggataaaa ttgtagctgg gtgcaaaata gcaactaatc +ttgatttaag gcttcaaaac ctcccgcaag tcgggaggtt cgctaaaacg cctcgcgttc +ttagaatacc ggataagcct tctatatctg atttgcttgc tattgggcgc ggtaatgatt +cctacgatga aaataaaaac ggcttgcttg ttctcgatga gtgcggtact tggtttaata +cccgttcttg gaatgataag gaaagacagc cgattattga ttggtttcta catgctcgta +aattaggatg ggatattatt tttcttgttc aggacttatc tattgttgat aaacaggcgc +gttctgcatt agctgaacat gttgtttatt gtcgtcgtct ggacagaatt actttacctt +ttgtcggtac tttatattct cttattactg gctcgaaaat gcctctgcct aaattacatg +ttggcgttgt taaatatggc gattctcaat taagccctac tgttgagcgt tggctttata +ctggtaagaa tttgtataac gcatatgata ctaaacaggc tttttctagt aattatgatt +ccggtgttta ttcttattta acgccttatt tatcacacgg tcggtatttc aaaccattaa +atttaggtca gaagatgaaa ttaactaaaa tatatttgaa aaagttttct cgcgttcttt +gtcttgcgat tggatttgca tcagcattta catatagtta tataacccaa cctaagccgg +aggttaaaaa ggtagtctct cagacctatg attttgataa attcactatt gactcttctc +agcgtcttaa tctaagctat cgctatgttt tcaaggattc taagggaaaa ttaattaata +gcgacgattt acagaagcaa ggttattcac tcacatatat tgatttatgt actgtttcca +ttaaaaaagg taattcaaat gaaattgtta aatgtaatta attttgtttt cttgatgttt +gtttcatcat cttcttttgc tcaggtaatt gaaatgaata attcgcctct gcgcgatttt +gtaacttggt attcaaagca atcaggcgaa tccgttattg tttctcccga tgtaaaaggt +actgttactg tatattcatc tgacgttaaa cctgaaaatc tacgcaattt ctttatttct +gttttacgtg ctaataattt tgatatggtt ggttcaattc cttccataat tcagaagtat +aatccaaaca atcaggatta tattgatgaa ttgccatcat ctgataatca ggaatatgat +gataattccg ctccttctgg tggtttcttt gttccgcaaa atgataatgt tactcaaact +tttaaaatta ataacgttcg ggcaaaggat ttaatacgag ttgtcgaatt gtttgtaaag +tctaatactt ctaaatcctc aaatgtatta tctattgacg gctctaatct attagttgtt +agtgcaccta aagatatttt agataacctt cctcaattcc tttctactgt tgatttgcca +actgaccaga tattgattga gggtttgata tttgaggttc agcaaggtga tgctttagat +ttttcatttg ctgctggctc tcagcgtggc actgttgcag gcggtgttaa tactgaccgc +ctcacctctg ttttatcttc tgctggtggt tcgttcggta tttttaatgg cgatgtttta +gggctatcag ttcgcgcatt aaagactaat agccattcaa aaatattgtc tgtgccacgt +attcttacgc tttcaggtca gaagggttct atctctgttg gccagaatgt cccttttatt +actggtcgtg tgactggtga atctgccaat gtaaataatc catttcagac gattgagcgt +caaaatgtag gtatttccat gagcgttttt cctgttgcaa tggctggcgg taatattgtt +ctggatatta ccagcaaggc cgatagtttg agttcttcta ctcaggcaag tgatgttatt +actaatcaaa gaagtattgc tacaacggtt aatttgcgtg atggacagac tcttttactc +ggtggcctca ctgattataa aaacacttct caagattctg gcgtaccgtt cctgtctaaa +atccctttaa tcggcctcct gtttagctcc cgctctgatt ccaacgagga aagcacgtta +tacgtgctcg tcaaagcaac catagtacgc gccctgtagc ggcgcattaa gcgcggcggg +tgtggtggtt acgcgcagcg tgaccgctac acttgccagc gccctagcgc ccgctccttt +cgctttcttc ccttcctttc tcgccacgtt cgccggcttt ccccgtcaag ctctaaatcg +ggggctccct ttagggttcc gatttagtgc tttacggcac ctcgacccca aaaaacttga +tttgggtgat ggttcacgta gtgggccatc gccctgatag acggtttttc gccctttgac +gttggagtcc acgttcttta atagtggact cttgttccaa actggaacaa cactcaaccc +tatctcgggc tattcttttg atttataagg gattttgccg atttcggaac caccatcaaa +caggattttc gcctgctggg gcaaaccagc gtggaccgct tgctgcaact ctctcagggc +caggcggtga agggcaatca gctgttgccc gtctcgctgg tgaaaagaaa aaccaccctg +gcgcccaata cgcaaaccgc ctctccccgc gcgttggccg attcattaat gcagctggca +cgacaggttt cccgactgga aagcgggcag tgagcgcaac gcaattaatg tgagttagct +cactcattag gcaccccagg ctttacactt tatgcttccg gctcgtatgt tgtgtggaat +tgtgagcgga taacaatttc acacaggaaa cagctatgac catgattacg aattcgagct +cggtacccgg ggatcctcta gagtcgacct gcaggcatgc aagcttggca ctggccgtcg +ttttacaacg tcgtgactgg gaaaaccctg gcgttaccca acttaatcgc cttgcagcac +atcccccttt cgccagctgg cgtaatagcg aagaggcccg caccgatcgc ccttcccaac +agttgcgcag cctgaatggc gaatggcgct ttgcctggtt tccggcacca gaagcggtgc +cggaaagctg gctggagtgc gatcttcctg aggccgatac ggtcgtcgtc ccctcaaact +ggcagatgca cggttacgat gcgcccatct acaccaacgt aacctatccc attacggtca +atccgccgtt tgttcccacg gagaatccga cgggttgtta ctcgctcaca tttaatgttg +atgaaagctg gctacaggaa ggccagacgc gaattatttt tgatggcgtt cctattggtt +aaaaaatgag ctgatttaac aaaaatttaa cgcgaatttt aacaaaatat taacgtttac +aatttaaata tttgcttata caatcttcct gtttttgggg cttttctgat tatcaaccgg +ggtacatatg attgacatgc tagttttacg attaccgttc atcgattctc ttgtttgctc +cagactctca ggcaatgacc tgatagcctt tgtagatctc tcaaaaatag ctaccctctc +cggcattaat ttatcagcta gaacggttga atatcatatt gatggtgatt tgactgtctc +cggcctttct cacccttttg aatctttacc tacacattac tcaggcattg catttaaaat +atatgagggt tctaaaaatt tttatccttg cgttgaaata aaggcttctc ccgcaaaagt +attacagggt cataatgttt ttggtacaac cgatttagct ttatgctctg aggctttatt +gcttaatttt gctaattctt tgccttgcct gtatgattta ttggatgtt diff --git a/run.py b/run.py index 0eb760c6adf031ff448befff8a2d0fa2024de5b9..19c6006fb0f43110e4fad1cfc0b49cfd03eb0797 100644 --- a/run.py +++ b/run.py @@ -14,7 +14,25 @@ import sys # twistLp = 1e-6 twistLp = 90 -def simulateDesign(inFile, outPrefix, gpu=0, shiftEulerZ=0): + +def readSequenceFile(sequenceFile='resources/cadnano2pdb.seq.m13mp18.dat'): + seq = [] + with open(sequenceFile) as ch: + for l in ch: + l = l.strip().replace(" ", "") + if l[0] in (";","#"): continue + seq.extend([c.upper() for c in l]) + return seq + +def readArbdCoords(fname): + coords = [] + with open(fname) as fh: + for line in fh: + coords.append([float(x) for x in line.split()[1:]]) + return np.array(coords) + +def simulateDesign(inFile, outPrefix, numCoarsestSteps=5000000, gpu=0, shiftEulerZ=0, + sequenceFile='resources/cadnano2pdb.seq.m13mp18.dat'): # ----------------- # # Read cadnano file # # ----------------- # @@ -43,14 +61,6 @@ def simulateDesign(inFile, outPrefix, gpu=0, shiftEulerZ=0): cadnano.fileio.v2decode.decode(doc, data) - def readArbdCoords(fname): - coords = [] - with open(fname) as fh: - for line in fh: - coords.append([float(x) for x in line.split()[1:]]) - return np.array(coords) - - parts = [p for p in doc.getParts()] if len(parts) != 1: raise Exception("Only documents containing a single 'part' are implemented at this time.") @@ -77,7 +87,7 @@ def simulateDesign(inFile, outPrefix, gpu=0, shiftEulerZ=0): finerModel = beadModelTwist(part, twistPersistenceLength=twistLp, maxBpsPerDNode=1, maxNtsPerSNode=1) - finestModel = atomicModel(part, ntScale=1.0) + finestModel = atomicModel(part, ntScale=0.5) models = [coarsestModel, coarseModel, fineModel, finerModel, finestModel] @@ -123,7 +133,7 @@ def simulateDesign(inFile, outPrefix, gpu=0, shiftEulerZ=0): # ------------------------------- # out = outPrefix + "-1" - coarsestModel.simulate(out, outputDirectory='output', numSteps=15000000, timestep=200e-6, + coarsestModel.simulate(out, outputDirectory='output', numSteps=numCoarsestSteps, timestep=200e-6, arbd="/home/cmaffeo2/development/cuda/arbd.dev/src/arbd", gpu=gpu) coarsestModelCoords = readArbdCoords( "output/%s.0.restart" % out ) @@ -131,7 +141,7 @@ def simulateDesign(inFile, outPrefix, gpu=0, shiftEulerZ=0): coarseModel.backmap( coarsestModel, coarsestModelCoords, dsDnaHelixNeighborDist=100, dsDnaAllNeighborDist=50, ssDnaHelixNeighborDist=50) - coarseModel.simulate( out, outputDirectory='output', numSteps=1000000, + coarseModel.simulate( out, outputDirectory='output', numSteps=100000, arbd="/home/cmaffeo2/development/cuda/arbd.dev/src/arbd", gpu=gpu) coarseModelCoords = readArbdCoords( "output/%s.0.restart" % out ) @@ -139,7 +149,7 @@ def simulateDesign(inFile, outPrefix, gpu=0, shiftEulerZ=0): fineModel.backmap( coarseModel, coarseModelCoords, dsDnaHelixNeighborDist=100, dsDnaAllNeighborDist=50, ssDnaHelixNeighborDist=50) - fineModel.simulate( out, outputDirectory='output', numSteps=1000000, + fineModel.simulate( out, outputDirectory='output', numSteps=100000, arbd="/home/cmaffeo2/development/cuda/arbd.dev/src/arbd", gpu=gpu) fineModelCoords = readArbdCoords( "output/%s.0.restart" % out ) @@ -147,21 +157,21 @@ def simulateDesign(inFile, outPrefix, gpu=0, shiftEulerZ=0): finerModel.backmap( fineModel, fineModelCoords, dsDnaHelixNeighborDist=100, dsDnaAllNeighborDist=50, ssDnaHelixNeighborDist=50) - finerModel.simulate( out, outputDirectory='output', numSteps=1000000, + finerModel.simulate(out, outputDirectory='output', numSteps=100000, timestep=50e-6, arbd="/home/cmaffeo2/development/cuda/arbd.dev/src/arbd", gpu=gpu) finerModelCoords = readArbdCoords( "output/%s.0.restart" % out ) out = outPrefix + "-5" + finestModel.setScaffoldSeq(readSequenceFile(sequenceFile)) finestModel.randomizeUnassignedNtSeqs() + # finestModel.setUnassignedNtSeqs("T") finestModel.writePdbPsf( out + ".ideal" ) finestModel.backmap( finerModel, finerModelCoords ) - finestModel.writePdbPsf( out ) - + finestModel.writeNamdFiles( out ) # def simulateDesign(f,out,gpu=0,shiftEulerZ=180): # print(out) - if len(sys.argv) > 1: files = sys.argv[1:] else: @@ -169,9 +179,9 @@ else: print(files) -gpu=0 +gpu=1 for f in files[:]: # f = files print(f) out = re.match('json/(.*).json',f).group(1) - simulateDesign(f,out,gpu=gpu) + simulateDesign(f, out, numCoarsestSteps=5000000, gpu=gpu) diff --git a/test.py b/test.py deleted file mode 100644 index 8b15a12138c3ceffffd900402f3a9bdb6710b01a..0000000000000000000000000000000000000000 --- a/test.py +++ /dev/null @@ -1,18 +0,0 @@ -# -*- coding: utf-8 -*- - -import json -import cadnano -from cadnano.document import Document -from encode import encodeDocument - -with open('input.json') as ch: - data = json.load(ch) - -try: - doc = Document() - cadnano.fileio.v3decode.decode(doc, data) -except: - doc = Document() - cadnano.fileio.v2decode.decode(doc, data) - -encodeDocument(doc)