diff --git a/mrdna/readers/.nfs000000000007668300005e88 b/mrdna/readers/.nfs000000000007668300005e88 new file mode 100644 index 0000000000000000000000000000000000000000..ed1479fd8b315ab84d5703322bf45d99a30438d1 Binary files /dev/null and b/mrdna/readers/.nfs000000000007668300005e88 differ diff --git a/mrdna/readers/segmentmodel_from_cadnano.bak.py b/mrdna/readers/segmentmodel_from_cadnano.bak.py new file mode 100644 index 0000000000000000000000000000000000000000..bfb56d9964fe0a45b3f72d62a8e833452892e79a --- /dev/null +++ b/mrdna/readers/segmentmodel_from_cadnano.bak.py @@ -0,0 +1,225 @@ +# -*- coding: utf-8 -*- +import numpy as np +import os,sys +from glob import glob +import re +import pandas as pd +pd.options.mode.chained_assignment = None # default='warn' +from .segmentmodel_from_lists import model_from_basepair_stack_3prime + +from ..arbdmodel.coords import readArbdCoords, readAvgArbdCoords, rotationAboutAxis +from ..segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment +from ..model.dna_sequence import m13 as m13seq +import json +import re +import pdb + + + +## TODO: separate SegmentModel from ArbdModel so multiple parts can be combined +## TODO: catch circular strands in "get_5prime" cadnano calls +## TODO: handle special motifs +## - doubly-nicked helices +## - helices that should be stacked across an empty region (crossovers from and end in the helix to another end in the helix) +## - circular constructs + + +def get_lattice(part): + lattice_type = None + _gt = part.getGridType() + try: + lattice_type = _gt.name.lower() + except: + if _gt == 1: + lattice_type = 'square' + elif _gt == 2: + lattice_type = 'honeycomb' + else: + print(lattice_type) + return lattice_type + + +def read_json_file(filename): + import cadnano + from cadnano.document import Document + + try: + with open(filename) as ch: + json_data = json.load(ch) + except: + with open(filename) as ch: + content = "" + for l in ch: + l = re.sub(r"'", r'"', l) + # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name + # l = re.sub(r"{\s*(\w)", r'{"\1', l) + # l = re.sub(r",\s*(\w)", r',"\1', l) + # l = re.sub(r"(\w):", r'\1":', l) + content += l+"\n" + json_data = json.loads(content) + + try: + doc = Document() + cadnano.fileio.v3decode.decode(doc, json_data) + decoder = 3 + except: + doc = Document() + cadnano.fileio.v2decode.decode(doc, json_data) + decoder = 2 + + parts = [p for p in doc.getParts()] + if len(parts) != 1: + raise Exception("Only documents containing a single cadnano part are implemented at this time.") + part = parts[0] + + if decoder == 2: + """ It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here """ + l = get_lattice(part) + if l == 'square': + for id_num in part.getIdNums(): + if part.vh_properties.loc[id_num,'eulerZ'] == 0: + part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5) + df=pd.DataFrame(json_data["vstrands"]) + n_df=df.set_index("num") + return part + +def get_helix_angle(part, helix_id, indices): + """ Get "start_orientation" for helix """ + # import ipdb + # ipdb.set_trace() + + """ FROM CADNANO2.5 + + angle is CCW + - angle is CW + Right handed DNA rotates clockwise from 5' to 3' + we use the convention the 5' end starts at 0 degrees + and it's pair is minor_groove_angle degrees away + direction, hence the minus signs. eulerZ + """ + + hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id, + ['helical_pitch', + 'bases_per_repeat', + 'turns_per_repeat', + 'eulerZ', + 'minor_groove_angle']] + twist_per_base = tpr*360./bpr + # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180 + angle = eulerZ + twist_per_base*indices - 0.5*mgroove + return rotationAboutAxis(np.array((0,0,1)),angle) + +def gen_id_series(strand,part): + df=pd.DataFrame(columns=["vh","zid","fwd","stack_tuple","threeprime_tuple","x","y","z"],index=range(strand.totalLength()),dtype=object) + df["vh"]=strand._id_num + df["fwd"]=strand.isForward() + df["x"]=part.getVirtualHelixOrigin(strand._id_num)[0]*10 + df["y"]=part.getVirtualHelixOrigin(strand._id_num)[1]*10 + id_lo,id_hi=strand.idxs() + zids=[str(i) for i in range(id_lo,id_hi+1)] + insert_dict={} + insert_dict=dict([(j.idx(),j.length()) for j in strand.insertionsOnStrand()]) + z=np.arange(id_lo,id_hi+1) + zids=[str(i) for i in range(id_lo,id_hi+1)] + z=list(np.arange(id_lo,id_hi+1)) + zids=[str(i) for i in range(id_lo,id_hi+1)] + for insert_base in insert_dict: + z_ind=zids.index(str(insert_base)) + z_val=insert_dict[insert_base] + z_pos_ind=z.index(insert_base) + zids.pop(z_ind) + z.pop(z_pos_ind) + if z_val!=-1: + #l=[str(insert_base)+"."+str(i) for i in range(z_val+1)] + l=list(range(z_val+1)) + l.reverse() + for k in l: + zids.insert(z_ind,str(insert_base)+"."+str(k)) + z.insert(z_pos_ind,insert_base+k/(z_val+1)) + df["zid"]=zids + df["z"]=np.array(z)*3.4 + + + L=[(df["vh"][i],df["zid"][i],df["fwd"][i]) for i in df.index] + if strand.isForward()==True: + df["stack_tuple"]=L[1:]+[-1] + if strand.connection3p() is None: + df["threeprime_tuple"]=L[1:]+[-1] + else: + df["threeprime_tuple"]=L[1:]+[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())] + + + else: + df["stack_tuple"]=[-1]+L[0:-1] + if strand.connection3p() is None: + df["threeprime_tuple"]=[-1]+L[0:-1] + else: + df["threeprime_tuple"]=[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]+L[0:-1] + ## cadnano 3.1 sequence assign is wrong if there is insertion or deletion. + df["r"]=[np.array([df["x"][i],df["y"][i],df["z"][i]],dtype=np.float32) for i in df.index] + + return [pd.Series(df.loc[i]) for i in df.index] + + +def gen_prop_table(part): + strand_set=[] + for i in part.getidNums(): + fwd,rev=part.getStrandSets(i) + [strand_set.append(i) for i in fwd.strands()] + [strand_set.append(i) for i in rev.strands()] + id_series=[] + for i in strand_set: + id_series=id_series+gen_id_series(i,part) + + nt_prop=pd.DataFrame(id_series) + nt_prop.reset_index(inplace=True) + nt_prop["seq"]=-1 + ind_tuple=list(zip(nt_prop["vh"],nt_prop["zid"],nt_prop["fwd"])) + stacks=[] + for i in list(nt_prop["stack_tuple"]): + if i ==-1: + stacks.append(i) + else: + stacks.append(ind_tuple.index(i)) + nt_prop["stack"]=stacks + tprime=[] + for i in list(nt_prop["threeprime_tuple"]): + if i ==-1: + tprime.append(i) + else: + tprime.append(ind_tuple.index(i)) + nt_prop["threeprime"]=tprime + vhzid=list(zip(nt_prop["vh"],nt_prop["zid"])) + nt_prop["orientation"]=[get_helix_angle(part, helix_id, int(float(indices))) for helix_id,indices in vhzid] + nt_prop=nt_prop.fillna(-1) + counter=-1 + bp=-np.ones(len(nt_prop.index),dtype=int) + bp_map=dict(zip(ind_tuple,nt_prop.index)) + for i,j,k in ind_tuple: + counter+=1 + try: + bp[counter]=bp_map[(i,j,not(k))] + except: + pass + nt_prop["bp"]=bp + + return nt_prop + +def mrdna_model_from_cadnano(json_file,seq=None,**model_parameters): + part=read_json_file(json_file) + + nt_prop=gen_prop_table(part) + + + if seq is None: + if nt_prop["seq"][0]==-1: + seq=None + else: + seq=nt_prop["seq"] + + r=np.array(list(nt_prop['r'])) + bp=np.array(list(nt_prop['bp'])) + three_prime=np.array((list(nt_prop["threeprime"]))) + stack=np.array((list(nt_prop["stack"]))) + orientation=np.array(list(nt_prop["orientation"])) + model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters ) + return model diff --git a/mrdna/readers/segmentmodel_from_scadnano.py b/mrdna/readers/segmentmodel_from_scadnano.py index 7311227e03cba381200fa20bf36cad3122c4830b..da2d4c4fe4ab984432f6bf414b6ca002e9496590 100644 --- a/mrdna/readers/segmentmodel_from_scadnano.py +++ b/mrdna/readers/segmentmodel_from_scadnano.py @@ -85,9 +85,12 @@ def gen_strand(strand,design,group_index=0): strand_df["threeprime"]=list(strand_df.index[1:]+group_index)+[-1] stack=np.where(strand_df["stacked"],strand_df["threeprime"],-1) strand_df["stack"]=stack - if len(strand.dna_sequence)==len(strand_df.index): - strand_df["seq"]=list(strand.dna_sequence) - else: + try: + if len(strand.dna_sequence)==len(strand_df.index): + strand_df["seq"]=list(strand.dna_sequence) + else: + strand_df["seq"]=-1 + except: strand_df["seq"]=-1 strand_df["index"]=strand_df.index+group_index #strand_df["seg_index"]=index @@ -128,10 +131,12 @@ def mrdna_model_from_scadnano(sc_file,seq=None,**model_parameters): bp=np.array(list(nt_prop['bp'])) three_prime=np.array((list(nt_prop["threeprime"]))) stack=np.array((list(nt_prop["stack"]))) + if nt_prop["seq"][0]==-1: seq=None else: seq=nt_prop["seq"] + orientation=[rotationAboutAxis(np.array((0,0,1)),i) for i in list(nt_prop["orientation_angle"])] model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters ) model._dataframe=nt_prop