add

08f835d2 · pinyili2 · 47aef9dc · 08f835d2 · 08f835d2 · 08f835d2
Commit 08f835d2 authored 7 months ago by pinyili2
--- a/mrdna/readers/.nfs000000000007668300005e88
+++ b/mrdna/readers/.nfs000000000007668300005e88
--- a/mrdna/readers/segmentmodel_from_cadnano.bak.py
+++ b/mrdna/readers/segmentmodel_from_cadnano.bak.py
+# -*- coding: utf-8 -*-
+import numpy as np
+import os,sys
+from glob import glob
+import re
+import pandas as pd
+pd.options.mode.chained_assignment = None  # default='warn'
+from .segmentmodel_from_lists import model_from_basepair_stack_3prime
+
+from ..arbdmodel.coords import readArbdCoords, readAvgArbdCoords, rotationAboutAxis
+from ..segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment
+from ..model.dna_sequence import m13 as m13seq
+import json
+import re
+import pdb
+
+
+
+## TODO: separate SegmentModel from ArbdModel so multiple parts can be combined
+## TODO: catch circular strands in "get_5prime" cadnano calls
+## TODO: handle special motifs
+##   - doubly-nicked helices
+##   - helices that should be stacked across an empty region (crossovers from and end in the helix to another end in the helix)
+##   - circular constructs
+
+
+def get_lattice(part):
+    lattice_type = None
+    _gt = part.getGridType()
+    try:
+        lattice_type = _gt.name.lower()
+    except:
+        if _gt == 1:
+            lattice_type = 'square'
+        elif _gt == 2:
+            lattice_type = 'honeycomb'
+        else:
+            print(lattice_type)
+    return lattice_type
+
+
+def read_json_file(filename):
+    import cadnano
+    from cadnano.document import Document
+
+    try:
+        with open(filename) as ch:
+            json_data = json.load(ch)
+    except:
+        with open(filename) as ch:
+            content = ""
+            for l in ch:
+                l = re.sub(r"'", r'"', l)
+                # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name
+                # l = re.sub(r"{\s*(\w)", r'{"\1', l)
+                # l = re.sub(r",\s*(\w)", r',"\1', l)
+                # l = re.sub(r"(\w):", r'\1":', l)
+                content += l+"\n"
+            json_data = json.loads(content)
+
+    try:
+        doc = Document()
+        cadnano.fileio.v3decode.decode(doc, json_data)
+        decoder = 3
+    except:
+        doc = Document()
+        cadnano.fileio.v2decode.decode(doc, json_data)
+        decoder = 2
+
+    parts = [p for p in doc.getParts()]
+    if len(parts) != 1:
+        raise Exception("Only documents containing a single cadnano part are implemented at this time.")
+    part = parts[0]
+
+    if decoder == 2:
+        """ It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here """
+        l = get_lattice(part)
+        if l == 'square':
+            for id_num in part.getIdNums():
+                if part.vh_properties.loc[id_num,'eulerZ'] == 0:
+                    part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5)
+        df=pd.DataFrame(json_data["vstrands"])
+        n_df=df.set_index("num")
+    return part
+
+def get_helix_angle(part, helix_id, indices):
+    """ Get "start_orientation" for helix """
+        # import ipdb
+        # ipdb.set_trace()
+
+    """ FROM CADNANO2.5
+    + angle is CCW
+    - angle is CW
+    Right handed DNA rotates clockwise from 5' to 3'
+    we use the convention the 5' end starts at 0 degrees
+    and it's pair is minor_groove_angle degrees away
+    direction, hence the minus signs.  eulerZ
+    """
+
+    hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id,
+                                                                    ['helical_pitch',
+                                                                     'bases_per_repeat',
+                                                                     'turns_per_repeat',
+                                                                     'eulerZ',
+                                                                     'minor_groove_angle']]
+    twist_per_base = tpr*360./bpr
+        # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180
+    angle = eulerZ + twist_per_base*indices - 0.5*mgroove
+    return rotationAboutAxis(np.array((0,0,1)),angle)
+
+def gen_id_series(strand,part):
+    df=pd.DataFrame(columns=["vh","zid","fwd","stack_tuple","threeprime_tuple","x","y","z"],index=range(strand.totalLength()),dtype=object)
+    df["vh"]=strand._id_num
+    df["fwd"]=strand.isForward()
+    df["x"]=part.getVirtualHelixOrigin(strand._id_num)[0]*10
+    df["y"]=part.getVirtualHelixOrigin(strand._id_num)[1]*10
+    id_lo,id_hi=strand.idxs()
+    zids=[str(i) for i in range(id_lo,id_hi+1)]
+    insert_dict={}
+    insert_dict=dict([(j.idx(),j.length()) for j in strand.insertionsOnStrand()])
+    z=np.arange(id_lo,id_hi+1)
+    zids=[str(i) for i in range(id_lo,id_hi+1)]
+    z=list(np.arange(id_lo,id_hi+1))
+    zids=[str(i) for i in range(id_lo,id_hi+1)]
+    for insert_base in insert_dict:
+        z_ind=zids.index(str(insert_base))
+        z_val=insert_dict[insert_base]
+        z_pos_ind=z.index(insert_base)
+        zids.pop(z_ind)
+        z.pop(z_pos_ind)
+        if z_val!=-1:
+            #l=[str(insert_base)+"."+str(i) for i in range(z_val+1)]
+            l=list(range(z_val+1))
+            l.reverse()
+            for k in l:                
+                zids.insert(z_ind,str(insert_base)+"."+str(k))
+                z.insert(z_pos_ind,insert_base+k/(z_val+1))
+    df["zid"]=zids
+    df["z"]=np.array(z)*3.4
+    
+    
+    L=[(df["vh"][i],df["zid"][i],df["fwd"][i]) for i in df.index]
+    if strand.isForward()==True:
+        df["stack_tuple"]=L[1:]+[-1]
+        if strand.connection3p() is None:
+            df["threeprime_tuple"]=L[1:]+[-1]
+        else:
+            df["threeprime_tuple"]=L[1:]+[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]
+        
+    
+    else:
+        df["stack_tuple"]=[-1]+L[0:-1]
+        if strand.connection3p() is None:
+            df["threeprime_tuple"]=[-1]+L[0:-1]
+        else:
+            df["threeprime_tuple"]=[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]+L[0:-1]
+    ## cadnano 3.1 sequence assign is wrong if there is insertion or deletion. 
+    df["r"]=[np.array([df["x"][i],df["y"][i],df["z"][i]],dtype=np.float32) for i in df.index]
+    
+    return [pd.Series(df.loc[i]) for i in df.index]
+
+
+def gen_prop_table(part):
+    strand_set=[]
+    for i in part.getidNums():
+        fwd,rev=part.getStrandSets(i)
+        [strand_set.append(i) for i in fwd.strands()]
+        [strand_set.append(i) for i in rev.strands()]
+    id_series=[]
+    for i in strand_set:
+        id_series=id_series+gen_id_series(i,part)
+    
+    nt_prop=pd.DataFrame(id_series)
+    nt_prop.reset_index(inplace=True)
+    nt_prop["seq"]=-1
+    ind_tuple=list(zip(nt_prop["vh"],nt_prop["zid"],nt_prop["fwd"]))
+    stacks=[]
+    for i in list(nt_prop["stack_tuple"]):
+        if i ==-1:
+            stacks.append(i)
+        else:
+            stacks.append(ind_tuple.index(i))
+    nt_prop["stack"]=stacks
+    tprime=[]
+    for i in list(nt_prop["threeprime_tuple"]):
+        if i ==-1:
+            tprime.append(i)
+        else:
+            tprime.append(ind_tuple.index(i))
+    nt_prop["threeprime"]=tprime
+    vhzid=list(zip(nt_prop["vh"],nt_prop["zid"]))
+    nt_prop["orientation"]=[get_helix_angle(part, helix_id, int(float(indices))) for helix_id,indices in vhzid]
+    nt_prop=nt_prop.fillna(-1)
+    counter=-1
+    bp=-np.ones(len(nt_prop.index),dtype=int)
+    bp_map=dict(zip(ind_tuple,nt_prop.index))
+    for i,j,k in ind_tuple:
+        counter+=1
+        try:
+            bp[counter]=bp_map[(i,j,not(k))]
+        except:
+            pass
+    nt_prop["bp"]=bp
+
+    return nt_prop
+
+def mrdna_model_from_cadnano(json_file,seq=None,**model_parameters):
+    part=read_json_file(json_file)
+    
+    nt_prop=gen_prop_table(part)
+
+
+    if seq is None:
+        if nt_prop["seq"][0]==-1:
+            seq=None
+        else:
+            seq=nt_prop["seq"]
+    
+    r=np.array(list(nt_prop['r']))
+    bp=np.array(list(nt_prop['bp']))
+    three_prime=np.array((list(nt_prop["threeprime"])))
+    stack=np.array((list(nt_prop["stack"])))
+    orientation=np.array(list(nt_prop["orientation"]))
+    model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters )
+    return model
--- a/mrdna/readers/segmentmodel_from_scadnano.py
+++ b/mrdna/readers/segmentmodel_from_scadnano.py
@@ -85,9 +85,12 @@ def gen_strand(strand,design,group_index=0):
    strand_df["threeprime"]=list(strand_df.index[1:]+group_index)+[-1]
    stack=np.where(strand_df["stacked"],strand_df["threeprime"],-1)
    strand_df["stack"]=stack
-    if len(strand.dna_sequence)==len(strand_df.index):
-        strand_df["seq"]=list(strand.dna_sequence)
-    else:
+    try:
+        if len(strand.dna_sequence)==len(strand_df.index):
+            strand_df["seq"]=list(strand.dna_sequence)
+        else:
+            strand_df["seq"]=-1
+    except:
        strand_df["seq"]=-1
    strand_df["index"]=strand_df.index+group_index
    #strand_df["seg_index"]=index
@@ -128,10 +131,12 @@ def mrdna_model_from_scadnano(sc_file,seq=None,**model_parameters):
    bp=np.array(list(nt_prop['bp']))
    three_prime=np.array((list(nt_prop["threeprime"])))
    stack=np.array((list(nt_prop["stack"])))
+    
    if nt_prop["seq"][0]==-1:
        seq=None
    else:
        seq=nt_prop["seq"]
+
    orientation=[rotationAboutAxis(np.array((0,0,1)),i) for i in list(nt_prop["orientation_angle"])]
    model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters )
    model._dataframe=nt_prop