From 08f835d242b3fb637694d6526dfbb0b435d37877 Mon Sep 17 00:00:00 2001
From: pinyili2 <pinyili2@illinois.edu>
Date: Tue, 20 Aug 2024 17:34:06 -0500
Subject: [PATCH] add

---
 mrdna/readers/.nfs000000000007668300005e88    | Bin 0 -> 16384 bytes
 .../readers/segmentmodel_from_cadnano.bak.py  | 225 ++++++++++++++++++
 mrdna/readers/segmentmodel_from_scadnano.py   |  11 +-
 3 files changed, 233 insertions(+), 3 deletions(-)
 create mode 100644 mrdna/readers/.nfs000000000007668300005e88
 create mode 100644 mrdna/readers/segmentmodel_from_cadnano.bak.py

diff --git a/mrdna/readers/.nfs000000000007668300005e88 b/mrdna/readers/.nfs000000000007668300005e88
new file mode 100644
index 0000000000000000000000000000000000000000..ed1479fd8b315ab84d5703322bf45d99a30438d1
GIT binary patch
literal 16384
zcmeHOO^h5z6)v0~JAffdkS|D9rS~ws>z<z7o!FQ~b6D1nv4nS#Z4!xHYv}2&nJL_V
zuBzGDi4RC1BqTU<h~UZzkPsIR2tf{z<ideVWC;Z>C?KRDE{GF+uey6@cQy`07$HQp
z{B2Ly&wEwhd#|dxx;$8Y@%jyZIb3D9o@VU#@BC=^=JTi77vDU^N}11OD%UCNy)H8t
zrR`~<?yh|zEXOk~V;#ygm3bj`D3VE2x06IB>S&?%g}U5nq<1ov<Wc=si6{|DqtGZ5
zxygz|r2U~PvVI()R9=MTtP$t#jseHO`(ofM>#ncaP+#(%<Ig_x`S(@Z6?P0b1{?#9
z0mp!2z%k$$a11yG{<j%0<zwv2=)$SG8^ikkfrb0u>ua}8A1&lRRzElQR~Pc@M|X1!
zI0hU8jseGjW56-s7;p?Y1{?#90mp!2;J?5?lrZ)z^!}1w`0@R}dH;X=qm2Cqcmwze
z@Lk~Bz}JD#0#5=@06+W)W4k~AaNrzp8u;B=#(oRD1^gVi2Xujt0e}54V^d%cxC*p^
zr-9SJTOVTV*T7GKSAeeqd%!ku3i#s(8T$k9Ht+`UI`A{#JHQmU3w#FX0Ox=|J;m4z
z*aFT156&?5W8j;>H-N7I5?BY$1J3||JI&auzz=}$178C&;Bny3Pcrrn@BnxbxD1>J
z{_+I40EWP)fb+mpz!~7d<BWX|_!e*<co`S~J3tRO1N;gbC$9i0kN`1o9(WBKGX-!C
zARGOn*_b)*EBsMhEip-j`a&yUk3t%6nTg7EzZ9ykL@6Tk$OI1}E&c<7cUxnT$~a2<
zr8M!lZtl1Y_@Gn)H}t2XDB#&h6~(^bJzhr2g!|)usS0$}NTI{Dh<8cUL0={T;sTfF
zdG?-2C1N9sJm6!I$AX)&;F-)Pnrk`Ab;xTKwse#yT$+}K5~;Y<RI*V0N_RiXN2%b6
z7%3q%kEYSgR+<Vv(Usu1$oFxK2I{Pyi!Cl7Q5!TF6%1c(YTJr8<!Eg97)nLLuWWB}
zRhY;?bc_z&ok|F$qc|L5Ou@S?TdB>fW}tCfFWHm}M^fq}^q#+PbLR%%+JxNgo%Qfy
zMGMEZ#ATTbLM=?+s3;cxvK3QjWo4m^En!nh(lueYXFJg^ltlj`>Y>Mng#xLd-LjpC
zAs<0zWeD3ypKL|M*QVhzPh>3g@<!DfCCFv1MT5Mb46R1`(MyK5eJC#ud@rA5Ue!os
zl}UI7=bBznQ6~H^=>p~Pxa;j}^d?WVcW2%o>Qi$)#BXHMhxnk0#XIn@?xocg|59gd
zHN1FIlpH?H15Zlo@zo91=%+g|Ruuykl^jrZJ=vaQ+6OC5Stv<)ug6>Z-Xv0@HP1n^
zWjT$E0hN~Fc_YbM*A3S~WbmTu1Q%@2{Ydj-$YXJ7I9y%(<T6hRxLICMQDa^8svU}K
z11p`p)&TESlNfd)zbq6*Q#uPEK;|Q^3m8|4!)Qah6A|MOnWEOO*JdW>>%i5!t9K52
zsUmXms|T?t$)S92Cr@XP98c&h8mWV;$($5Lh*leYb*C(M9Mo``m8qceh#^Fn86z-^
z%tTmw3{fd^%adN_&-M6PMJ6>$qgA`5?Q}^7f$9+U_#9>V%Xj7>bJ!`qe%SivfZR}~
zLKflvdQ~W_Za!oM{Lnq;CPF&2Ua31-$DXhp%&k1v*L|qI?!TeWs99~B#^VlKipJ4z
zjHywgeKG;Koj1QJUuW<OJ)U0h_VQ}9ZF1?pL@vyg2=!#(E6?AXE^l}NSFOD@Z=)6P
z^rX;*D#4y!p=!2P;O(vbOSSGFt#$uBYK@I4^+u<IkuAQ9xkU}rVj9Lp*10zknmkf>
zo?q>DFLk>e8l-W+a@0<vmuGDvs4eq$IWuGY5(g#CG03gwq**)1;KYuyLBEgj(Xzer
zPIigLh?KlB9?u<usyCGP5N?bT=$cA1=9ma`pZt*2E^`EXQ+Ls^sLH#V+K<O`d;Alj
zmG{tiPsd05Rvu}eHv!JYNL4++*F#C)%*x}eluG8t_g>n(xqW^6+6JHG(Ew8{X1hcf
zp)mW3W*Dm?z8REAQXSx=N2FRDC+nz6xM2%+5;NF#P|c!MC5#RI5r!r^R8dlME!M7w
zVb<i&E7qgQBF_r~pd}>RAgZQ9)j@A*B#E{ShqJ*oC6n9eUR`QI(l+|RnyI$Vt0sXB
zwzLFcZ5~S%Ptr({!&HN}Zn9`5#{7#>5<{gP7n3yM13?Rv_%4l~u_`ncDmfQfX=X6r
zh?X+GO{NIZ)q;eUNwlzmlhSgkS_8mnI4_K6bs}%M(VrBPL5k>*R$bj1%DlaEWoN@K
zg<yV-aFS8Vv%<)sZF`j+M_47!yI0Tm)C!Ll9v9(FneYxB1`EsdNNp`J<YRfoZ&hcV
zZ_ruisy~}*kW`PD(6UKtB<2)r5Sm4EqKI4v-k4Bh?KFjT8ihOuKl6zeZf{p?&D?;&
z-p*w-OVP5b8TMKIT$gJem1urZOjV-7&>qSTaT>d?1x6-X#MafbwJrraZpdkhA3e(5
z6g=AHfZwuH=`Fil5=osGpm8vcSV(T9Mxdx_NL&;u(T!vp0f@dWn~LF`p7%$`l4w;6
zKiV7=6SH|Bbxjghc#DV5Q>ig2H@%x5MyD4LT9Wszr}n$n(hMuX!mb>&<tU8DSS^U8
zA8A_J)0zO0PXR-d5n?8SWqYk@r1BQ4b(C;rFo=I;(xkx()`+9D7&L96-%Wm@13lBt
z6Bdu<j0EImlZ4eZTjK2%gkq6^Ym94V*uG3DjNbo0j`#R2fZqS{%4x6H@qD{Z|1{;f
z%Q4^>a11yG90QI4$ADwNG2j?*3^)cH1OML)(3V-fv1oTB7$w+Wc-^=6>VL4m>!yLv
z@lkPrPmFda0UsI1_?U-piP$+pnz$G6BcHf>w0RKlm6b{=?H^^dFR6L8S9-K)UWSoU
X(afhk3_mya`{b^7qzFDtEwg_B-uLQ4

literal 0
HcmV?d00001

diff --git a/mrdna/readers/segmentmodel_from_cadnano.bak.py b/mrdna/readers/segmentmodel_from_cadnano.bak.py
new file mode 100644
index 0000000..bfb56d9
--- /dev/null
+++ b/mrdna/readers/segmentmodel_from_cadnano.bak.py
@@ -0,0 +1,225 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+import os,sys
+from glob import glob
+import re
+import pandas as pd
+pd.options.mode.chained_assignment = None  # default='warn'
+from .segmentmodel_from_lists import model_from_basepair_stack_3prime
+
+from ..arbdmodel.coords import readArbdCoords, readAvgArbdCoords, rotationAboutAxis
+from ..segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment
+from ..model.dna_sequence import m13 as m13seq
+import json
+import re
+import pdb
+
+
+
+## TODO: separate SegmentModel from ArbdModel so multiple parts can be combined
+## TODO: catch circular strands in "get_5prime" cadnano calls
+## TODO: handle special motifs
+##   - doubly-nicked helices
+##   - helices that should be stacked across an empty region (crossovers from and end in the helix to another end in the helix)
+##   - circular constructs
+
+
+def get_lattice(part):
+    lattice_type = None
+    _gt = part.getGridType()
+    try:
+        lattice_type = _gt.name.lower()
+    except:
+        if _gt == 1:
+            lattice_type = 'square'
+        elif _gt == 2:
+            lattice_type = 'honeycomb'
+        else:
+            print(lattice_type)
+    return lattice_type
+
+
+def read_json_file(filename):
+    import cadnano
+    from cadnano.document import Document
+
+    try:
+        with open(filename) as ch:
+            json_data = json.load(ch)
+    except:
+        with open(filename) as ch:
+            content = ""
+            for l in ch:
+                l = re.sub(r"'", r'"', l)
+                # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name
+                # l = re.sub(r"{\s*(\w)", r'{"\1', l)
+                # l = re.sub(r",\s*(\w)", r',"\1', l)
+                # l = re.sub(r"(\w):", r'\1":', l)
+                content += l+"\n"
+            json_data = json.loads(content)
+
+    try:
+        doc = Document()
+        cadnano.fileio.v3decode.decode(doc, json_data)
+        decoder = 3
+    except:
+        doc = Document()
+        cadnano.fileio.v2decode.decode(doc, json_data)
+        decoder = 2
+
+    parts = [p for p in doc.getParts()]
+    if len(parts) != 1:
+        raise Exception("Only documents containing a single cadnano part are implemented at this time.")
+    part = parts[0]
+
+    if decoder == 2:
+        """ It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here """
+        l = get_lattice(part)
+        if l == 'square':
+            for id_num in part.getIdNums():
+                if part.vh_properties.loc[id_num,'eulerZ'] == 0:
+                    part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5)
+        df=pd.DataFrame(json_data["vstrands"])
+        n_df=df.set_index("num")
+    return part
+
+def get_helix_angle(part, helix_id, indices):
+    """ Get "start_orientation" for helix """
+        # import ipdb
+        # ipdb.set_trace()
+
+    """ FROM CADNANO2.5
+    + angle is CCW
+    - angle is CW
+    Right handed DNA rotates clockwise from 5' to 3'
+    we use the convention the 5' end starts at 0 degrees
+    and it's pair is minor_groove_angle degrees away
+    direction, hence the minus signs.  eulerZ
+    """
+
+    hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id,
+                                                                    ['helical_pitch',
+                                                                     'bases_per_repeat',
+                                                                     'turns_per_repeat',
+                                                                     'eulerZ',
+                                                                     'minor_groove_angle']]
+    twist_per_base = tpr*360./bpr
+        # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180
+    angle = eulerZ + twist_per_base*indices - 0.5*mgroove
+    return rotationAboutAxis(np.array((0,0,1)),angle)
+
+def gen_id_series(strand,part):
+    df=pd.DataFrame(columns=["vh","zid","fwd","stack_tuple","threeprime_tuple","x","y","z"],index=range(strand.totalLength()),dtype=object)
+    df["vh"]=strand._id_num
+    df["fwd"]=strand.isForward()
+    df["x"]=part.getVirtualHelixOrigin(strand._id_num)[0]*10
+    df["y"]=part.getVirtualHelixOrigin(strand._id_num)[1]*10
+    id_lo,id_hi=strand.idxs()
+    zids=[str(i) for i in range(id_lo,id_hi+1)]
+    insert_dict={}
+    insert_dict=dict([(j.idx(),j.length()) for j in strand.insertionsOnStrand()])
+    z=np.arange(id_lo,id_hi+1)
+    zids=[str(i) for i in range(id_lo,id_hi+1)]
+    z=list(np.arange(id_lo,id_hi+1))
+    zids=[str(i) for i in range(id_lo,id_hi+1)]
+    for insert_base in insert_dict:
+        z_ind=zids.index(str(insert_base))
+        z_val=insert_dict[insert_base]
+        z_pos_ind=z.index(insert_base)
+        zids.pop(z_ind)
+        z.pop(z_pos_ind)
+        if z_val!=-1:
+            #l=[str(insert_base)+"."+str(i) for i in range(z_val+1)]
+            l=list(range(z_val+1))
+            l.reverse()
+            for k in l:                
+                zids.insert(z_ind,str(insert_base)+"."+str(k))
+                z.insert(z_pos_ind,insert_base+k/(z_val+1))
+    df["zid"]=zids
+    df["z"]=np.array(z)*3.4
+    
+    
+    L=[(df["vh"][i],df["zid"][i],df["fwd"][i]) for i in df.index]
+    if strand.isForward()==True:
+        df["stack_tuple"]=L[1:]+[-1]
+        if strand.connection3p() is None:
+            df["threeprime_tuple"]=L[1:]+[-1]
+        else:
+            df["threeprime_tuple"]=L[1:]+[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]
+        
+    
+    else:
+        df["stack_tuple"]=[-1]+L[0:-1]
+        if strand.connection3p() is None:
+            df["threeprime_tuple"]=[-1]+L[0:-1]
+        else:
+            df["threeprime_tuple"]=[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]+L[0:-1]
+    ## cadnano 3.1 sequence assign is wrong if there is insertion or deletion. 
+    df["r"]=[np.array([df["x"][i],df["y"][i],df["z"][i]],dtype=np.float32) for i in df.index]
+    
+    return [pd.Series(df.loc[i]) for i in df.index]
+
+
+def gen_prop_table(part):
+    strand_set=[]
+    for i in part.getidNums():
+        fwd,rev=part.getStrandSets(i)
+        [strand_set.append(i) for i in fwd.strands()]
+        [strand_set.append(i) for i in rev.strands()]
+    id_series=[]
+    for i in strand_set:
+        id_series=id_series+gen_id_series(i,part)
+    
+    nt_prop=pd.DataFrame(id_series)
+    nt_prop.reset_index(inplace=True)
+    nt_prop["seq"]=-1
+    ind_tuple=list(zip(nt_prop["vh"],nt_prop["zid"],nt_prop["fwd"]))
+    stacks=[]
+    for i in list(nt_prop["stack_tuple"]):
+        if i ==-1:
+            stacks.append(i)
+        else:
+            stacks.append(ind_tuple.index(i))
+    nt_prop["stack"]=stacks
+    tprime=[]
+    for i in list(nt_prop["threeprime_tuple"]):
+        if i ==-1:
+            tprime.append(i)
+        else:
+            tprime.append(ind_tuple.index(i))
+    nt_prop["threeprime"]=tprime
+    vhzid=list(zip(nt_prop["vh"],nt_prop["zid"]))
+    nt_prop["orientation"]=[get_helix_angle(part, helix_id, int(float(indices))) for helix_id,indices in vhzid]
+    nt_prop=nt_prop.fillna(-1)
+    counter=-1
+    bp=-np.ones(len(nt_prop.index),dtype=int)
+    bp_map=dict(zip(ind_tuple,nt_prop.index))
+    for i,j,k in ind_tuple:
+        counter+=1
+        try:
+            bp[counter]=bp_map[(i,j,not(k))]
+        except:
+            pass
+    nt_prop["bp"]=bp
+
+    return nt_prop
+
+def mrdna_model_from_cadnano(json_file,seq=None,**model_parameters):
+    part=read_json_file(json_file)
+    
+    nt_prop=gen_prop_table(part)
+
+
+    if seq is None:
+        if nt_prop["seq"][0]==-1:
+            seq=None
+        else:
+            seq=nt_prop["seq"]
+    
+    r=np.array(list(nt_prop['r']))
+    bp=np.array(list(nt_prop['bp']))
+    three_prime=np.array((list(nt_prop["threeprime"])))
+    stack=np.array((list(nt_prop["stack"])))
+    orientation=np.array(list(nt_prop["orientation"]))
+    model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters )
+    return model
diff --git a/mrdna/readers/segmentmodel_from_scadnano.py b/mrdna/readers/segmentmodel_from_scadnano.py
index 7311227..da2d4c4 100644
--- a/mrdna/readers/segmentmodel_from_scadnano.py
+++ b/mrdna/readers/segmentmodel_from_scadnano.py
@@ -85,9 +85,12 @@ def gen_strand(strand,design,group_index=0):
     strand_df["threeprime"]=list(strand_df.index[1:]+group_index)+[-1]
     stack=np.where(strand_df["stacked"],strand_df["threeprime"],-1)
     strand_df["stack"]=stack
-    if len(strand.dna_sequence)==len(strand_df.index):
-        strand_df["seq"]=list(strand.dna_sequence)
-    else:
+    try:
+        if len(strand.dna_sequence)==len(strand_df.index):
+            strand_df["seq"]=list(strand.dna_sequence)
+        else:
+            strand_df["seq"]=-1
+    except:
         strand_df["seq"]=-1
     strand_df["index"]=strand_df.index+group_index
     #strand_df["seg_index"]=index
@@ -128,10 +131,12 @@ def mrdna_model_from_scadnano(sc_file,seq=None,**model_parameters):
     bp=np.array(list(nt_prop['bp']))
     three_prime=np.array((list(nt_prop["threeprime"])))
     stack=np.array((list(nt_prop["stack"])))
+    
     if nt_prop["seq"][0]==-1:
         seq=None
     else:
         seq=nt_prop["seq"]
+
     orientation=[rotationAboutAxis(np.array((0,0,1)),i) for i in list(nt_prop["orientation_angle"])]
     model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters )
     model._dataframe=nt_prop
-- 
GitLab