From 08f835d242b3fb637694d6526dfbb0b435d37877 Mon Sep 17 00:00:00 2001 From: pinyili2 <pinyili2@illinois.edu> Date: Tue, 20 Aug 2024 17:34:06 -0500 Subject: [PATCH] add --- mrdna/readers/.nfs000000000007668300005e88 | Bin 0 -> 16384 bytes .../readers/segmentmodel_from_cadnano.bak.py | 225 ++++++++++++++++++ mrdna/readers/segmentmodel_from_scadnano.py | 11 +- 3 files changed, 233 insertions(+), 3 deletions(-) create mode 100644 mrdna/readers/.nfs000000000007668300005e88 create mode 100644 mrdna/readers/segmentmodel_from_cadnano.bak.py diff --git a/mrdna/readers/.nfs000000000007668300005e88 b/mrdna/readers/.nfs000000000007668300005e88 new file mode 100644 index 0000000000000000000000000000000000000000..ed1479fd8b315ab84d5703322bf45d99a30438d1 GIT binary patch literal 16384 zcmeHOO^h5z6)v0~JAffdkS|D9rS~ws>z<z7o!FQ~b6D1nv4nS#Z4!xHYv}2&nJL_V zuBzGDi4RC1BqTU<h~UZzkPsIR2tf{z<ideVWC;Z>C?KRDE{GF+uey6@cQy`07$HQp z{B2Ly&wEwhd#|dxx;$8Y@%jyZIb3D9o@VU#@BC=^=JTi77vDU^N}11OD%UCNy)H8t zrR`~<?yh|zEXOk~V;#ygm3bj`D3VE2x06IB>S&?%g}U5nq<1ov<Wc=si6{|DqtGZ5 zxygz|r2U~PvVI()R9=MTtP$t#jseHO`(ofM>#ncaP+#(%<Ig_x`S(@Z6?P0b1{?#9 z0mp!2z%k$$a11yG{<j%0<zwv2=)$SG8^ikkfrb0u>ua}8A1&lRRzElQR~Pc@M|X1! zI0hU8jseGjW56-s7;p?Y1{?#90mp!2;J?5?lrZ)z^!}1w`0@R}dH;X=qm2Cqcmwze z@Lk~Bz}JD#0#5=@06+W)W4k~AaNrzp8u;B=#(oRD1^gVi2Xujt0e}54V^d%cxC*p^ zr-9SJTOVTV*T7GKSAeeqd%!ku3i#s(8T$k9Ht+`UI`A{#JHQmU3w#FX0Ox=|J;m4z z*aFT156&?5W8j;>H-N7I5?BY$1J3||JI&auzz=}$178C&;Bny3Pcrrn@BnxbxD1>J z{_+I40EWP)fb+mpz!~7d<BWX|_!e*<co`S~J3tRO1N;gbC$9i0kN`1o9(WBKGX-!C zARGOn*_b)*EBsMhEip-j`a&yUk3t%6nTg7EzZ9ykL@6Tk$OI1}E&c<7cUxnT$~a2< zr8M!lZtl1Y_@Gn)H}t2XDB#&h6~(^bJzhr2g!|)usS0$}NTI{Dh<8cUL0={T;sTfF zdG?-2C1N9sJm6!I$AX)&;F-)Pnrk`Ab;xTKwse#yT$+}K5~;Y<RI*V0N_RiXN2%b6 z7%3q%kEYSgR+<Vv(Usu1$oFxK2I{Pyi!Cl7Q5!TF6%1c(YTJr8<!Eg97)nLLuWWB} zRhY;?bc_z&ok|F$qc|L5Ou@S?TdB>fW}tCfFWHm}M^fq}^q#+PbLR%%+JxNgo%Qfy zMGMEZ#ATTbLM=?+s3;cxvK3QjWo4m^En!nh(lueYXFJg^ltlj`>Y>Mng#xLd-LjpC zAs<0zWeD3ypKL|M*QVhzPh>3g@<!DfCCFv1MT5Mb46R1`(MyK5eJC#ud@rA5Ue!os zl}UI7=bBznQ6~H^=>p~Pxa;j}^d?WVcW2%o>Qi$)#BXHMhxnk0#XIn@?xocg|59gd zHN1FIlpH?H15Zlo@zo91=%+g|Ruuykl^jrZJ=vaQ+6OC5Stv<)ug6>Z-Xv0@HP1n^ zWjT$E0hN~Fc_YbM*A3S~WbmTu1Q%@2{Ydj-$YXJ7I9y%(<T6hRxLICMQDa^8svU}K z11p`p)&TESlNfd)zbq6*Q#uPEK;|Q^3m8|4!)Qah6A|MOnWEOO*JdW>>%i5!t9K52 zsUmXms|T?t$)S92Cr@XP98c&h8mWV;$($5Lh*leYb*C(M9Mo``m8qceh#^Fn86z-^ z%tTmw3{fd^%adN_&-M6PMJ6>$qgA`5?Q}^7f$9+U_#9>V%Xj7>bJ!`qe%SivfZR}~ zLKflvdQ~W_Za!oM{Lnq;CPF&2Ua31-$DXhp%&k1v*L|qI?!TeWs99~B#^VlKipJ4z zjHywgeKG;Koj1QJUuW<OJ)U0h_VQ}9ZF1?pL@vyg2=!#(E6?AXE^l}NSFOD@Z=)6P z^rX;*D#4y!p=!2P;O(vbOSSGFt#$uBYK@I4^+u<IkuAQ9xkU}rVj9Lp*10zknmkf> zo?q>DFLk>e8l-W+a@0<vmuGDvs4eq$IWuGY5(g#CG03gwq**)1;KYuyLBEgj(Xzer zPIigLh?KlB9?u<usyCGP5N?bT=$cA1=9ma`pZt*2E^`EXQ+Ls^sLH#V+K<O`d;Alj zmG{tiPsd05Rvu}eHv!JYNL4++*F#C)%*x}eluG8t_g>n(xqW^6+6JHG(Ew8{X1hcf zp)mW3W*Dm?z8REAQXSx=N2FRDC+nz6xM2%+5;NF#P|c!MC5#RI5r!r^R8dlME!M7w zVb<i&E7qgQBF_r~pd}>RAgZQ9)j@A*B#E{ShqJ*oC6n9eUR`QI(l+|RnyI$Vt0sXB zwzLFcZ5~S%Ptr({!&HN}Zn9`5#{7#>5<{gP7n3yM13?Rv_%4l~u_`ncDmfQfX=X6r zh?X+GO{NIZ)q;eUNwlzmlhSgkS_8mnI4_K6bs}%M(VrBPL5k>*R$bj1%DlaEWoN@K zg<yV-aFS8Vv%<)sZF`j+M_47!yI0Tm)C!Ll9v9(FneYxB1`EsdNNp`J<YRfoZ&hcV zZ_ruisy~}*kW`PD(6UKtB<2)r5Sm4EqKI4v-k4Bh?KFjT8ihOuKl6zeZf{p?&D?;& z-p*w-OVP5b8TMKIT$gJem1urZOjV-7&>qSTaT>d?1x6-X#MafbwJrraZpdkhA3e(5 z6g=AHfZwuH=`Fil5=osGpm8vcSV(T9Mxdx_NL&;u(T!vp0f@dWn~LF`p7%$`l4w;6 zKiV7=6SH|Bbxjghc#DV5Q>ig2H@%x5MyD4LT9Wszr}n$n(hMuX!mb>&<tU8DSS^U8 zA8A_J)0zO0PXR-d5n?8SWqYk@r1BQ4b(C;rFo=I;(xkx()`+9D7&L96-%Wm@13lBt z6Bdu<j0EImlZ4eZTjK2%gkq6^Ym94V*uG3DjNbo0j`#R2fZqS{%4x6H@qD{Z|1{;f z%Q4^>a11yG90QI4$ADwNG2j?*3^)cH1OML)(3V-fv1oTB7$w+Wc-^=6>VL4m>!yLv z@lkPrPmFda0UsI1_?U-piP$+pnz$G6BcHf>w0RKlm6b{=?H^^dFR6L8S9-K)UWSoU X(afhk3_mya`{b^7qzFDtEwg_B-uLQ4 literal 0 HcmV?d00001 diff --git a/mrdna/readers/segmentmodel_from_cadnano.bak.py b/mrdna/readers/segmentmodel_from_cadnano.bak.py new file mode 100644 index 0000000..bfb56d9 --- /dev/null +++ b/mrdna/readers/segmentmodel_from_cadnano.bak.py @@ -0,0 +1,225 @@ +# -*- coding: utf-8 -*- +import numpy as np +import os,sys +from glob import glob +import re +import pandas as pd +pd.options.mode.chained_assignment = None # default='warn' +from .segmentmodel_from_lists import model_from_basepair_stack_3prime + +from ..arbdmodel.coords import readArbdCoords, readAvgArbdCoords, rotationAboutAxis +from ..segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment +from ..model.dna_sequence import m13 as m13seq +import json +import re +import pdb + + + +## TODO: separate SegmentModel from ArbdModel so multiple parts can be combined +## TODO: catch circular strands in "get_5prime" cadnano calls +## TODO: handle special motifs +## - doubly-nicked helices +## - helices that should be stacked across an empty region (crossovers from and end in the helix to another end in the helix) +## - circular constructs + + +def get_lattice(part): + lattice_type = None + _gt = part.getGridType() + try: + lattice_type = _gt.name.lower() + except: + if _gt == 1: + lattice_type = 'square' + elif _gt == 2: + lattice_type = 'honeycomb' + else: + print(lattice_type) + return lattice_type + + +def read_json_file(filename): + import cadnano + from cadnano.document import Document + + try: + with open(filename) as ch: + json_data = json.load(ch) + except: + with open(filename) as ch: + content = "" + for l in ch: + l = re.sub(r"'", r'"', l) + # https://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name + # l = re.sub(r"{\s*(\w)", r'{"\1', l) + # l = re.sub(r",\s*(\w)", r',"\1', l) + # l = re.sub(r"(\w):", r'\1":', l) + content += l+"\n" + json_data = json.loads(content) + + try: + doc = Document() + cadnano.fileio.v3decode.decode(doc, json_data) + decoder = 3 + except: + doc = Document() + cadnano.fileio.v2decode.decode(doc, json_data) + decoder = 2 + + parts = [p for p in doc.getParts()] + if len(parts) != 1: + raise Exception("Only documents containing a single cadnano part are implemented at this time.") + part = parts[0] + + if decoder == 2: + """ It seems cadnano2.5 (as of ce6ff019) does not set the EulerZ for square lattice structures correctly, doing so here """ + l = get_lattice(part) + if l == 'square': + for id_num in part.getIdNums(): + if part.vh_properties.loc[id_num,'eulerZ'] == 0: + part.vh_properties.loc[id_num,'eulerZ'] = 360*(6/10.5) + df=pd.DataFrame(json_data["vstrands"]) + n_df=df.set_index("num") + return part + +def get_helix_angle(part, helix_id, indices): + """ Get "start_orientation" for helix """ + # import ipdb + # ipdb.set_trace() + + """ FROM CADNANO2.5 + + angle is CCW + - angle is CW + Right handed DNA rotates clockwise from 5' to 3' + we use the convention the 5' end starts at 0 degrees + and it's pair is minor_groove_angle degrees away + direction, hence the minus signs. eulerZ + """ + + hp, bpr, tpr, eulerZ, mgroove = part.vh_properties.loc[helix_id, + ['helical_pitch', + 'bases_per_repeat', + 'turns_per_repeat', + 'eulerZ', + 'minor_groove_angle']] + twist_per_base = tpr*360./bpr + # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180 + angle = eulerZ + twist_per_base*indices - 0.5*mgroove + return rotationAboutAxis(np.array((0,0,1)),angle) + +def gen_id_series(strand,part): + df=pd.DataFrame(columns=["vh","zid","fwd","stack_tuple","threeprime_tuple","x","y","z"],index=range(strand.totalLength()),dtype=object) + df["vh"]=strand._id_num + df["fwd"]=strand.isForward() + df["x"]=part.getVirtualHelixOrigin(strand._id_num)[0]*10 + df["y"]=part.getVirtualHelixOrigin(strand._id_num)[1]*10 + id_lo,id_hi=strand.idxs() + zids=[str(i) for i in range(id_lo,id_hi+1)] + insert_dict={} + insert_dict=dict([(j.idx(),j.length()) for j in strand.insertionsOnStrand()]) + z=np.arange(id_lo,id_hi+1) + zids=[str(i) for i in range(id_lo,id_hi+1)] + z=list(np.arange(id_lo,id_hi+1)) + zids=[str(i) for i in range(id_lo,id_hi+1)] + for insert_base in insert_dict: + z_ind=zids.index(str(insert_base)) + z_val=insert_dict[insert_base] + z_pos_ind=z.index(insert_base) + zids.pop(z_ind) + z.pop(z_pos_ind) + if z_val!=-1: + #l=[str(insert_base)+"."+str(i) for i in range(z_val+1)] + l=list(range(z_val+1)) + l.reverse() + for k in l: + zids.insert(z_ind,str(insert_base)+"."+str(k)) + z.insert(z_pos_ind,insert_base+k/(z_val+1)) + df["zid"]=zids + df["z"]=np.array(z)*3.4 + + + L=[(df["vh"][i],df["zid"][i],df["fwd"][i]) for i in df.index] + if strand.isForward()==True: + df["stack_tuple"]=L[1:]+[-1] + if strand.connection3p() is None: + df["threeprime_tuple"]=L[1:]+[-1] + else: + df["threeprime_tuple"]=L[1:]+[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())] + + + else: + df["stack_tuple"]=[-1]+L[0:-1] + if strand.connection3p() is None: + df["threeprime_tuple"]=[-1]+L[0:-1] + else: + df["threeprime_tuple"]=[(strand.connection3p().idNum(),str(strand.connection3p().idx5Prime()),strand.connection3p().isForward())]+L[0:-1] + ## cadnano 3.1 sequence assign is wrong if there is insertion or deletion. + df["r"]=[np.array([df["x"][i],df["y"][i],df["z"][i]],dtype=np.float32) for i in df.index] + + return [pd.Series(df.loc[i]) for i in df.index] + + +def gen_prop_table(part): + strand_set=[] + for i in part.getidNums(): + fwd,rev=part.getStrandSets(i) + [strand_set.append(i) for i in fwd.strands()] + [strand_set.append(i) for i in rev.strands()] + id_series=[] + for i in strand_set: + id_series=id_series+gen_id_series(i,part) + + nt_prop=pd.DataFrame(id_series) + nt_prop.reset_index(inplace=True) + nt_prop["seq"]=-1 + ind_tuple=list(zip(nt_prop["vh"],nt_prop["zid"],nt_prop["fwd"])) + stacks=[] + for i in list(nt_prop["stack_tuple"]): + if i ==-1: + stacks.append(i) + else: + stacks.append(ind_tuple.index(i)) + nt_prop["stack"]=stacks + tprime=[] + for i in list(nt_prop["threeprime_tuple"]): + if i ==-1: + tprime.append(i) + else: + tprime.append(ind_tuple.index(i)) + nt_prop["threeprime"]=tprime + vhzid=list(zip(nt_prop["vh"],nt_prop["zid"])) + nt_prop["orientation"]=[get_helix_angle(part, helix_id, int(float(indices))) for helix_id,indices in vhzid] + nt_prop=nt_prop.fillna(-1) + counter=-1 + bp=-np.ones(len(nt_prop.index),dtype=int) + bp_map=dict(zip(ind_tuple,nt_prop.index)) + for i,j,k in ind_tuple: + counter+=1 + try: + bp[counter]=bp_map[(i,j,not(k))] + except: + pass + nt_prop["bp"]=bp + + return nt_prop + +def mrdna_model_from_cadnano(json_file,seq=None,**model_parameters): + part=read_json_file(json_file) + + nt_prop=gen_prop_table(part) + + + if seq is None: + if nt_prop["seq"][0]==-1: + seq=None + else: + seq=nt_prop["seq"] + + r=np.array(list(nt_prop['r'])) + bp=np.array(list(nt_prop['bp'])) + three_prime=np.array((list(nt_prop["threeprime"]))) + stack=np.array((list(nt_prop["stack"]))) + orientation=np.array(list(nt_prop["orientation"])) + model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters ) + return model diff --git a/mrdna/readers/segmentmodel_from_scadnano.py b/mrdna/readers/segmentmodel_from_scadnano.py index 7311227..da2d4c4 100644 --- a/mrdna/readers/segmentmodel_from_scadnano.py +++ b/mrdna/readers/segmentmodel_from_scadnano.py @@ -85,9 +85,12 @@ def gen_strand(strand,design,group_index=0): strand_df["threeprime"]=list(strand_df.index[1:]+group_index)+[-1] stack=np.where(strand_df["stacked"],strand_df["threeprime"],-1) strand_df["stack"]=stack - if len(strand.dna_sequence)==len(strand_df.index): - strand_df["seq"]=list(strand.dna_sequence) - else: + try: + if len(strand.dna_sequence)==len(strand_df.index): + strand_df["seq"]=list(strand.dna_sequence) + else: + strand_df["seq"]=-1 + except: strand_df["seq"]=-1 strand_df["index"]=strand_df.index+group_index #strand_df["seg_index"]=index @@ -128,10 +131,12 @@ def mrdna_model_from_scadnano(sc_file,seq=None,**model_parameters): bp=np.array(list(nt_prop['bp'])) three_prime=np.array((list(nt_prop["threeprime"]))) stack=np.array((list(nt_prop["stack"]))) + if nt_prop["seq"][0]==-1: seq=None else: seq=nt_prop["seq"] + orientation=[rotationAboutAxis(np.array((0,0,1)),i) for i in list(nt_prop["orientation_angle"])] model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters ) model._dataframe=nt_prop -- GitLab