diff --git a/mrdna/__init__.py b/mrdna/__init__.py
index aaf23788c75c6869d7ad6a201243ebefc08aa289..28011c1266088c295d587ea0c57cfd180012588c 100644
--- a/mrdna/__init__.py
+++ b/mrdna/__init__.py
@@ -7,11 +7,11 @@ def _get_username():
     except:
         return None
 
-logging.basicConfig(format='%(name)s: %(levelname)s: %(message)s')
+logging.basicConfig(format='%(asctime)s %(name)s: %(levelname)s: %(message)s')
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
 _ch = logging.StreamHandler()
-_ch.setFormatter(logging.Formatter('%(name)s: %(levelname)s: %(message)s'))
+_ch.setFormatter(logging.Formatter('%(asctime)s %(name)s: %(levelname)s: %(message)s'))
 logger.addHandler(_ch)
 logger.propagate = False
diff --git a/mrdna/readers/segmentmodel_from_cadnano.py b/mrdna/readers/segmentmodel_from_cadnano.py
index a31a13488dd7784571d22da1b0d14e38468a66fe..e7a080e8b6515a5480b9d5a0e106fc99de983adb 100644
--- a/mrdna/readers/segmentmodel_from_cadnano.py
+++ b/mrdna/readers/segmentmodel_from_cadnano.py
@@ -10,6 +10,7 @@ from .segmentmodel_from_lists import model_from_basepair_stack_3prime
 from ..arbdmodel.coords import readArbdCoords, readAvgArbdCoords, rotationAboutAxis
 from ..segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment
 from ..model.dna_sequence import m13 as m13seq
+from .. import logger, devlogger
 import json
 import re
 import pdb
@@ -108,24 +109,27 @@ def get_helix_angle(part, helix_id, indices):
     angle = eulerZ + twist_per_base*indices - 0.5*mgroove
     return rotationAboutAxis(np.array((0,0,1)),angle)
 
-def gen_id_series(strand,part):
+def gen_id_series(strand, part, insertion_dict):
+    vh = strand._id_num
     df=pd.DataFrame(columns=["vh","zid","fwd","stack_tuple","threeprime_tuple","x","y","z"],index=range(strand.totalLength()),dtype=object)
-    df["vh"]=strand._id_num
+    df["vh"]=vh
     df["fwd"]=strand.isForward()
     df["x"]=part.getVirtualHelixOrigin(strand._id_num)[0]*10
     df["y"]=part.getVirtualHelixOrigin(strand._id_num)[1]*10
     id_lo,id_hi=strand.idxs()
-    zids=[str(i) for i in range(id_lo,id_hi+1)]
-    insert_dict={}
-    insert_dict=dict([(j.idx(),j.length()) for j in strand.insertionsOnStrand()])
-    z=np.arange(id_lo,id_hi+1)
-    zids=[str(i) for i in range(id_lo,id_hi+1)]
     z=list(np.arange(id_lo,id_hi+1))
     zids=[str(i) for i in range(id_lo,id_hi+1)]
+
+    insert_dict=dict([(j.idx(),j.length()) for j in strand.insertionsOnStrand()])
     for insert_base in insert_dict:
         z_ind=zids.index(str(insert_base))
         z_val=insert_dict[insert_base]
         z_pos_ind=z.index(insert_base)
+
+        if vh not in insertion_dict:
+            insertion_dict[vh] = dict()
+        insertion_dict[vh][insert_base] = z_val # record insertions for later
+
         zids.pop(z_ind)
         z.pop(z_pos_ind)
         if z_val!=-1:
@@ -137,8 +141,7 @@ def gen_id_series(strand,part):
                 z.insert(z_pos_ind,insert_base+k/(z_val+1))
     df["zid"]=zids
     df["z"]=np.array(z)*3.4
-
-    
+
     L=[(df["vh"][i],df["zid"][i],df["fwd"][i]) for i in df.index]
     if strand.isForward()==True:
         df["stack_tuple"]=L[1:]+[-1]
@@ -161,29 +164,49 @@ def gen_prop_table(part):
+    devlogger.info(f' Getting strand sets')
     strand_set=[]
+    id_series=[]
+    insertion_dict = dict()
     for i in part.getidNums():
         fwd,rev=part.getStrandSets(i)
         [strand_set.append(i) for i in fwd.strands()]
         [strand_set.append(i) for i in rev.strands()]
-    id_series=[]
     for i in strand_set:
-        id_series=id_series+gen_id_series(i,part)
-    
+        id_series=id_series+gen_id_series(i,part, insertion_dict)
+
+    devlogger.info(f' Getting sequences')
     nt_prop=pd.DataFrame(id_series)
     nt_prop.reset_index(inplace=True)
     nt_prop["seq"]=-1
 
     ind_tuple=list(zip(nt_prop["vh"],nt_prop["zid"],nt_prop["fwd"]))
+
+    devlogger.info(f' Stacking strand ends')
     not_stacked,=np.where(nt_prop["stack_tuple"]==-1)
+
+    def _get_next(vh, zid, fwd):
+        nzid = zid + fwd*2-1
+        try:
+            insertions = insertion_dict[vh][nzid]
+        except:
+            insertions = 0
+        if insertions < 0: return _get_next(vh, nzid, fwd)
+        elif insertions > 0: return f'{nzid}.0'
+        return str(nzid)
+
     for i in not_stacked:
-        zid=int(nt_prop.loc[i]["zid"])
-        fwd=nt_prop.loc[i]["fwd"]
-        zid+=fwd*2-1
+        vh,zid,fwd = [nt_prop.loc[i][key] for key in ('vh','zid','fwd')]
+        devlogger.debug(f' Checking missing stack at {(vh,zid,fwd)}')
+        zid= _get_next(vh, int(zid.split('.')[0]), fwd)
         try:
-            ind_tuple.index((nt_prop.loc[i]["vh"],str(zid),fwd))
-            nt_prop["stack_tuple"][i]=(nt_prop.loc[i]["vh"],str(zid),fwd)
+            _tup = (vh,zid,fwd)
+            ind_tuple.index(_tup) # continue if _tup not in ind_tuple
+            nt_prop["stack_tuple"][i] = _tup
+            devlogger.debug(f' Found missing stack {_tup}')
         except:
             continue
 
+
+    devlogger.info(f' Building stacks')
     stacks=[]
     for i in list(nt_prop["stack_tuple"]):
         if i ==-1:
@@ -191,6 +214,7 @@ def gen_prop_table(part):
         else:
             stacks.append(ind_tuple.index(i))
     nt_prop["stack"]=stacks
+    devlogger.info(f' Building 3prime')
     tprime=[]
     for i in list(nt_prop["threeprime_tuple"]):
         if i ==-1:
@@ -199,9 +223,11 @@ def gen_prop_table(part):
             tprime.append(ind_tuple.index(i))
     nt_prop["threeprime"]=tprime
     vhzid=list(zip(nt_prop["vh"],nt_prop["zid"]))
+    devlogger.info(f' Finding orientations')
     nt_prop["orientation"]=[get_helix_angle(part, helix_id, int(float(indices))) for helix_id,indices in vhzid]
     nt_prop=nt_prop.fillna(-1)
     counter=-1
+    devlogger.info(f' Building bp and bp_map orientations')
     bp=-np.ones(len(nt_prop.index),dtype=int)
     bp_map=dict(zip(ind_tuple,nt_prop.index))
     for i,j,k in ind_tuple:
@@ -215,11 +241,13 @@ def gen_prop_table(part):
     return nt_prop
 
 def mrdna_model_from_cadnano(json_file,seq=None,**model_parameters):
+    devlogger.info(f'Reading {json_file}')
     part=read_json_file(json_file)
 
+    devlogger.info(f'Generating property table')
     nt_prop=gen_prop_table(part)
-    
+    devlogger.info(f'Building arrays')
     if seq is None:
         if nt_prop["seq"][0]==-1:
             seq=None
@@ -231,6 +259,31 @@ def mrdna_model_from_cadnano(json_file,seq=None,**model_parameters):
     three_prime=np.array((list(nt_prop["threeprime"])))
     stack=np.array((list(nt_prop["stack"])))
     orientation=np.array(list(nt_prop["orientation"]))
-    model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters )
+    devlogger.info(f'Building model from lists')
+
+    model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, explicitly_unstacked = np.argwhere(stack == -1), **model_parameters )
+
+    devlogger.info(f'Fixing segment names')
+    hmap = model._reader_list_hmap
+    hrank = model._reader_list_hrank
+    fwd = model._reader_list_fwd
+    _starts = dict()
+    for seg in model.segments:
+        hid = int(seg._helix_idx)
+        sl = model._reader_list_hmap == hid
+        start_idx, end_idx = [np.argwhere(fwd & (hrank == i) & (hmap == hid)).flatten() for i in (0,seg.num_nt-1)]
+        for _a in (start_idx, end_idx): assert( _a.size == 1 )
+        start = max(map(float, [nt_prop['zid'][i] for i in (start_idx,end_idx)] ))
+        cadnano_helix = int(nt_prop['vh'][start_idx])
+        if cadnano_helix not in _starts: _starts[cadnano_helix] = []
+        _starts[cadnano_helix].append( (seg,start) )
+
+    for vh,ss in _starts.items():
+        devlogger.debug(f'Renaming segs in {vh}')
+        for i,(seg,s) in enumerate(sorted(ss, key=lambda x: -x[-1])):
+            devlogger.debug(f' seg {i}: {seg} {s} {seg.end3.connection}')
+            seg.name = f'{vh:d}-{i:d}'
+            seg.segname = seg.name
+
     model._dataframe=nt_prop
     return model
diff --git a/mrdna/readers/segmentmodel_from_lists.py b/mrdna/readers/segmentmodel_from_lists.py
index 2a1a8dd336e6ca0de31568eb0fd3c2f3a622c6c4..12fe7dabc921ea9192965d06e4a8f67b133f6d77 100644
--- a/mrdna/readers/segmentmodel_from_lists.py
+++ b/mrdna/readers/segmentmodel_from_lists.py
@@ -230,7 +230,8 @@ def set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation=No
 
 
 def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,
-                                     sequence=None, orientation=None,
+                                     sequence=None, orientation=None,
+                                     explicitly_unstacked = tuple(),
                                      max_basepairs_per_bead = 5,
                                      max_nucleotides_per_bead = 5,
                                      local_twist = False,
@@ -360,9 +361,9 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,
     doubleSegments = []
     for hid in double_stranded_helices:
         seg = DoubleStrandedSegment(name=str(hid),
-                                    num_bp = np.sum(hmap==hid)//2)
+                                    num_bp = np.sum(hmap==hid)//2,
+                                    _helix_idx = hid)
         set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation)
-
         assert(hid == len(doubleSegments))
         doubleSegments.append(seg)
 
@@ -370,7 +371,8 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,
     singleSegments = []
     for hid in single_stranded_helices:
         seg = SingleStrandedSegment(name=str(hid),
-                                    num_nt = np.sum(hmap==hid))
+                                    num_nt = np.sum(hmap==hid),
+                                    _helix_idx = hid)
         set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation)
 
         assert(hid == len(doubleSegments) + len(singleSegments))
@@ -414,7 +416,7 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,
             is_terminal2 = hmap[three_prime[bps[r2]]] == hmap[five_prime[r2]]
 
             """ Place connection """
-            if is_terminal1 and is_terminal2:
+            if is_terminal1 and is_terminal2 and (r1 not in explicitly_unstacked) and (r2 not in explicitly_unstacked):
                 end1 = seg1.end3 if f1 else seg1.start3
                 end2 = seg2.start5 if f2 else seg2.end5
                 seg1._connect_ends( end1, end2, type_='intrahelical')
@@ -423,6 +425,28 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,
             else:
                 seg1.add_crossover(nt1,seg2,nt2,[f1,f2])
 
+    ## Convert terminal_crossovers to regular ones if other regular crossovers are nearby
+    for seg1 in doubleSegments:
+        cAB = seg1.get_connections_and_locations(exclude='intrahelical')
+        conns = {}
+        for c,A,B in cAB:
+            seg2 = B.container
+            if seg2 not in conns: conns[seg2] = []
+            assert('crossover' in c.type_)
+            conns[seg2].append(c)
+
+        devlogger.debug(f'{seg1}: {conns}')
+        nt_pos = {c:A.get_nt_pos() for c,A,B in cAB}
+        for c,A,B in cAB:
+            if c.type_ != 'terminal_crossover': continue
+            seg2 = B.container
+            if seg2 not in doubleSegments: continue
+            dists = np.abs(np.array([nt_pos[c] - nt_pos[c2] for c2 in conns[seg2]]))
+            devlogger.debug(f'Terminal crossover {c} distance: {dists}')
+            if np.sum((dists > 0) & (dists < 75)) > 0:
+                devlogger.debug(f'Fixing crossover at {c,A,B}')
+                c.type_ = 'crossover'
+
     ## Add 5prime/3prime ends
     for r in prime5:
         seg = allSegments[hmap[r]]
diff --git a/mrdna/segmentmodel.py b/mrdna/segmentmodel.py
index 0eeef03a77ed0f2200364bd9d4f9207b7ed0e4df..7aa19bbf9a526061670fb40f5cfffd37f83b45db 100644
--- a/mrdna/segmentmodel.py
+++ b/mrdna/segmentmodel.py
@@ -2330,7 +2330,7 @@ class SegmentModel(ArbdModel):
 
         ret_list = []
         for s in self.segments:
-            for c in filter(lambda c: c.type_ == "crossover", s.connections):
+            for c in filter(lambda c: c.type_ == "crossover" or c.type_ == "terminal_crossover", s.connections):
                 seg1 = c.A.container
                 seg2 = c.B.container
                 nt1 = c.A.get_nt_pos()
@@ -2391,7 +2391,7 @@ class SegmentModel(ArbdModel):
 
     def convert_crossovers_to_intrahelical(self, position_filter=None, terminal_only=False):
-        conn = self.get_crossovers_at_ends() if terminal_only else [c for c,A,B in self.get_connections('crossover')]
+        conn = self.get_crossovers_at_ends() if terminal_only else [c for c,A,B in self.get_connections('crossover')]+[c for c,A,B in self.get_connections('terminal_crossover')]
        for c in conn:
            if position_filter is not None:
                r1,r2 = [l.container.contour_to_position(l.address)