From d6069d75e1e489c4c733a3c2c39f2ff5dd622f7a Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Tue, 27 Aug 2024 10:50:55 -0500 Subject: [PATCH] Order of double-stranded segment creation and ds-segment direction should match original cadnano reader. Position and orientation of cadnano bases more consistent with original cadnano reader. --- mrdna/readers/segmentmodel_from_cadnano.py | 21 ++++++++++++++++----- mrdna/readers/segmentmodel_from_lists.py | 12 ++++++++---- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/mrdna/readers/segmentmodel_from_cadnano.py b/mrdna/readers/segmentmodel_from_cadnano.py index dde5399..4b32457 100644 --- a/mrdna/readers/segmentmodel_from_cadnano.py +++ b/mrdna/readers/segmentmodel_from_cadnano.py @@ -106,7 +106,7 @@ def get_helix_angle(part, helix_id, indices, fwd=True): 'minor_groove_angle']] twist_per_base = tpr*360./bpr # angle = eulerZ - twist_per_base*indices + 0.5*mgroove + 180 - angle = eulerZ + twist_per_base*indices - 0.5*mgroove + angle = eulerZ - twist_per_base*indices - 0.5*mgroove R = rotationAboutAxis(np.array((0,0,1)),angle) if fwd: R = R.dot(rotationAboutAxis((1,0,0),180)) return R @@ -142,7 +142,10 @@ def gen_id_series(strand, part, insertion_dict): zids.insert(z_ind,str(insert_base)+"."+str(k)) z.insert(z_pos_ind,insert_base+k/(z_val+1)) df["zid"] = zids - df["z"] = -np.array(z)*3.4 + df["z"] = -3.4 * np.linspace( id_lo+0.25, id_hi-0.25, len(df) ) # Note: this spreads + # insertions uniformly through + # the strand, not through + # helical region like original did L=[(df["vh"][i],df["zid"][i],df["fwd"][i]) for i in df.index] if strand.isForward()==True: @@ -226,8 +229,10 @@ def gen_prop_table(part): nt_prop["threeprime"]=tprime devlogger.info(f' Finding orientations') + # nt_prop["orientation"]=[get_helix_angle(part, helix_id, int(float(indices)), _fwd) + # for helix_id,indices,_fwd in zip(nt_prop["vh"],nt_prop["zid"],nt_prop['fwd'])] nt_prop["orientation"]=[get_helix_angle(part, helix_id, int(float(indices)), _fwd) - for helix_id,indices,_fwd in zip(nt_prop["vh"],nt_prop["zid"],nt_prop['fwd'])] + for helix_id,indices,_fwd in zip(nt_prop["vh"],nt_prop["z"]/3.4,nt_prop['fwd'])] nt_prop=nt_prop.fillna(-1) counter=-1 devlogger.info(f' Building bp and bp_map orientations') @@ -262,9 +267,13 @@ def mrdna_model_from_cadnano(json_file,seq=None,**model_parameters): three_prime=np.array((list(nt_prop["threeprime"]))) stack=np.array((list(nt_prop["stack"]))) orientation=np.array(list(nt_prop["orientation"])) + rank = -np.array(list(nt_prop["z"])) # arrange segments in usual way + rank = rank + (np.max(rank)+1)*np.array(list(nt_prop["vh"])) devlogger.info(f'Building model from lists') - model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, explicitly_unstacked = np.argwhere(stack == -1), **model_parameters ) + model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, + rank = rank, explicitly_unstacked = np.argwhere(stack == -1), + **model_parameters ) devlogger.info(f'Fixing segment names') hmap = model._reader_list_hmap @@ -285,7 +294,9 @@ def mrdna_model_from_cadnano(json_file,seq=None,**model_parameters): devlogger.debug(f'Renaming segs in {vh}') for i,(seg,s) in enumerate(sorted(ss, key=lambda x: x[-1])): devlogger.debug(f' seg {i}: {seg} {s} {seg.end3.connection}') - seg.name = f'{vh:d}-{i:d}' + _newname = f'{vh:d}-{i:d}' + devlogger.debug(f' {seg} -> {_newname}') + seg.name = _newname seg.segname = seg.name model._dataframe=nt_prop diff --git a/mrdna/readers/segmentmodel_from_lists.py b/mrdna/readers/segmentmodel_from_lists.py index 12fe7da..cf06c57 100644 --- a/mrdna/readers/segmentmodel_from_lists.py +++ b/mrdna/readers/segmentmodel_from_lists.py @@ -78,11 +78,15 @@ def find_stacks(centers, transforms): return stacks_above -def basepairs_and_stacks_to_helixmap(basepairs,stacks_above): +def basepairs_and_stacks_to_helixmap(basepairs,stacks_above, nucleotide_rank=None): + """ Decide how helices should be created from basepair and stack data. Optionally use nucleotide_rank to determine direction of each helix and order of helices """ helixmap = -np.ones(basepairs.shape, dtype=int) helixrank = -np.ones(basepairs.shape) is_fwd = np.ones(basepairs.shape, dtype=int) + + if nucleotide_rank is None: + nucleotide_rank = np.arange(basepairs.shape, dtype=int) ## Remove stacks with nts lacking a basepairs nobp = np.where(basepairs < 0)[0] @@ -93,7 +97,7 @@ def basepairs_and_stacks_to_helixmap(basepairs,stacks_above): end_ids = np.where( (stacks_above < 0)*(basepairs >= 0) )[0] hid = 0 - for end in end_ids: + for end in sorted(end_ids, key=lambda x: nucleotide_rank[x]): if helixmap[end] >= 0: continue rank = 0 @@ -231,7 +235,7 @@ def set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation=No def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime, sequence=None, orientation=None, - explicitly_unstacked = tuple(), + rank=None, explicitly_unstacked = tuple(), max_basepairs_per_bead = 5, max_nucleotides_per_bead = 5, local_twist = False, @@ -325,7 +329,7 @@ def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime, five_prime = _three_prime_list_to_five_prime(three_prime) """ Build map of dsDNA helices and strands """ - hmap,hrank,fwd,intrahelical = basepairs_and_stacks_to_helixmap(bps,stack) + hmap,hrank,fwd,intrahelical = basepairs_and_stacks_to_helixmap(bps,stack,rank) double_stranded_helices = np.unique(hmap[hmap >= 0]) strands, strand_is_circular = _primes_list_to_strands(three_prime, five_prime) -- GitLab