From 8935b4585ca7bf501d11b01ffc682c879ebb4d8d Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Fri, 13 Jul 2018 18:40:19 -0500 Subject: [PATCH] Now clustering nucleotide types rather than rounding; gives more accurate number of nucleotides in a double helix --- segmentmodel.py | 76 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 25 deletions(-) diff --git a/segmentmodel.py b/segmentmodel.py index acb4955..8f3dd57 100644 --- a/segmentmodel.py +++ b/segmentmodel.py @@ -594,8 +594,6 @@ class Segment(ConnectableElement, Group): # print(" pausing at",l) return pos, l.container, pos+(2*is_fwd-1), is_fwd, 0 - import pdb - pdb.set_trace() raise Exception("Shouldn't be here") # print("Shouldn't be here") ## Made it to the end of the segment without finding a connection @@ -689,9 +687,6 @@ class Segment(ConnectableElement, Group): for b in existing_beads: assert(b.parent is not None) - # if self.name in ("31-2",): - # pdb.set_trace() - ## Add ends if they don't exist yet ## TODOTODO: test 1 nt segments? if len(existing_beads) == 0 or existing_beads[0].get_nt_position(self) >= 0.5: @@ -1604,26 +1599,43 @@ class SegmentModel(ArbdModel): """ Reassign bead types """ if self.DEBUG: print("Assigning bead types") beadtype_s = dict() - for segment in segments: - for b in segment: - # b.num_nt = np.around( b.num_nt, decimals=1 ) - b.num_nt = 3*np.around( float(b.num_nt)/3, decimals=1 ) - key = (b.type_.name[0].upper(), b.num_nt) - if key in beadtype_s: - b.type_ = beadtype_s[key] - else: - t = deepcopy(b.type_) - if key[0] == "D": - t.__dict__["nts"] = b.num_nt*2 - elif key[0] == "S": - t.__dict__["nts"] = b.num_nt - elif key[0] == "O": - t.__dict__["nts"] = b.num_nt - else: - raise Exception("TODO") - # print(t.nts) - t.name = t.name + "%03d" % (10*t.nts) - beadtype_s[key] = b.type_ = t + + import scipy.cluster.hierarchy as hcluster + beads = [b for s in segments for b in s if b.type_.name[0].upper() in ("D","O")] + data = np.array([b.num_nt for b in beads])[:,np.newaxis] + clusters = hcluster.fclusterdata(data, float(max_basepairs_per_bead)/80, criterion="distance") + cluster_size = [np.mean(data[clusters == i]) for i in np.unique(clusters)] + order = int(2-np.log10(2*max_basepairs_per_bead)//1) + for b,c in zip(beads,clusters): + num_nt0 = b.num_nt + b.num_nt = np.around( np.float32(cluster_size[c-1]), decimals=order ) + key = (b.type_.name[0].upper(), b.num_nt) + if key in beadtype_s: + b.type_ = beadtype_s[key] + else: + t = deepcopy(b.type_) + t.__dict__["nts"] = b.num_nt*2 + t.name = t.name + "%03d" % (t.nts*10**order) + print( "{} --> {} ({})".format(num_nt0, b.num_nt, t.name) ) + beadtype_s[key] = b.type_ = t + + beads = [b for s in segments for b in s if b.type_.name[0].upper() == "S"] + data = np.array([b.num_nt for b in beads])[:,np.newaxis] + clusters = hcluster.fclusterdata(data, float(max_nucleotides_per_bead)/40, criterion="distance") + cluster_size = [np.mean(data[clusters == i]) for i in np.unique(clusters)] + order = int(2-np.log10(max_nucleotides_per_bead)//1) + for b,c in zip(beads,clusters): + num_nt0 = b.num_nt + b.num_nt = np.around( np.float32(cluster_size[c-1]), decimals=order ) + key = (b.type_.name[0].upper(), b.num_nt) + if key in beadtype_s: + b.type_ = beadtype_s[key] + else: + t = deepcopy(b.type_) + t.__dict__["nts"] = b.num_nt + t.name = t.name + "%03d" % (t.nts*10**order) + print( "{} --> {} ({})".format(num_nt0, b.num_nt, t.name) ) + beadtype_s[key] = b.type_ = t """ Update bead indices """ self._countParticleTypes() # probably not needed here @@ -1667,6 +1679,20 @@ class SegmentModel(ArbdModel): bond = self.get_bond_potential(k,d) parent.add_bond( b1, b2, bond, exclude=True ) + # for s in self.segments: + # sepsum = 0 + # beadsum = 0 + # for b1 in s.beads: + # beadsum += b1.num_nt + # for bead_list in self._recursively_get_beads_within_bonds(b1, 1): + # assert(len(bead_list) == 1) + # if b1.idx < bead_list[-1].idx: # avoid double-counting + # for b2 in bead_list: + # if b2.parent == b1.parent: + # sepsum += dists[b1][b2] + # sepsum += sep + # print("Helix {}: bps {}, beads {}, separation {}".format(s.name, s.num_nt, beadsum, sepsum)) + """ Add intrahelical angle potentials """ if self.DEBUG: print("Adding intrahelical angle potentials") for b1,b2,b3 in self._get_intrahelical_angle_beads(): -- GitLab