From ad9e61890387a8dbead928c08531885cd9120874 Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Mon, 16 Jul 2018 14:54:20 -0500 Subject: [PATCH] Updated code that assigns num_nt to bead types to be more maintainable and fail-safe --- segmentmodel.py | 60 +++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/segmentmodel.py b/segmentmodel.py index 20ca571..ffd15d5 100644 --- a/segmentmodel.py +++ b/segmentmodel.py @@ -1600,42 +1600,48 @@ class SegmentModel(ArbdModel): if self.DEBUG: print("Assigning bead types") beadtype_s = dict() + def _assign_bead_type(bead, num_nt, decimals): + num_nt0 = bead.num_nt + bead.num_nt = np.around( np.float32(num_nt), decimals=decimals ) + key = (bead.type_.name[0].upper(), bead.num_nt) + if key in beadtype_s: + bead.type_ = beadtype_s[key] + else: + t = deepcopy(bead.type_) + t.__dict__["nts"] = bead.num_nt*2 if t.name[0].upper() in ("D","O") else bead.num_nt + t.name = t.name + "%03d" % (t.nts*10**decimals) + print( "{} --> {} ({})".format(num_nt0, bead.num_nt, t.name) ) + beadtype_s[key] = bead.type_ = t + + # (cluster_size[c-1]) + + import scipy.cluster.hierarchy as hcluster beads = [b for s in segments for b in s if b.type_.name[0].upper() in ("D","O")] data = np.array([b.num_nt for b in beads])[:,np.newaxis] - clusters = hcluster.fclusterdata(data, float(max_basepairs_per_bead)/80, criterion="distance") - cluster_size = [np.mean(data[clusters == i]) for i in np.unique(clusters)] order = int(2-np.log10(2*max_basepairs_per_bead)//1) + try: + clusters = hcluster.fclusterdata(data, float(max_basepairs_per_bead)/80, criterion="distance") + cluster_size = [np.mean(data[clusters == i]) for i in np.unique(clusters)] + except: + ## TODO debug + clusters = data + cluster_size = np.arange(len(data))+1 for b,c in zip(beads,clusters): - num_nt0 = b.num_nt - b.num_nt = np.around( np.float32(cluster_size[c-1]), decimals=order ) - key = (b.type_.name[0].upper(), b.num_nt) - if key in beadtype_s: - b.type_ = beadtype_s[key] - else: - t = deepcopy(b.type_) - t.__dict__["nts"] = b.num_nt*2 - t.name = t.name + "%03d" % (t.nts*10**order) - print( "{} --> {} ({})".format(num_nt0, b.num_nt, t.name) ) - beadtype_s[key] = b.type_ = t + _assign_bead_type(b, cluster_size[c-1], decimals=order) - beads = [b for s in segments for b in s if b.type_.name[0].upper() == "S"] + beads = [b for s in segments for b in s if b.type_.name[0].upper() in ("S")] data = np.array([b.num_nt for b in beads])[:,np.newaxis] - clusters = hcluster.fclusterdata(data, float(max_nucleotides_per_bead)/40, criterion="distance") - cluster_size = [np.mean(data[clusters == i]) for i in np.unique(clusters)] order = int(2-np.log10(max_nucleotides_per_bead)//1) + try: + clusters = hcluster.fclusterdata(data, float(max_basepairs_per_bead)/40, criterion="distance") + cluster_size = [np.mean(data[clusters == i]) for i in np.unique(clusters)] + except: + ## TODO debug + clusters = data + cluster_size = np.arange(len(data))+1 for b,c in zip(beads,clusters): - num_nt0 = b.num_nt - b.num_nt = np.around( np.float32(cluster_size[c-1]), decimals=order ) - key = (b.type_.name[0].upper(), b.num_nt) - if key in beadtype_s: - b.type_ = beadtype_s[key] - else: - t = deepcopy(b.type_) - t.__dict__["nts"] = b.num_nt - t.name = t.name + "%03d" % (t.nts*10**order) - print( "{} --> {} ({})".format(num_nt0, b.num_nt, t.name) ) - beadtype_s[key] = b.type_ = t + _assign_bead_type(b, cluster_size[c-1], decimals=order) """ Update bead indices """ self._countParticleTypes() # probably not needed here -- GitLab