Slightly improved pdb reader basepair and stack search

861434ba · cmaffeo2 · e32b9c26 · 861434ba
Commit 861434ba authored 6 years ago by cmaffeo2
--- a/mrdna/readers/segmentmodel_from_pdb.py
+++ b/mrdna/readers/segmentmodel_from_pdb.py
@@ -70,37 +70,31 @@ def read_ter_from_pdb( *args ):
                last = line
    return ter
-def residues_within( cutoff, coords1, coords2, sel1, sel2, universe, selection_text ):
+def residues_within( cutoff, coords1, coords2, sel1, sel2 ):
-    assert(len(sel1.atoms) > 0 and len(sel2.atoms) > 0)
+    """
-    distance_matrix = distance_array( coords1, coords2 )
+    Returns lists of index of residues within sel1 and sel2 such that
-    if sel1 == sel2:
+    sel1.residue[arr_idx1[i]] is within cutoff of sel2.residue[arr_idx2[i]] for all i
-        ## ignore diagonal
-        distance_matrix = distance_matrix + np.eye(len(distance_matrix))*10*cutoff
-    # arr_idxs = np.where((distance_matrix > 0) * (distance_matrix < cutoff))
-    arr_idxs = np.where((distance_matrix < cutoff))
-    arr_idx1,arr_idx2 = arr_idxs
-    atoms = universe.select_atoms(selection_text)
+    Also returns distance_matrix
-    within = -np.ones( atoms.resindices[-1]+1, dtype=np.int )
-    if len(arr_idx1) == len(np.unique(arr_idx1)) and len(arr_idx2) == len(np.unique(arr_idx2)):
+    Ignores residues that are the same in sel1 and sel2
-        within[sel1.resindices[arr_idx1]] = sel2.resindices[arr_idx2]
+    """
-    else:
-        unique,ids,counts = np.unique(arr_idx1, return_index=True, return_counts=True)
-        res1 = sel1.resindices[unique[counts == 1]]
-        res2 = sel2.resindices[arr_idx2[ids[counts == 1]]]
-        within[res1] = res2
-        for i in np.where(counts > 1)[0]:
-            ids2 = arr_idx1 == unique[i]  
-            ij = np.argmin(distance_matrix[arr_idx1[ids2],arr_idx2[ids2]])
-            res1 = sel1.resindices[arr_idx1[ids2][ij]]
-            res2 = sel2.resindices[arr_idx2[ids2][ij]]
-            within[res1] = res2
+    assert(len(sel1.atoms) > 0 and len(sel2.atoms) > 0)
+    assert(len(sel1.residues) == len(coords1))
+    assert(len(sel2.residues) == len(coords2))
+    distance_matrix = distance_array( coords1, coords2 )
-        # within[sel1.resindices[arr_idx1]] = sel2.resindices[arr_idx2]
+    ## Ignore comparisons with self
-    return within
+    res_diff = sel1.residues.resindices[:,np.newaxis] - sel2.residues.resindices[np.newaxis,:]
+    assert( res_diff.shape == distance_matrix.shape )
+    distance_matrix[ res_diff == 0 ] = distance_matrix[ res_diff == 0 ] + 10 *cutoff
+    arr_idxs = np.where((distance_matrix < cutoff))
+    arr_idx1,arr_idx2 = arr_idxs
+    return arr_idx1, arr_idx2, distance_matrix
 def find_base_position_orientation( u, selection_text='all' ):
    ## Find orientation and center of each nucleotide
@@ -133,45 +127,54 @@ def find_basepairs( u, centers, transforms, selection_text='all' ):
    bonds = { b:"resname {} and name {}".format(resnames[b],bp_bond_atoms[b])
              for b in bases }
+    all_ = u.select_atoms(selection_text)
    sel1 = u.select_atoms("({}) and (({}) or ({}))".format(selection_text,
                                                           bonds['A'],bonds['C'])) 
    sel2 = u.select_atoms("({}) and (({}) or ({}))".format(selection_text,
                                                           bonds['T'],bonds['G']))
+    allres = all_.residues.resindices
+    basepairs = -np.ones(len(allres), dtype=np.int)
+    ## Find likely candidates for basepairs
+    idx1, idx2, dists = residues_within( cutoff = 3.8,
+                                         coords1=sel1.positions,
+                                         coords2=sel2.positions,
+                                         sel1 = sel1, sel2 = sel2 )
-    possible_basepairs = residues_within( cutoff = 3.8,
+    ## Find mapping from idx1/idx2 to index of same residue in all_
-                                          coords1=sel1.positions,
+    def get_idx_to_all(sel):
-                                          coords2=sel2.positions,
+        selres = sel.residues.resindices
-                                          sel1 = sel1, sel2 = sel2,
+        return np.searchsorted( allres, selres )
-                                          universe = u,
+    idx1_to_all, idx2_to_all = [ get_idx_to_all(sel) for sel in (sel1,sel2) ]
-                                          selection_text = selection_text )
-    ## Filter by expected position
+    ## Rank possible basepairs by expected position and angles between base orientation
-    ids = possible_basepairs >= 0
+    rank = 100*np.ones(dists.shape)
-    possible_resI = np.where( ids )[0]
-    possible_resJ = possible_basepairs[ ids ].astype(int) 
+    for i in np.unique(idx1):
-    resI,resJ = [[],[]]
+        all1 = idx1_to_all[i]
-    for i,j,R1,R2,c1,c2 in zip(possible_resI,possible_resJ,
+        R1 = transforms[all1]
-                            transforms[possible_resI],
+        c1 = centers[all1]
-                            transforms[possible_resJ],
-                            centers[possible_resI],
+        for j in idx2[ idx1 == i ]:
-                            centers[possible_resJ]):
+            all2 = idx2_to_all[j]
-        c2_expected = c1 + ref_bp_position.dot(R1)
+            R2 = transforms[all2]
-        # fh.write("graphics top cylinder {{{}}} {{{}}} radius 0.2 resolution 30\n".format(printv(c1),printv(c2)))
+            c2 = centers[all2]
-        if np.linalg.norm(c2_expected-c2) < 3.5:
+            c2_expected = c1 + ref_bp_position.dot(R1)
+            dist = np.linalg.norm(c2_expected-c2)
            angle= R1.T.dot(np.array((0,0,1))).dot(R2.T.dot(np.array((0,0,1))))
-            if angle < -0.7:
+            rank[i,j] = dist**2 + 135*(1+angle)**2
-                resI.append(i)
-                resJ.append(j)
+    idx1,idx2 = np.where( rank < 25 )
+    for i in np.unique(idx1):
+        all1 = idx1_to_all[i]
-    resI = np.array(resI, dtype=np.int)
+        j = np.argmin(rank[i,:])
-    resJ = np.array(resJ, dtype=np.int)
+        if i == np.argmin(rank[:,j]):
+            all2 = idx2_to_all[j]
-    ## Add reciprocal basepairs
+            basepairs[all1] = all2
-    assert( (possible_basepairs[resJ]  == -1).all() )
+            basepairs[all2] = all1
-    basepairs = -np.ones(possible_basepairs.shape, dtype=np.int)
+        else:
-    basepairs[resI] = resJ
+            raise Exception("Unable to detect basepair")
-    basepairs[resJ] = resI
    return basepairs
@@ -185,11 +188,18 @@ def find_stacks( u, centers, transforms, selection_text='all' ):
    expected_stack_positions = np.array(expected_stack_positions, dtype=np.float32)
    sel = u.select_atoms("({}) and name C1'".format( selection_text ))
-    stacks_above = residues_within( cutoff = 3.5,
+    idx1, idx2, dists = residues_within( cutoff = 3.5,
-                                    coords1=centers,
+                                         coords1 = centers,
-                                    coords2=expected_stack_positions,
+                                         coords2 = expected_stack_positions,
-                                    sel1 = sel, sel2 = sel,
+                                         sel1 = sel, sel2 = sel )
-                              universe = u, selection_text = selection_text )
+    ## Convert distances to stacks
+    stacks_above = -np.ones(len(sel), dtype=np.int)
+    for i in np.unique(idx1):
+        js = idx2[ idx1 == i ]
+        j = np.argmin(dists[i])
+        stacks_above[i] = j
    return stacks_above
 def basepairs_and_stacks_to_helixmap(basepairs,stacks_above):