Skip to content
Snippets Groups Projects
Commit 861434ba authored by cmaffeo2's avatar cmaffeo2
Browse files

Slightly improved pdb reader basepair and stack search

parent e32b9c26
No related branches found
No related tags found
No related merge requests found
...@@ -70,37 +70,31 @@ def read_ter_from_pdb( *args ): ...@@ -70,37 +70,31 @@ def read_ter_from_pdb( *args ):
last = line last = line
return ter return ter
def residues_within( cutoff, coords1, coords2, sel1, sel2, universe, selection_text ): def residues_within( cutoff, coords1, coords2, sel1, sel2 ):
assert(len(sel1.atoms) > 0 and len(sel2.atoms) > 0) """
distance_matrix = distance_array( coords1, coords2 ) Returns lists of index of residues within sel1 and sel2 such that
if sel1 == sel2: sel1.residue[arr_idx1[i]] is within cutoff of sel2.residue[arr_idx2[i]] for all i
## ignore diagonal
distance_matrix = distance_matrix + np.eye(len(distance_matrix))*10*cutoff
# arr_idxs = np.where((distance_matrix > 0) * (distance_matrix < cutoff))
arr_idxs = np.where((distance_matrix < cutoff))
arr_idx1,arr_idx2 = arr_idxs
atoms = universe.select_atoms(selection_text) Also returns distance_matrix
within = -np.ones( atoms.resindices[-1]+1, dtype=np.int )
if len(arr_idx1) == len(np.unique(arr_idx1)) and len(arr_idx2) == len(np.unique(arr_idx2)): Ignores residues that are the same in sel1 and sel2
within[sel1.resindices[arr_idx1]] = sel2.resindices[arr_idx2] """
else:
unique,ids,counts = np.unique(arr_idx1, return_index=True, return_counts=True)
res1 = sel1.resindices[unique[counts == 1]]
res2 = sel2.resindices[arr_idx2[ids[counts == 1]]]
within[res1] = res2
for i in np.where(counts > 1)[0]:
ids2 = arr_idx1 == unique[i]
ij = np.argmin(distance_matrix[arr_idx1[ids2],arr_idx2[ids2]])
res1 = sel1.resindices[arr_idx1[ids2][ij]]
res2 = sel2.resindices[arr_idx2[ids2][ij]]
within[res1] = res2
assert(len(sel1.atoms) > 0 and len(sel2.atoms) > 0)
assert(len(sel1.residues) == len(coords1))
assert(len(sel2.residues) == len(coords2))
distance_matrix = distance_array( coords1, coords2 )
# within[sel1.resindices[arr_idx1]] = sel2.resindices[arr_idx2] ## Ignore comparisons with self
return within res_diff = sel1.residues.resindices[:,np.newaxis] - sel2.residues.resindices[np.newaxis,:]
assert( res_diff.shape == distance_matrix.shape )
distance_matrix[ res_diff == 0 ] = distance_matrix[ res_diff == 0 ] + 10 *cutoff
arr_idxs = np.where((distance_matrix < cutoff))
arr_idx1,arr_idx2 = arr_idxs
return arr_idx1, arr_idx2, distance_matrix
def find_base_position_orientation( u, selection_text='all' ): def find_base_position_orientation( u, selection_text='all' ):
## Find orientation and center of each nucleotide ## Find orientation and center of each nucleotide
...@@ -133,45 +127,54 @@ def find_basepairs( u, centers, transforms, selection_text='all' ): ...@@ -133,45 +127,54 @@ def find_basepairs( u, centers, transforms, selection_text='all' ):
bonds = { b:"resname {} and name {}".format(resnames[b],bp_bond_atoms[b]) bonds = { b:"resname {} and name {}".format(resnames[b],bp_bond_atoms[b])
for b in bases } for b in bases }
all_ = u.select_atoms(selection_text)
sel1 = u.select_atoms("({}) and (({}) or ({}))".format(selection_text, sel1 = u.select_atoms("({}) and (({}) or ({}))".format(selection_text,
bonds['A'],bonds['C'])) bonds['A'],bonds['C']))
sel2 = u.select_atoms("({}) and (({}) or ({}))".format(selection_text, sel2 = u.select_atoms("({}) and (({}) or ({}))".format(selection_text,
bonds['T'],bonds['G'])) bonds['T'],bonds['G']))
allres = all_.residues.resindices
basepairs = -np.ones(len(allres), dtype=np.int)
## Find likely candidates for basepairs
idx1, idx2, dists = residues_within( cutoff = 3.8,
coords1=sel1.positions,
coords2=sel2.positions,
sel1 = sel1, sel2 = sel2 )
possible_basepairs = residues_within( cutoff = 3.8, ## Find mapping from idx1/idx2 to index of same residue in all_
coords1=sel1.positions, def get_idx_to_all(sel):
coords2=sel2.positions, selres = sel.residues.resindices
sel1 = sel1, sel2 = sel2, return np.searchsorted( allres, selres )
universe = u, idx1_to_all, idx2_to_all = [ get_idx_to_all(sel) for sel in (sel1,sel2) ]
selection_text = selection_text )
## Filter by expected position ## Rank possible basepairs by expected position and angles between base orientation
ids = possible_basepairs >= 0 rank = 100*np.ones(dists.shape)
possible_resI = np.where( ids )[0]
possible_resJ = possible_basepairs[ ids ].astype(int) for i in np.unique(idx1):
resI,resJ = [[],[]] all1 = idx1_to_all[i]
for i,j,R1,R2,c1,c2 in zip(possible_resI,possible_resJ, R1 = transforms[all1]
transforms[possible_resI], c1 = centers[all1]
transforms[possible_resJ],
centers[possible_resI], for j in idx2[ idx1 == i ]:
centers[possible_resJ]): all2 = idx2_to_all[j]
c2_expected = c1 + ref_bp_position.dot(R1) R2 = transforms[all2]
# fh.write("graphics top cylinder {{{}}} {{{}}} radius 0.2 resolution 30\n".format(printv(c1),printv(c2))) c2 = centers[all2]
if np.linalg.norm(c2_expected-c2) < 3.5: c2_expected = c1 + ref_bp_position.dot(R1)
dist = np.linalg.norm(c2_expected-c2)
angle= R1.T.dot(np.array((0,0,1))).dot(R2.T.dot(np.array((0,0,1)))) angle= R1.T.dot(np.array((0,0,1))).dot(R2.T.dot(np.array((0,0,1))))
if angle < -0.7: rank[i,j] = dist**2 + 135*(1+angle)**2
resI.append(i)
resJ.append(j) idx1,idx2 = np.where( rank < 25 )
for i in np.unique(idx1):
all1 = idx1_to_all[i]
resI = np.array(resI, dtype=np.int) j = np.argmin(rank[i,:])
resJ = np.array(resJ, dtype=np.int) if i == np.argmin(rank[:,j]):
all2 = idx2_to_all[j]
## Add reciprocal basepairs basepairs[all1] = all2
assert( (possible_basepairs[resJ] == -1).all() ) basepairs[all2] = all1
basepairs = -np.ones(possible_basepairs.shape, dtype=np.int) else:
basepairs[resI] = resJ raise Exception("Unable to detect basepair")
basepairs[resJ] = resI
return basepairs return basepairs
...@@ -185,11 +188,18 @@ def find_stacks( u, centers, transforms, selection_text='all' ): ...@@ -185,11 +188,18 @@ def find_stacks( u, centers, transforms, selection_text='all' ):
expected_stack_positions = np.array(expected_stack_positions, dtype=np.float32) expected_stack_positions = np.array(expected_stack_positions, dtype=np.float32)
sel = u.select_atoms("({}) and name C1'".format( selection_text )) sel = u.select_atoms("({}) and name C1'".format( selection_text ))
stacks_above = residues_within( cutoff = 3.5, idx1, idx2, dists = residues_within( cutoff = 3.5,
coords1=centers, coords1 = centers,
coords2=expected_stack_positions, coords2 = expected_stack_positions,
sel1 = sel, sel2 = sel, sel1 = sel, sel2 = sel )
universe = u, selection_text = selection_text )
## Convert distances to stacks
stacks_above = -np.ones(len(sel), dtype=np.int)
for i in np.unique(idx1):
js = idx2[ idx1 == i ]
j = np.argmin(dists[i])
stacks_above[i] = j
return stacks_above return stacks_above
def basepairs_and_stacks_to_helixmap(basepairs,stacks_above): def basepairs_and_stacks_to_helixmap(basepairs,stacks_above):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment