Commit f0ada5af authored by cmaffeo2's avatar cmaffeo2
Browse files

Updated atomic pdb reader; added routine for removing atomic strands, but needs testing

parent 594b1eaf
......@@ -96,6 +96,8 @@ def set_splines(seg, coordinates, hid, hmap, hrank):
contours.append( float(rank)/maxrank )
coords = np.array(coords)
seg.set_splines(contours,coords)
seg.start_position = coords[0,:]
seg.end_position = coords[-1,:]
def model_from_basepair_stack_3prime(coordinates, basepair, stack, three_prime, sequence=None):
......@@ -197,16 +199,18 @@ def model_from_basepair_stack_3prime(coordinates, basepair, stack, three_prime,
crossovers,prime5,prime3 = [[],[],[]]
for s in strands:
tmp = np.where(np.diff(hmap[s]) != 0)[0]
crossovers.extend( s[tmp] )
for i in tmp:
crossovers.append( (s[i],s[i+1]) )
prime5.append(s[0])
prime3.append(s[-1])
## Add connections
allSegments = doubleSegments+singleSegments
for r in crossovers:
seg1,seg2 = [allSegments[hmap[i]] for i in (r,r+1)]
nt1,nt2 = [hrank[i] for i in (r,r+1)]
f1,f2 = [fwd[i] for i in (r,r+1)]
for r1,r2 in crossovers:
seg1,seg2 = [allSegments[hmap[i]] for i in (r1,r2)]
nt1,nt2 = [hrank[i] for i in (r1,r2)]
f1,f2 = [fwd[i] for i in (r1,r2)]
if nt1 in (0,seg1.num_nt) or nt2 in (0,seg2.num_nt):
seg1.add_crossover(nt1,seg2,nt2,[f1,f2],type_="terminal_crossover")
else:
......
# -*- coding: utf-8 -*-
def printv(v):
return " ".join([str(x) for x in v])
import pdb
import numpy as np
......@@ -13,11 +15,15 @@ from MDAnalysis.analysis.distances import distance_array
from ..segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment
from .. import get_resource_path
from .segmentmodel_from_lists import model_from_basepair_stack_3prime
from .segmentmodel_from_lists import _three_prime_list_to_five_prime
resnames = dict(A='ADE DA DA5 DA3', T='THY DT DT5 DT3',
C='CYT DC DC5 DC3', G='GUA DG DG5 DG3')
bp_bond_atoms = dict( A='N1', T='H3',
C='N3', G='H1' )
# bp_bond_atoms = dict( A='N1', T='H3',
# C='N3', G='H1' )
bp_bond_atoms = dict( A='N1', T='N3',
C='N3', G='N1' )
bases = resnames.keys()
resname_to_key = {n:k for k,v in resnames.items() for n in v.split()}
complement = dict(A='T', C='G', T='A', G='C')
......@@ -44,8 +50,27 @@ for key,val in refUnis.items():
ref_stack_position = np.array((-2.41851735, -0.259761333, 3.39999978))
ref_bp_position = np.array((0, -8.7434375, -0.326354))
def read_ter_from_pdb( *args ):
pdbfile = args[0]
for arg in args:
if arg[-3:].lower() == "pdb":
pdbfile = arg
ter = []
with open(pdbfile) as fh:
residue = -1
last = ""
for line in fh:
if line[:3] == "TER":
ter.append(residue)
elif line[:3] == "ATO" or line[:3] == "HET":
if last == "" or int(last[22:26]) != int(line[22:26]):
residue += 1
last = line
return ter
def residues_within( cutoff, coords1, coords2, sel1, sel2, universe, selection_text ):
assert(len(sel1.atoms) > 0 and len(sel2.atoms) > 0)
distance_matrix = distance_array( coords1, coords2 )
if sel1 == sel2:
## ignore diagonal
......@@ -112,7 +137,7 @@ def find_basepairs( u, centers, transforms, selection_text='all' ):
sel2 = u.select_atoms("({}) and (({}) or ({}))".format(selection_text,
bonds['T'],bonds['G']))
possible_basepairs = residues_within( cutoff = 2.2,
possible_basepairs = residues_within( cutoff = 3.8,
coords1=sel1.positions,
coords2=sel2.positions,
sel1 = sel1, sel2 = sel2,
......@@ -128,10 +153,12 @@ def find_basepairs( u, centers, transforms, selection_text='all' ):
centers[possible_resI],
centers[possible_resJ]):
c2_expected = c1 + ref_bp_position.dot(R1)
if np.linalg.norm(c2_expected-c2) < 1:
# fh.write("graphics top cylinder {{{}}} {{{}}} radius 0.2 resolution 30\n".format(printv(c1),printv(c2)))
if np.linalg.norm(c2_expected-c2) < 2:
resI.append(i)
resJ.append(j)
resJ.append(j)
resI = np.array(resI, dtype=np.int)
resJ = np.array(resJ, dtype=np.int)
......@@ -223,6 +250,8 @@ def SegmentModelFromPdb(*args,**kwargs):
for a in u.select_atoms("resname DT* and name C7").atoms:
a.name = "C5M"
ter = read_ter_from_pdb(*args)
## Find basepairs and base stacks
centers,transforms = find_base_position_orientation(u)
bps = find_basepairs(u, centers, transforms)
......@@ -234,6 +263,24 @@ def SegmentModelFromPdb(*args,**kwargs):
r = s.atoms.select_atoms("name C1'").resindices
three_prime[r[:-1]] = r[1:]
""" ## Remove connections between resids that are very far away
dist = centers[r[1:]]-centers[r[:-1]]
dist = np.sqrt( np.sum( dist**2, axis=-1 ) )
tmp = np.where( dist > 20 )[0]
three_prime[r[tmp]] = -1
"""
## MDanalysis fails to parse chains based on TER, so find which residues should be broken here
three_prime[ter] = -1
## Some pdbs might not follow the 5-to-3-prime direction convention; find if this is the case
ids = bps >= 0
if np.mean(stacks[ids] == three_prime[ids]) < 0.25:
five_prime = _three_prime_list_to_five_prime(three_prime)
if np.mean(stacks[ids] == five_prime[ids]) < 0.5:
print("WARNING: PDB might not follow a consistent convention for 5'-to-3' direction")
else:
three_prime = five_prime
## Find sequence
seq = [resname_to_key[rn]
for rn in u.select_atoms("name C1'").resnames]
......@@ -242,7 +289,8 @@ def SegmentModelFromPdb(*args,**kwargs):
bps,
stacks,
three_prime,
seq)
seq,
)
if __name__ == "__main__":
# u = mda.Universe("test-ssdna.psf", "test-ssdna.pdb")
......
......@@ -530,6 +530,11 @@ class Segment(ConnectableElement, Group):
self.locations.append(loc)
## TODO? Replace with abstract strand-based model?
def add_nick(self, nt, on_fwd_strand=True):
self.add_3prime(nt,on_fwd_strand)
self.add_5prime(nt+1,on_fwd_strand)
def add_5prime(self, nt, on_fwd_strand=True):
if isinstance(self,SingleStrandedSegment):
on_fwd_strand = True
......@@ -1123,6 +1128,8 @@ class StrandInSegment(Group):
assert( np.isclose( idx_in_strand , int(round(idx_in_strand)) ) )
assert(idx_in_strand >= 0)
return self.children[int(round(idx_in_strand))]
def __repr__(self):
return "<StrandInSegment {}{}[{:.2f}-{:.2f},{:d}]>".format( self.parent.segname, self.segment.name, self.start, self.end, self.is_fwd)
class Strand(Group):
......@@ -1135,6 +1142,9 @@ class Strand(Group):
self.is_circular = is_circular
self.debug = False
def __repr__(self):
return "<Strand {}({})>".format( self.segname, self.num_nt )
## TODO disambiguate names of functions
def add_dna(self, segment, start, end, is_fwd):
""" start/end are given as nt """
......@@ -1381,14 +1391,20 @@ class SegmentModel(ArbdModel):
k = opt.leastsq( fitFun, x0=np.exp(-sep/Lp) )
return k[0][0] * 2*kT*0.00030461742
def extend(self, other, copy=True):
def extend(self, other, copy=True, include_strands=False):
assert( isinstance(other, SegmentModel) )
if copy:
for s in other.segments:
self.segments.append(deepcopy(s))
if include_strands:
for s in other.strands:
self.strands.append(deepcopy(s))
else:
for s in other.segments:
self.segments.append(s)
if include_strands:
for s in other.strands:
self.strands.append(s)
self._clear_beads()
def update(self, segment , copy=False):
......@@ -1399,11 +1415,21 @@ class SegmentModel(ArbdModel):
self._clear_beads()
""" Mapping between different resolution models """
def clear_atomic(self):
for strand in self.strands:
for s in strand.children:
s.clear_all()
def clear_beads(self):
return self._clear_beads()
def _clear_beads(self):
for s in self.segments:
s.clear_all()
self.clear_all(keep_children=True)
self.clear_atomic()
## Check that it worked
assert( len([b for b in self]) == 0 )
locParticles = []
......@@ -2205,6 +2231,12 @@ class SegmentModel(ArbdModel):
s.randomize_unset_sequence()
def _generate_strands(self):
## clear strands
try:
for s in self.strands:
self.children.remove(s)
except:
...
self.strands = strands = []
""" Ensure unconnected ends have 5prime Location objects """
......@@ -2252,8 +2284,7 @@ class SegmentModel(ArbdModel):
while True:
mycounter+=1
if mycounter > 10000:
import pdb
pdb.set_trace()
raise Exception("Too many iterations")
#if seg.name == "22-1" and pos > 140:
# if seg.name == "22-2":
......@@ -2261,7 +2292,7 @@ class SegmentModel(ArbdModel):
# pdb.set_trace()
end_pos, next_seg, next_pos, next_dir, move_at_least = seg.get_strand_segment(pos, is_fwd, move_at_least)
history.append((seg,pos,end_pos,is_fwd))
try:
strand.add_dna(seg, pos, end_pos, is_fwd)
except CircularDnaError:
......@@ -2272,12 +2303,13 @@ class SegmentModel(ArbdModel):
raise
if next_seg is None:
return
break
else:
history.append((seg,pos,is_fwd))
seg,pos,is_fwd = (next_seg, next_pos, next_dir)
return history
strand_counter = 0
history = []
for seg in self.segments:
locs = seg.get_5prime_locations()
if locs is None: continue
......@@ -2287,8 +2319,9 @@ class SegmentModel(ArbdModel):
# TODOTODO
pos = seg.contour_to_nt_pos(l.address, round_nt=True)
is_fwd = l.on_fwd_strand
s = Strand()
_recursively_build_strand(s, seg, pos, is_fwd)
s = Strand(segname="S{:03d}".format(len(strands)))
strand_history = _recursively_build_strand(s, seg, pos, is_fwd)
history.append((l,strand_history))
# print("{} {}".format(seg.name,s.num_nt))
strands.append(s)
......@@ -2325,11 +2358,14 @@ class SegmentModel(ArbdModel):
return check(last-1)
def add_strand_if_needed(seg,is_fwd):
history = []
if not strands_cover_segment(seg, is_fwd):
pdb.set_trace()
pos = nt = find_nt_not_in_strand(seg, is_fwd)
s = Strand(is_circular = True)
_recursively_build_strand(s, seg, pos, is_fwd)
history = _recursively_build_strand(s, seg, pos, is_fwd)
strands.append(s)
return history
for seg in self.segments:
add_strand_if_needed(seg,True)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment