diff --git a/mrdna/readers/libs b/mrdna/readers/libs deleted file mode 120000 index e10e7dfd9849fdf43737462ce6850012e56b2149..0000000000000000000000000000000000000000 --- a/mrdna/readers/libs +++ /dev/null @@ -1 +0,0 @@ -/home/pinyili2/server5/tacoxDNA/src/libs \ No newline at end of file diff --git a/mrdna/readers/libs/__init__.py b/mrdna/readers/libs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mrdna/readers/libs/base.py b/mrdna/readers/libs/base.py new file mode 100644 index 0000000000000000000000000000000000000000..e29c35a07385664e41a9c11355f14686c5a969a9 --- /dev/null +++ b/mrdna/readers/libs/base.py @@ -0,0 +1,879 @@ +""" +Utility functions. +base.py includes the classes: System, Strand, Nucleotide + - Make initial configurations (generate.py) + - Get detailed energy information (process_data/) + - If you want to use it with oxRNA, you have to set environment variable OXRNA to 1 (export OXRNA=1) +""" +import sys +import numpy as np +import os + + +def partition(s, d): + if d in s: + sp = s.split(d, 1) + return sp[0], d, sp[1] + else: + return s, "", "" + + +number_to_base = {0: 'A', 1: 'G', 2: 'C', 3: 'T'} + +base_to_number = {'A': 0, 'a': 0, 'G': 1, 'g': 1, + 'C': 2, 'c': 2, 'T': 3, 't': 3, + 'U': 3, 'u': 3, 'D': 4} + +try: + FLT_EPSILON = np.finfo(np.float).eps +except: + FLT_EPSILON = 2.2204460492503131e-16 + +# oxDNA constants +POS_BACK = -0.4 +POS_STACK = 0.34 +POS_BASE = 0.4 +CM_CENTER_DS = POS_BASE + 0.2 +FENE_R0_OXDNA = 0.7525 +FENE_EPS = 2.0 + +POS_MM_BACK1 = -0.3400 +POS_MM_BACK2 = 0.3408 + +# may come in handy later +RNA = False +RNA_POS_BACK_a1 = -0.4 +RNA_POS_BACK_a3 = 0.2 +RNA_POS_BACK_a2 = 0.0 + +LENGTH_FACT = 8.518 +BASE_BASE = 0.3897628551303122 + +LENGTH_FACT = 8.518 +BASE_BASE = 0.3897628551303122 + +CREPY_COLOR_TABLE = ['red', 'blue', '0,0.502,0', '1,0.8,0', '0.2,0.8,1'] + +# INT_POT_TOTAL = 0 +INT_HYDR = 4 +INT_STACK = 2 +INT_CROSS_STACK = 5 +INT_COAX_STACK = 6 +INT_FENE = 0 +INT_EXC_BONDED = 1 +INT_EXC_NONBONDED = 3 + +H_CUTOFF = -0.1 + +NOGROOVE_ENV_VAR = "OXDNA_NOGROOVE" +MM_GROOVING = os.environ.get(NOGROOVE_ENV_VAR) == '1' + +# static class +class Logger(object): + DEBUG = 0 + INFO = 1 + WARNING = 2 + CRITICAL = 3 + debug_level = INFO + + messages = ("DEBUG", "INFO", "WARNING", "CRITICAL") + + @staticmethod + def log(msg, level=None, additional=None): + if level == None: + level = Logger.INFO + if level < Logger.debug_level: + return + + if additional != None and Logger.debug_level == Logger.DEBUG: + print("%s: %s (additional info: '%s')" % (Logger.messages[level], msg, additional), file=sys.stderr) + else: + print("%s: %s" % (Logger.messages[level], msg), file=sys.stderr) + + @staticmethod + def die(msg): + Logger.log(msg, Logger.CRITICAL) + sys.exit() + + +class Nucleotide(): + """ + Nucleotides compose Strands + + cm_pos --- Center of mass position + Ex: [0, 0, 0] + + a1 --- Unit vector indicating orientation of backbone with respect to base + Ex: [1, 0, 0] + + a3 --- Unit vector indicating orientation (tilting )of base with respect to backbone + Ex: [0, 0, 1] + + base --- Identity of base, which must be designated with either numbers or + letters (this is called type in the c++ code). Confusingly enough, this + is similar to Particle.btype in oxDNA. + + Number: {0,1,2,3} or any number in between (-inf,-7) and (10, inf) + To use specific sequences (or an alphabet large than four) one should + start from the complementary pair 10 and -7. Complementary pairs are + such that base_1 + base_2 = 3; + + Letter: {A,G,T,C} (these will be translated to {0, 1, 3, 2}). + + These are set in the dictionaries: number_to_base, base_to_number + + btype--- Identity of base. Unused at the moment. + + pair --- Base-paired Nucleotide, used in oxView output + cluster --- Cluster ID, Number, used in oxView output + color --- Custom color, Number representing hex value, used in oxView output + + """ + index = 0 + + def __init__(self, cm_pos, a1, a3, base, btype=None, v=np.array([0., 0., 0.]), L=np.array([0., 0., 0.]), n3=-1, pair=None, cluster=None, color=None): + self.index = Nucleotide.index + Nucleotide.index += 1 + self.cm_pos = np.array(cm_pos) + self._a1 = np.array(a1) + self._a3 = np.array(a3) + self._original_base = base + # base should be an integer + if isinstance(base, int) or isinstance(base, np.int_): + pass + else: + try: + base = base_to_number[base] + except KeyError: + Logger.log("Invalid base (%s)" % base, level=Logger.WARNING) + self._base = base + if btype is None: + self._btype = base + else: + self._btype = btype + self._L = L + self._v = v + self.n3 = n3 + self.next = -1 + self.pair = pair + self.cluster = cluster + self.color = color + + def get_pos_base (self): + """ + Returns the position of the base centroid + Note that cm_pos is the centrod of the backbone and base. + """ + return self.cm_pos + self._a1 * POS_BASE + + pos_base = property(get_pos_base) + + def get_pos_stack (self): + return self.cm_pos + self._a1 * POS_STACK + + pos_stack = property (get_pos_stack) + + def get_pos_back (self): + """ + Returns the position of the backbone centroid + Note that cm_pos is the centrod of the backbone and base. + """ + if MM_GROOVING: + return self.cm_pos + self._a1 * POS_MM_BACK1 + self._a2 * POS_MM_BACK2 + elif RNA: + return self.cm_pos + self._a1 * RNA_POS_BACK_a1 + self._a2 * RNA_POS_BACK_a2 + self._a3 * RNA_POS_BACK_a3 + else: + return self.cm_pos + self._a1 * POS_BACK + + pos_back = property(get_pos_back) + + def get_pos_back_rel(self): + """ + Returns the position of the backbone centroid relative to the centre of mass + i.e. it will be a vector pointing from the c.o.m. to the backbone + """ + return self.get_pos_back() - self.cm_pos + + def get_a2(self): + return np.cross (self._a3, self._a1) + + _a2 = property (get_a2) + + def copy(self, disp=None, rot=None): + copy = Nucleotide(self.cm_pos, self._a1, self._a3, self._base, self._btype, self._L, self._v, self.n3, self.pair, self.cluster, self.color) + if disp is not None: + copy.translate(disp) + if rot is not None: + copy.rotate(rot) + + return copy + + def translate(self, disp): + self.cm_pos += disp + self.cm_pos_box += disp + + def rotate(self, R, origin=None): + if origin == None: origin = self.cm_pos + + self.cm_pos = np.dot(R, self.cm_pos - origin) + origin + self._a1 = np.dot(R, self._a1) + self._a3 = np.dot(R, self._a3) + + def distance(self, other, PBC=True, box=None): + if PBC and box is None: + if not (isinstance (box, np.ndarray) and len(box) == 3): + Logger.die ("distance between nucleotides: if PBC is True, box must be a numpy array of length 3"); + dr = other.pos_back - self.pos_back + if PBC: + dr -= box * np.rint (dr / box) + return dr + + def is_uracil(self): + return isinstance(self._original_base, str) and self._original_base.lower() == "u" + + def get_base(self): + """ + Returns a number containing base id + >>> v1 = np.array([0.,0.,0.]) + >>> v2 = np.array([1.,0.,0.]) + >>> v3 = np.array([0.,0.,1.]) + >>> Nucleotide(v1, v2, v3, 'A').get_base() + 'A' + >>> Nucleotide(v1, v2, v3, "C").get_base() + 'C' + >>> Nucleotide(v1, v2, v3, "G").get_base() + 'G' + >>> Nucleotide(v1, v2, v3, "T").get_base() + 'T' + >>> Nucleotide(v1, v2, v3, 1).get_base() + 'G' + >>> Nucleotide(v1, v2, v3, 103).get_base() + '103' + >>> Nucleotide(v1, v2, v3, -97).get_base() + '-97' + """ + if self.is_uracil(): + return "U" + + if type(self._base) is not int: + try: + number_to_base[self._base] + except KeyError: + Logger.log("Nucleotide.get_base(): nucleotide %d: unknown base type '%d', defaulting to 12 (A)" % (self.index, self._base), Logger.WARNING) + + if self._base in [0, 1, 2, 3]: + return number_to_base[self._base] + else: + return str(self._base) + + def _get_lorenzo_output(self): + a = np.concatenate((self.cm_pos, self._a1, self._a3, self._v, self._L)) + return " ".join(str(x) for x in a) + + def _get_ribbon_output(self): + s1 = self.cm_pos_box + self.get_pos_back_rel() + return "%lf %lf %lf %lf %lf %lf %lf %lf %lf" % (tuple(s1) + tuple (self._a1) + tuple (self._a2)) + + +class Strand(): + """ + Strand composed of Nucleotides + Strands can be contained in System + """ + index = 0 + + def __init__(self): + self.index = Strand.index + Strand.index += 1 + self._first = -1 + self._last = -1 + self._nucleotides = [] + self._sequence = [] + self.visible = True + self._circular = False # bool on circular DNA + + def get_length(self): + return len(self._nucleotides) + + def get_sequence(self): + return self._sequence + + def _prepare(self, si, ni): + self.index = si + self._first = ni + + for n in range(self.N): + self._nucleotides[n].index = ni + n + + self._last = ni + n + return ni + n + 1 + + def copy(self): + copy = Strand() + for n in self._nucleotides: + copy.add_nucleotide(n.copy()) + return copy + + def get_cm_pos(self): + return sum([x.cm_pos for x in self._nucleotides]) / self.N + + def set_cm_pos(self, new_pos): + diff = new_pos - self.cm_pos + for n in self._nucleotides: n.translate(diff) + + def translate(self, amount): + new_pos = self.cm_pos + amount + self.set_cm_pos(new_pos) + + def rotate(self, R, origin=None): + if origin == None: + origin = self.cm_pos + + for n in self._nucleotides: + n.rotate(R, origin) + + def append(self, other): + if not isinstance (other, Strand): + raise ValueError + + dr = self._nucleotides[-1].distance (other._nucleotides[0], PBC=False) + if np.sqrt(np.dot (dr, dr)) > (0.7525 + 0.25): + print("WARNING: Strand.append(): strands seem too far apart. Assuming you know what you are doing.", file=sys.stderr) + + ret = Strand() + + for n in self._nucleotides: + ret.add_nucleotide(n) + + for n in other._nucleotides: + ret.add_nucleotide(n) + + return ret + + def get_slice(self, start=0, end=None): + if end is None: + end = self.get_length() + + if end > self.get_length(): + print("The given end parameter is larger than the number of nucleotides of the strand (%d > %d)" % (end, self.get_length()), file=sys.stderr) + raise ValueError + + ret = Strand() + for i in range(start, end): + ret.add_nucleotide(self._nucleotides[i].copy()) + return ret + + def set_sequence(self, seq): + if isinstance(seq, str): + seq = [base_to_number[x] for x in seq] + if len(seq) != len(self._nucleotides): + Logger.log ("Cannot change sequence: lengths don't match", Logger.WARNING) + return + i = 0 + for n in self._nucleotides: + n._base = seq[i] + i += 1 + self._sequence = seq + + def bring_in_box_nucleotides(self, box): + diff = np.rint(self.cm_pos / box) * box + for n in self._nucleotides: + n.cm_pos_box = n.cm_pos - diff + + def add_nucleotide(self, n): + if len(self._nucleotides) == 0: + self._first = n.index + n.strand = self.index + self._nucleotides.append(n) + self._last = n.index + self.sequence.append(n._base) + + def _get_lorenzo_output(self): + if not self.visible: + return "" + + conf = "\n".join(n._get_lorenzo_output() for n in self._nucleotides) + "\n" + + top = "" + for n in self._nucleotides: + if self._circular: + if n.index == self._first: + n3 = self._last + else: + n3 = n.index - 1 + if n.index == self._last: + n5 = self._first + else: + n5 = n.index + 1 + else: + if n.index == self._first: + n3 = -1 + else: + n3 = n.index - 1 + if n.index == self._last: + n5 = -1 + else: + n5 = n.index + 1 + top += "%d %s %d %d\n" % (self.index + 1, n.get_base(), n3, n5) + + return conf, top + + def get_lammps_N_of_bonds_strand(self): + N_bonds = 0 + for n in self._nucleotides: + if n.index != self._last: + N_bonds += 1 + elif self._circular: + N_bonds += 1 + + return N_bonds + + def get_lammps_bonds(self): + top = [] + for n in self._nucleotides: + if n.index != self._last: + top.append("%d %d" % (n.index + 1, n.index + 2)) + elif self._circular: + top.append("%d %d" % (n.index + 1, self._first + 1)) + + return top + + def _get_ribbon_output(self): + if not self.visible: + return "" + v = [n._get_ribbon_output() for n in self._nucleotides] + v.insert(0, "0. 0. 0. @ 0.3 C[red] RIB") + return " ".join(v) + + cm_pos = property(get_cm_pos, set_cm_pos) + N = property(get_length) + sequence = property(get_sequence) + + def make_circular(self, check_join_len=False): + if check_join_len: + dr = self._nucleotides[-1].distance (self._nucleotides[0], PBC=False) + if np.sqrt(np.dot (dr, dr)) > (0.7525 + 0.25): + Logger.log("Strand.make_circular(): ends of the strand seem too far apart. \ + Assuming you know what you are doing.", level=Logger.WARNING) + self._circular = True + + def make_noncircular(self): + self._circular = False + + def is_circular(self): + return self._circular + + def cut_in_two(self, copy=True): # cuts a strand into two strands in the middle + fragment_one = Strand() + fragment_two = Strand() + counter = 0 + for n in self._nucleotides: + if counter < (len(self._nucleotides) / 2): + fragment_one.add_nucleotide(n.copy() if copy else n) + else: + fragment_two.add_nucleotide(n.copy() if copy else n) + counter += 1 + return fragment_one , fragment_two + + +def parse_visibility(path): + try: + inp = open (path, 'r') + except: + Logger.log ("Visibility file `" + path + "' not found. Assuming default visibility", Logger.WARNING) + return [] + + output = [] + for linea in inp.readlines(): + linea = linea.strip().lower() + # remove everything that comes after '#' + linea = linea.split('#')[0] + if len(linea) > 0: output.append(linea) + + return output + + +class System(object): + """ + Object representing an oxDNA system + Contains strands + + Arguments: + box -- the box size of the system + Ex: box = [50, 50, 50] + + time --- Time of the system + + E_pot --- Potential energy + + E_kin --- Kinetic energy + + """ + + def __init__(self, box, time=0, E_pot=0, E_kin=0): + self._time = time + self._ready = False + self._box = np.array(box, np.float64) + self._N = 0 + self._N_strands = 0 + self._strands = [] + self._nucleotide_to_strand = [] + self._N_cells = np.array(np.floor (self._box / 3.), np.int_) + for kk in [0, 1, 2]: + if self._N_cells[kk] > 100: + self._N_cells[kk] = 100 + self._cellsides = box / self._N_cells + self._head = [False, ] * int(self._N_cells[0] * self._N_cells[1] * self._N_cells[2]) + self.E_pot = E_pot + self.E_kin = E_kin + self.E_tot = E_pot + E_kin + self.cells_done = False + + Nucleotide.index = 0 + Strand.index = 0 + + def get_sequences (self): + return [x._sequence for x in self._strands] + + _sequences = property (get_sequences) + + def get_N_Nucleotides(self): + return self._N + + def get_N_strands(self): + return self._N_strands + + def _prepare(self, visibility): + sind = 0 + nind = 0 + for sind in range(self._N_strands): + nind = self._strands[sind]._prepare(sind, nind) + + if visibility != None: self.set_visibility(visibility) + + for s in self._strands: + s.bring_in_box_nucleotides(self._box) + + def copy (self): + copy = System (self._box) + for s in self._strands: + copy.add_strand (s.copy (), check_overlap=False) + return copy + + def get_reduced(self, according_to, bbox=None, check_overlap=False): + visibility_list = self.get_visibility(according_to) + + if bbox == None or bbox == True: + bbox = self._box + elif isinstance(bbox, list) and not isinstance(bbox, np.array): + bbox = np.array(bbox) + else: + Logger.die("Cannot reduce system, bbox not correct") + + copy = System(bbox) + for i in range(self._N_strands): + if visibility_list[i]: + copy.add_strand(self._strands[i].copy(), check_overlap) + + return copy + + def join(self, other, box=None): + if box is None: + box = np.array([0., 0., 0.]) + for i in range(3): + if other._box[i] > self._box[i]: + box[i] = other._box[i] + else: + box[i] = self._box[i] + + ret = System(np.array(box, np.float64)) + for s in self._strands: + if s.visible: + ret.add_strand(s.copy(), check_overlap=False) + for s in other._strands: + if s.visible: + ret.add_strand(s.copy(), check_overlap=False) + + return ret + + def get_visibility(self, arg=None): + actions = {'vis': True, 'inv': False} + visibility_list = [True, ] * self._N_strands + + if isinstance (arg, str): + Logger.log ("Setting visibility with method 'file'", Logger.INFO) + lines = parse_visibility(arg) + + for line in lines: + # [uno, due, tre] = [p.strip() for p in line.partition("=")] + [uno, due, tre] = [p.strip() for p in partition (line, "=")] + if due != "=" or uno not in ["inv", "vis", "default"]: + Logger.log ("Lines in visibility must begin with one of inv=, vis= and default=. Skipping this line: --" + line + "--", Logger.WARNING) + continue + + if uno == 'default': + if tre not in ['inv', 'vis']: + Logger.log ("Wrong default in visibility file. Assuming visible as default", Logger.WARNING) + tre = 'vis' + if tre == 'inv': + visibility_list = [False, ] * self._N_strands + else: + # filter removes all the empty strings + arr = [a.strip() for a in [_f for _f in tre.split(',') if _f]] + for a in arr: + try: + ind = int(a) + except: + Logger.log ("Could not cast '%s' to int. Assuming 0" % a, Logger.WARNING) + ind = 0 + try: + visibility_list[ind] = actions[uno] + except: + Logger.log ("Strand %i does not exist in system, cannot assign visibility. Ignoring" % ind, Logger.WARNING) + + elif isinstance (arg, list): + Logger.log("Setting visibility with method 'list'", Logger.INFO) + # first len(arg) elements will be overwritten + visibility_list[0:len(arg)] = arg + else: + if arg is not None: + Logger.log("Argument of System.set_visibility can be a string or a list. Skipping visibility settings, assuming all strands are visible", Logger.WARNING) + return visibility_list + + return visibility_list + + def set_visibility(self, arg=None): + visibility_list = self.get_visibility(arg) + + for i in range(self._N_strands): + self._strands[i].visible = visibility_list[i] + + return + + def do_cells (self): + self._N_cells = np.array(np.floor (self._box / 3.), np.int_) + for kk in [0, 1, 2]: + if self._N_cells[kk] > 100: + self._N_cells[kk] = 100 + self._cellsides = self._box / self._N_cells + for n in self._nucleotides: + n.next = -1 + self._head = [False, ] * int(self._N_cells[0] * self._N_cells[1] * self._N_cells[2]) + for n in self._nucleotides: + cs = np.array((np.floor((n.cm_pos / self._box - np.rint(n.cm_pos / self._box) + 0.5) * (1. - FLT_EPSILON) * self._box / self._cellsides)), np.int_) + cella = cs[0] + self._N_cells[0] * cs[1] + self._N_cells[0] * self._N_cells[1] * cs[2] + n.next = self._head[cella] + self._head[cella] = n + self.cells_done = True + return + + def add_strand(self, s, check_overlap=True): + """ + Add a Strand to the System + """ + ''' + # we now make cells off-line to save time when loading + # configurations; interactions are computed with h_bonds.py + # most of the time anyways + for n in s._nucleotides: + cs = np.array((np.floor((n.cm_pos/self._box - np.rint(n.cm_pos / self._box ) + 0.5) * (1. - FLT_EPSILON) * self._box / self._cellsides)), np.int_) + cella = cs[0] + self._N_cells[0] * cs[1] + self._N_cells[0] * self._N_cells[1] * cs[2] + n.next = self._head[cella] + self._head[cella] = n + ''' + self._strands.append(s) + self._N += s.N + self._N_strands += 1 + self.cells_done = False + return True + + def add_strands(self, ss, check_overlap=True): + if isinstance(ss, tuple) or isinstance(ss, list): + added = [] + for s in ss: + if self.add_strand(s, check_overlap): + added.append(s) + if len(added) == len(ss): + return True + else: + for s in added: + Nucleotide.index -= s.N + Strand.index -= 1 + self._strands.pop() + self._N -= s.N + self._N_strands -= 1 + self._sequences.pop() + return False + + elif not self.add_strand(ss, check_overlap): + return False + + return True + + def get_unique_seq(self): + # we need only the unique sequences of the system + # see http://stackoverflow.com/questions/1143379/removing-duplicates-from-list-of-lists-in-python + unique_seq = list(dict((str(x), x) for x in self._sequences).values()) + return unique_seq + + def rotate (self, amount, origin=None): + for s in self._strands: + s.rotate (amount, origin) + + def translate (self, amount): + for s in self._strands: + s.translate (amount) + + def print_ribbon_output(self, name, same_colors=False, visibility=None, constr_size=None): + self._prepare(visibility) + if constr_size != None: + for i in range(self.N_strands / constr_size): + cind = constr_size * i + s1 = self._strands[cind] + s2 = self._strands[cind + 1] + s3 = self._strands[cind + 2] + s4 = self._strands[cind + 3] + + diff1 = np.rint(s1.cm_pos / self._box) * self._box + diff2 = np.rint(s2.cm_pos / self._box) * self._box + diff3 = np.rint(s3.cm_pos / self._box) * self._box + diff4 = np.rint(s4.cm_pos / self._box) * self._box + + s2.translate(np.rint((s1.cm_pos - s2.cm_pos - diff1 + diff2) / self._box) * self._box) + s3.translate(np.rint((s1.cm_pos - s3.cm_pos - diff1 + diff3) / self._box) * self._box) + s4.translate(np.rint((s1.cm_pos - s4.cm_pos - diff1 + diff4) / self._box) * self._box) + + unique_seq = list(self.get_unique_seq()) + + if same_colors: + n = len(unique_seq) + colors = CREPY_COLOR_TABLE + while len(colors) < n: colors *= 2 + + f = open(name, "w") + f.write(".Box:%lf,%lf,%lf\n" % tuple(self._box)) + for s in self._strands: + out = s._get_ribbon_output() + "\n" + if same_colors: + color = colors[unique_seq.index(s.sequence)] + out = out.replace("C[red]", "C[%s]" % color) + f.write(out) + f.close() + + def print_lorenzo_output(self, conf_name, top_name, visibility=None): + self._prepare(visibility) + conf = "t = %lu\nb = %f %f %f\nE = %lf %lf %lf\n" % (int(self._time), self._box[0], self._box[1], self._box[2], self.E_tot, self.E_pot, self.E_kin) + + visible_strands = 0 + visible_nucleotides = 0 + for s in self._strands: + if s.visible: + visible_strands += 1 + visible_nucleotides += s.N + + topology = "%d %d\n" % (visible_nucleotides, visible_strands) + for s in self._strands: + sc, st = s._get_lorenzo_output() + topology += st + conf += sc + + with open(conf_name, "w") as f: + f.write(conf) + f.close() + with open(top_name, "w") as f: + f.write(topology) + f.close() + + def print_oxview_output(self, name): + import json + + out = { + 'box': self._box.tolist(), + 'systems': [{'id':0, 'strands': []}] + } + + for s in self._strands: + strand = { + 'id': s.index, 'end3': s._nucleotides[-1].index, 'end5': s._nucleotides[0].index, + 'class': 'NucleicAcidStrand', 'monomers': [] + } + for i, n in enumerate(s._nucleotides): + if s._circular: + if i == 0: + n5 = s._nucleotides[-1].index + else: + n5 = s._nucleotides[i - 1].index + if i == len(s._nucleotides) - 1: + n3 = s._nucleotides[0].index + else: + n3 = s._nucleotides[i + 1].index + else: + if i == 0: + n5 = -1 + else: + n5 = s._nucleotides[i - 1].index + if i == len(s._nucleotides) - 1: + n3 = -1 + else: + n3 = s._nucleotides[i + 1].index + nucleotide = { + 'id': n.index, + 'type': n.get_base(), + 'class': 'DNA', + 'p': n.cm_pos.tolist(), + 'a1': n._a1.tolist(), + 'a3': n._a3.tolist() + } + if n3 >= 0: + nucleotide['n3'] = n3 + if n5 >= 0: + nucleotide['n5'] = n5 + if n.pair is not None: + nucleotide['bp'] = n.pair.index + if n.cluster is not None: + nucleotide['cluster'] = n.cluster + if n.color is not None: + nucleotide['color'] = n.color + strand['monomers'].append(nucleotide) + out['systems'][0]['strands'].append(strand) + + with open(name, "w") as f: + f.write(json.dumps(out)) + + N = property(get_N_Nucleotides) + N_strands = property (get_N_strands) + + def get_nucleotide_list (self): + ret = [] + for s in self._strands: + ret += s._nucleotides + return ret + + _nucleotides = property (get_nucleotide_list) + + def map_nucleotides_to_strands(self): + # this function creates nucl_id -> strand_id array + for i in range(len(self._strands)): + for _ in range(self._strands[i].get_length()): + self._nucleotide_to_strand.append(i) + + def print_dot_bracket_output(self, filename): + # assumes each nucleotide has at most 1 hydrogen bond, requires interactions already to be filled for nucleotide objects + nupack_string = "" + for n1 in range(self.get_N_Nucleotides()): + interactions = self._nucleotides[n1].interactions + if len(interactions) > 1: + Logger.log ("more than 1 HB for a nucleotide", Logger.WARNING) + if len(interactions) == 0: + nupack_string += "." + elif interactions[0] > n1: + nupack_string += "(" + elif interactions[0] < n1: + nupack_string += ")" + else: + Logger.log("unexpected interaction detected while building nupack string", Logger.CRITICAL) + + f = open(filename, "w") + f.write(nupack_string) + f.close() + diff --git a/mrdna/readers/libs/cadnano_utils.py b/mrdna/readers/libs/cadnano_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..30a1787c5f3b5fe54d20d251a5876841e63f59c2 --- /dev/null +++ b/mrdna/readers/libs/cadnano_utils.py @@ -0,0 +1,415 @@ +''' +Created on Nov 11, 2018 + +@author: lorenzo +''' + +import numpy as np +from . import base +from . import utils +import math + +BP = "bp" +DEGREES = "degrees" + + +class StrandGenerator (object): + + def generate(self, bp, sequence=None, start_pos=np.array([0, 0, 0]), direction=np.array([0, 0, 1]), perp=None, rot=0., double=True, circular=False, DELTA_LK=0, BP_PER_TURN=10.34, ds_start=None, ds_end=None, force_helicity=False): + """ + Generate a strand of DNA. + - linear, circular (circular) + - ssDNA, dsDNA (double) + - Combination of ss/dsDNA (ds_start, ds_end) + Note: Relevent argument(s) in parentheses. + + Arguments: + bp --- Integer number of bp/nt (required) + sequence --- Array of integers or string. Should be same length as bp (default None) + Default (None) generates a random sequence. + Ex: [0,1,2,3,0] + Ex: "AGCTA" + See dictionary base.base_to_number for int/char conversion {0:'A'} + start_pos --- Location to begin building the strand (default np.array([0, 0, 0])) + direction --- a3 vector, orientation of the base (default np.array([0, 0, 1])) + perp --- Sets a1 vector, the orientation of the backbone. (default False) + Must be perpendicular to direction (as a1 must be perpendicular to a3) + If perp is None or False, perp is set to a random orthogonal angle + rot --- Rotation of first bp (default 0.) + double --- Generate dsDNA (default True) + circular --- Generate closed circular DNA (defalt False) + Limitations... + For ssDNA (double=False): bp >= 4 + For dsDNA (double=True) : bp >= 30 + Will throw warnings. Allowed, but use at your own risk. + DELTA_LK --- Integer change in linking number from Lk0 (default 0) + Only valid if circular==True + BP_PER_TURN --- Base pairs per complete 2*pi helix turn. (default 10.34) + Only valid if circular==True + ds_start --- Index (from 0) to begin double stranded region (default None) + ds_end --- Index (from 0) to end double stranded region (default None) + Default is None, which is entirely dsDNA; sets ds_start = 0, ds_end=bp + Ex: ds_start=0, ds_end=10 will create a double stranded region on bases + range(0,10): [0,1,2,3,4,5,6,7,8,9] + Note: To generate a nicked circular dsDNA, manually change state with + {Strand}.make_noncircular() + force_helicity --- Force generation of helical strands. Use helicity by default + for bp > 30. Warns from 18 to 29. Will crash oxDNA below 18. (default False) + + Note: Minimuim circular duplex is 18. Shorter circular strands disobey FENE. + For shorter strands, circular ssDNA is generated in a circle instead of having + imposed helicity. + + Examples: + Generate ssDNA: + generate(bp=4,sequence=[0,1,2,3],double=False,circular=False) + Generate circular dsDNA with +2 Linking number: + generate(bp=45,double=True,circular=True,DELTA_LK=2) + Generate a circular ssDNA (45nt) with ssDNA (25nt) annealed to indices 0 to 24: + generate(bp=45,double=True,circular=True,ds_start=0,ds_end=25) + """ + # we need numpy array for these + start_pos = np.array(start_pos, dtype=float) + direction = np.array(direction, dtype=float) + if isinstance(sequence, list): + sequence = np.array(sequence) + + # Loads of input checking... + if isinstance(sequence, str): + try: + sequence = [base.base_to_number[x] for x in sequence] + except KeyError: + base.Logger.die("Key Error: sequence is invalid") + if sequence == None: + sequence = np.random.randint(0, 4, bp) + elif len(sequence) != bp: + n = bp - len(sequence) + sequence = np.append(sequence, np.random.randint(0, 4, n)) + base.Logger.log("sequence is too short, adding %d random bases" % n, base.Logger.WARNING) + + if circular == True and bp < 30: + # 30 is about the cut off for circular dsDNA. Anything shorter will probably clash. + # oxDNA can relax down to 18. + # 4 is about the cut off for circular ssDNA. Use dsDNA cutoff for saftey. + base.Logger.log("sequence is too short! Proceed at your own risk", base.Logger.WARNING) + + option_use_helicity = True + if circular == True and bp < 30 and double == False: + base.Logger.log("sequence is too short! Generating ssDNA without imposed helicity", base.Logger.WARNING) + # Do not impose helcity to generate shorter circular ssDNA + if not force_helicity: + option_use_helicity = False + + if ds_start == None: + ds_start = 0 + if ds_end == None: + ds_end = bp + if ds_start > ds_end: + base.Logger.die("ds_end > ds_start") + if ds_end > bp: + base.Logger.die("ds_end > bp") + + # we need to find a vector orthogonal to direction + dir_norm = np.sqrt(np.dot(direction, direction)) + if dir_norm < 1e-10: + base.Logger.log("direction must be a valid vector, defaulting to (0, 0, 1)", base.Logger.WARNING) + direction = np.array([0, 0, 1]) + else: + direction /= dir_norm + + if perp is None or perp is False: + v1 = np.random.random_sample(3) + v1 -= direction * (np.dot(direction, v1)) + v1 /= np.sqrt(sum(v1 * v1)) + else: + v1 = perp; + + # Setup initial parameters + ns1 = base.Strand() + # and we need to generate a rotational matrix + R0 = utils.get_rotation_matrix(direction, rot) + # R = get_rotation_matrix(direction, np.deg2rad(35.9)) + R = utils.get_rotation_matrix(direction, [1, BP]) + a1 = v1 + a1 = np.dot (R0, a1) + rb = np.array(start_pos) + a3 = direction + + # Circular strands require a continuious deformation of the ideal helical pitch + if circular == True: + # Unit vector orthogonal to plane of torus + # Note: Plane of torus defined by v1,direction + torus_perp = np.cross(v1, direction) + # Angle between base pairs along torus + angle = 2. * np.pi / float(bp) + # Radius of torus + radius = base.FENE_R0_OXDNA / math.sqrt(2. * (1. - math.cos(angle))); + + if circular == True and option_use_helicity: + # Draw backbone in a helical spiral around a torus + # Draw bases pointing to center of torus + for i in range(bp): + # Torus plane defined by direction and v1 + v_torus = v1 * base.BASE_BASE * math.cos(i * angle) + \ + direction * base.BASE_BASE * math.sin(i * angle) + rb += v_torus + + # a3 is tangent to the torus + a3 = v_torus / np.linalg.norm(v_torus) + R = utils.get_rotation_matrix(a3, [i * (round(bp // BP_PER_TURN) + DELTA_LK) / float(bp) * 360, DEGREES]) + + # a1 is orthogonal to a3 and the torus normal + a1 = np.cross (a3, torus_perp) + + # Apply the rotation matrix + a1 = np.dot(R, a1) + ns1.add_nucleotide(base.Nucleotide(rb - base.CM_CENTER_DS * a1, a1, a3, sequence[i])) + ns1.make_circular(check_join_len=True) + elif circular == True and not option_use_helicity: + for i in range(bp): + rbx = math.cos (i * angle) * radius + 0.34 * math.cos(i * angle) + rby = math.sin (i * angle) * radius + 0.34 * math.sin(i * angle) + rbz = 0. + rb = np.array([rbx, rby, rbz]) + a1x = math.cos (i * angle) + a1y = math.sin (i * angle) + a1z = 0. + a1 = np.array([a1x, a1y, a1z]) + ns1.add_nucleotide(base.Nucleotide(rb, a1, np.array([0, 0, 1]), sequence[i])) + ns1.make_circular(check_join_len=True) + else: + # Add nt in canonical double helix + for i in range(bp): + ns1.add_nucleotide(base.Nucleotide(rb - base.CM_CENTER_DS * a1, a1, a3, sequence[i])) + if i != bp - 1: + a1 = np.dot(R, a1) + rb += a3 * base.BASE_BASE + + # Fill in complement strand + if double == True: + ns2 = base.Strand() + for i in reversed(list(range(ds_start, ds_end))): + # Note that the complement strand is built in reverse order + nt = ns1._nucleotides[i] + a1 = -nt._a1 + a3 = -nt._a3 + nt2_cm_pos = -(base.FENE_EPS + 2 * base.POS_BACK) * a1 + nt.cm_pos + ns2.add_nucleotide(base.Nucleotide(nt2_cm_pos, a1, a3, 3 - sequence[i])) + if ds_start == 0 and ds_end == bp and circular == True: + ns2.make_circular(check_join_len=True) + return ns1, ns2 + else: + return ns1 + + def generate_or_sq(self, bp, sequence=None, start_pos=np.array([0., 0., 0.]), direction=np.array([0., 0., 1.]), perp=None, double=True, rot=0., angle=np.pi / 180 * 33.75, length_change=0, region_begin=0, region_end=0): + if length_change and len(region_begin) != len(region_end): + if (len(region_end) + 1) == len(region_begin): + base.Logger.log("the lengths of begin (%d) and end (%d) arrays are mismatched; I will try to proceed by using the number of basepairs as the last element of the end array" % (len(region_begin), len(region_end)), base.Logger.WARNING) + region_end.append(bp + 1) + else: + base.Logger.die("the lengths of begin (%d) and end (%d) arrays are unrecoverably mismatched" % (len(region_begin), len(region_end))) + + # we need numpy array for these + start_pos = np.array(start_pos, dtype=float) + direction = np.array(direction, dtype=float) + if sequence == None: + sequence = np.random.randint(0, 4, bp) + elif len(sequence) != bp: + n = bp - len(sequence) + sequence += np.random.randint(0, 4, n) + base.Logger.log("sequence is too short, adding %d random bases" % n, base.Logger.WARNING) + # angle should be an array, with a length 1 less than the # of base pairs + if not isinstance(angle, np.ndarray): + angle = np.ones(bp) * angle + elif len(angle) != bp - 1: + base.Logger.log("generate_or_sq: incorrect angle array length, should be 1 less than number of base pairs", base.Logger.CRITICAL) + # create the sequence of the second strand as made of complementary bases + sequence2 = [3 - s for s in sequence] + sequence2.reverse() + + # we need to find a vector orthogonal to direction + dir_norm = np.sqrt(np.dot(direction, direction)) + if dir_norm < 1e-10: + base.Logger.log("direction must be a valid vector, defaulting to (0, 0, 1)", base.Logger.WARNING) + direction = np.array([0, 0, 1]) + else: + direction /= dir_norm + + if perp is None: + v1 = np.random.random_sample(3) + v1 -= direction * (np.dot(direction, v1)) + v1 /= np.sqrt(sum(v1 * v1)) + else: + v1 = perp; + + # and we need to generate a rotational matrix + R0 = utils.get_rotation_matrix(direction, rot) + + ns1 = base.Strand() + a1 = v1 + a1 = np.dot (R0, a1) + rb = np.array(start_pos) + a3 = direction + Rs = [] + for i in range(bp): + ns1.add_nucleotide(base.Nucleotide(rb - base.CM_CENTER_DS * a1, a1, a3, sequence[i])) + if i != bp - 1: + R = utils.get_rotation_matrix(direction, angle[i]) + Rs.append(R) + a1 = np.dot(R, a1) + rb += a3 * base.BASE_BASE + if length_change: + for j in range(len(length_change)): + if i >= region_begin[j] and i < region_end[j]: + if length_change[j]: + rb += a3 * base.BASE_BASE * (-(float(length_change[j]) / (region_end[j] - region_begin[j]))) + + if double == True: + a1 = -a1 + a3 = -direction + ns2 = base.Strand() + + for i in range(bp): + # create new nucleotide and save basepair info on both sides + paired_nuc = ns1._nucleotides[bp-i-1] + new_nuc = base.Nucleotide(rb - base.CM_CENTER_DS * a1, a1, a3, sequence2[i], pair=paired_nuc) + paired_nuc.pair = new_nuc + ns2.add_nucleotide(new_nuc) + if i != bp - 1: + # we loop over the rotation matrices in the reverse order, and use the transpose of each matrix + a1 = np.dot(Rs.pop().transpose(), a1) + rb += a3 * base.BASE_BASE + if length_change: + for j in range(len(length_change)): + if bp - 2 - i >= region_begin[j] and bp - 2 - i < region_end[j]: + if length_change[j]: + rb += a3 * base.BASE_BASE * (-(float(length_change[j]) / (region_end[j] - region_begin[j]))) + + return ns1, ns2 + else: return ns1 + + def generate_double_offset(self, seqA, seqB, offset, start_pos=np.array([0, 0, 0]), direction=np.array([0, 0, 1]), perp=None, rot=0): + if isinstance (seqA, str): + seqa = [base.base_to_number[x] for x in seqA] + else: + seqa = seqA + if isinstance (seqB, str): + seqb = [base.base_to_number[x] for x in seqB] + else: + seqb = seqB + + bp = max (len(seqa), len(seqb) + offset) + + s1, s2 = self.generate(bp, None, start_pos, direction, False, True, 0.) + + s1 = s1.get_slice (0, len(seqa)) + + if len(seqb) + offset > len(seqa): + s2 = s2.get_slice (0, len(seqb)) # starts from opposite end + else: + s2 = s2.get_slice (bp - offset - len(seqb), len(seqb)) + + s1.set_sequence (seqa) + s2.set_sequence (seqb) + + return s1, s2 + + def generate_rw (self, sequence, start_pos=np.array([0., 0., 0.])): + """ + Generate ssDNA as a random walk (high-energy configurations are possible): + generate(bp=45,double=False,circular=False,random_walk=True) + """ + # random walk generator + base.Logger.log("Generating strand as a random walk. Remember to equilibrate the configuration with MC", base.Logger.WARNING) + d = np.array ([0.7525, 0., 0.]) + pos = start_pos + rw = [] + rw.append(pos) + for i, _ in enumerate(sequence[1:]): + overlap = True + while overlap: + overlap = False + R = utils.get_random_rotation_matrix() + dd = np.dot (R, d) + trypos = pos + np.dot (R, d); + overlap = False + for r in rw: + dd = trypos - r + if np.dot (dd, dd) < 0.40 * 0.40: + overlap = True + pos = trypos + rw.append (pos) + + # we get the a1 vectors in a smart way + a1s = [] + d = rw[1] - rw[0] + a1s.append (d / np.sqrt (np.dot(d, d))) + + for i in range (1, len(rw) - 1): + d = (rw[i + 1] + rw[i - 1]) * 0.5 + d = rw[i] - d + a1s.append (d / np.sqrt(np.dot (d, d))) + + d = rw[len(rw) - 1] - rw[len(rw) - 2] + a1s.append (d / np.sqrt (np.dot(d, d))) + + s = base.Strand() + for i, r in enumerate(rw): + a1, _, a3 = utils.get_orthonormalized_base (a1s[i], utils.get_random_vector(), utils.get_random_vector()) + # we use abs since POS_BACK is negative + cm = r + a1s[i] * abs(base.POS_BACK) + s.add_nucleotide (base.Nucleotide (cm, a1, a3, sequence[i])) + + return s + +class vhelix_vbase_to_nucleotide(object): + # at the moment squares with skips in have entries in the dicts but with the nucleotide list empty (rather than having no entry) - I'm not sure whether or not this is desirable. It's probably ok + def __init__(self): + self._scaf = {} + self._stap = {} + self.nuc_count = 0 # record the nucleotide count, updated only after a whole strand is added + self.strand_count = 0 + + def add_scaf(self, vh, vb, strand, nuc): + self._scaf[(vh, vb)] = (strand, nuc) + + def add_stap(self, vh, vb, strand, nuc): + self._stap[(vh, vb)] = (strand, nuc) + + # these methods use a reference vhvb2n object to make the final vhvb2n object + def add_scaf_strand(self, add_strand, reference, continue_join = False): + count = 0 + size = len(self._scaf) + for (vh, vb), [strand_ind, nuc] in reference._scaf.items(): + if strand_ind == add_strand: + self.add_scaf(vh, vb, self.strand_count, [x + self.nuc_count for x in nuc]) + count += len(nuc) + self.nuc_count += count + if len(self._scaf) == size: + return 1 + else: + if continue_join == False: + self.strand_count += 1 + return 0 + + def add_stap_strand(self, add_strand, reference, continue_join = False): + count = 0 + size = len(self._stap) + for (vh, vb), [strand_ind, nuc] in reference._stap.items(): + if strand_ind == add_strand: + self.add_stap(vh, vb, self.strand_count, [x + self.nuc_count for x in nuc]) + count += len(nuc) + self.nuc_count += count + if len(self._stap) == size: + return 1 + else: + if continue_join == False: + self.strand_count += 1 + return 0 + + def add_strand(self, add_strand, reference, continue_join = False): + if self.add_scaf_strand(add_strand, reference, continue_join) and self.add_stap_strand(add_strand, reference, continue_join): + return 1 + else: + return 0 + diff --git a/mrdna/readers/libs/constants.py b/mrdna/readers/libs/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..b6470d17e6d69b201b86d879d4520d8390d02671 --- /dev/null +++ b/mrdna/readers/libs/constants.py @@ -0,0 +1,3 @@ +mass_in_lammps = 3.1575 +inertia_in_lammps = 0.435179 +number_oxdna_to_lammps = {0 : 0, 1 : 2, 2 : 1, 3 : 3} diff --git a/mrdna/readers/libs/pdb.py b/mrdna/readers/libs/pdb.py new file mode 100644 index 0000000000000000000000000000000000000000..842fd408c5c42627424ce9cdcadc6cb1204ef114 --- /dev/null +++ b/mrdna/readers/libs/pdb.py @@ -0,0 +1,231 @@ +import numpy as np +import itertools +from math import sqrt +import sys +import copy + +BASE_SHIFT = 1.13 +COM_SHIFT = 0.5 +FROM_OXDNA_TO_ANGSTROM = 8.518 +FROM_ANGSTROM_TO_OXDNA = 1. / FROM_OXDNA_TO_ANGSTROM + +NAME_TO_BASE = { + "ADE" : "A", + "CYT" : "C", + "GUA" : "G", + "THY" : "T", + "URA" : "U", + } + +BASES = ["A", "T", "G", "C", "U"] + +class Nucleotide(object): + RNA_warning_printed = False + + def __init__(self, name, idx): + object.__init__(self) + self.name = name.strip() + if self.name in list(NAME_TO_BASE.keys()): + self.base = NAME_TO_BASE[self.name] + elif self.name in BASES: + if self.name == "U" and not Nucleotide.RNA_warning_printed: + print("WARNING: unsupported uracil detected: use at your own risk", file=sys.stderr) + Nucleotide.RNA_warning_printed = True + + self.base = self.name + else: + self.base = name[1:] + self.idx = idx + self.base_atoms = [] + self.phosphate_atoms = [] + self.sugar_atoms = [] + self.named_atoms = {} + self.ring_names = ["C2", "C4", "C5", "C6", "N1", "N3"] + self.chain_id = None + + def get_atoms(self): + return self.base_atoms + self.phosphate_atoms + self.sugar_atoms + + def add_atom(self, a): + if 'P' in a.name or a.name == "HO5'": + self.phosphate_atoms.append(a) + elif "'" in a.name: + self.sugar_atoms.append(a) + else: + self.base_atoms.append(a) + + self.named_atoms[a.name] = a + if self.chain_id == None: + self.chain_id = a.chain_id + + def get_com(self, atoms=None): + if atoms == None: + atoms = self.atoms + com = np.array([0., 0., 0.]) + for a in atoms: + com += a.pos + + return com / len(atoms) + + def compute_a3(self): + base_com = self.get_com(self.base_atoms) + # the O4' oxygen is always (at least for non pathological configurations, as far as I know) oriented 3' -> 5' with respect to the base's centre of mass + parallel_to = self.named_atoms["O4'"].pos - base_com + self.a3 = np.array([0., 0., 0.]) + + for perm in itertools.permutations(self.ring_names, 3): + p = self.named_atoms[perm[0]] + q = self.named_atoms[perm[1]] + r = self.named_atoms[perm[2]] + v1 = p.pos - q.pos + v2 = p.pos - r.pos + v1 /= sqrt(np.dot(v1, v1)) + v2 /= sqrt(np.dot(v2, v2)) + if abs(np.dot(v1, v2)) > 0.01 or 1: + a3 = np.cross(v1, v2) + a3 /= sqrt(np.dot(a3, a3)) + if np.dot(a3, parallel_to) < 0.: + a3 = -a3 + self.a3 += a3 + + self.a3 /= sqrt(np.dot(self.a3, self.a3)) + + def compute_a1(self): + if "C" in self.name or "T" in self.name or "U" in self.name: + pairs = [ ["N3", "C6"], ["C2", "N1"], ["C4", "C5"] ] + else: + pairs = [ ["N1", "C4"], ["C2", "N3"], ["C6", "C5"] ] + + self.a1 = np.array([0., 0., 0.]) + for pair in pairs: + p = self.named_atoms[pair[0]] + q = self.named_atoms[pair[1]] + diff = p.pos - q.pos + self.a1 += diff + + self.a1 /= sqrt(np.dot(self.a1, self.a1)) + + def compute_as(self): + self.compute_a1() + self.compute_a3() + self.a2 = np.cross(self.a3, self.a1) + self.check = abs(np.dot(self.a1, self.a3)) + + def correct_for_large_boxes(self, box): + for atom in self.atoms: + atom.shift(-np.rint(atom.pos / box ) * box) + + def to_pdb(self, chain_identifier, print_H, residue_serial, residue_suffix, residue_type, bfactor): + res = [] + for a in self.atoms: + if not print_H and 'H' in a.name: + continue + if residue_type == "5": + if 'P' in a.name: + if a.name == 'P': + phosphorus = a + continue + elif a.name == "O5'": + O5prime = a + elif residue_type == "3": + if a.name == "O3'": + O3prime = a + res.append(a.to_pdb(chain_identifier, residue_serial, residue_suffix, bfactor)) + + # if the residue is a 3' or 5' end, it requires one more hydrogen linked to the O3' or O5', respectively + if residue_type == "5": + new_hydrogen = copy.deepcopy(phosphorus) + new_hydrogen.name = "HO5'" + + # we put the new hydrogen at a distance 1 Angstrom from the O5' oxygen along the direction that, in a regular nucleotide, connects O5' and P + dist_P_O = phosphorus.pos - O5prime.pos + dist_P_O *= 1. / np.sqrt(np.dot(dist_P_O, dist_P_O)) + new_hydrogen.pos = O5prime.pos + dist_P_O + res.append(new_hydrogen.to_pdb(chain_identifier, residue_serial, residue_suffix, bfactor)) + elif residue_type == "3": + new_hydrogen = copy.deepcopy(O3prime) + new_hydrogen.name = "HO3'" + + # we put the new hydrogen at a distance 1 Angstrom from the O3' oxygen along a direction which is a linear combination of the three + # orientations that approximately reproduce the crystallographic position + new_distance = 0.2 * self.a2 - 0.2 * self.a1 - self.a3 + new_distance *= 1. / np.sqrt(np.dot(new_distance, new_distance)) + new_hydrogen.pos = O3prime.pos + new_distance + res.append(new_hydrogen.to_pdb(chain_identifier, residue_serial, residue_suffix, bfactor)) + + return "\n".join(res) + + def to_mgl(self): + res = [] + for a in self.atoms: + res.append(a.to_mgl()) + + return "\n".join(res) + + def rotate(self, R): + com = self.get_com() + for a in self.atoms: + a.pos = np.dot(R, a.pos - com) + com + + self.compute_as() + + def set_com(self, new_com): + com = self.get_com() + for a in self.atoms: + a.pos += new_com - com - COM_SHIFT * self.a1 + + def set_sugar(self, new_base_back_com): + sugar_com = self.get_com(self.sugar_atoms) + for a in self.atoms: + a.pos += new_base_back_com - sugar_com + + self.compute_as() + + def set_base(self, new_base_com): + atoms = [v for k, v in self.named_atoms.items() if k in self.ring_names] + ring_com = self.get_com(atoms) + for a in self.atoms: + a.pos += new_base_com - ring_com - BASE_SHIFT * self.a1 + + self.compute_as() + + atoms = property(get_atoms) + + +class Atom(object): + serial_atom = 1 + + def __init__(self, pdb_line): + object.__init__(self) + # http://cupnet.net/pdb-format/ + self.name = pdb_line[12:16].strip() + # some PDB files have * in place of ' + if "*" in self.name: + self.name = self.name.replace("*", "'") + + self.alternate = pdb_line[16] + self.residue = pdb_line[17:20].strip() + self.chain_id = pdb_line[21:22].strip() + self.residue_idx = int(pdb_line[22:26]) + self.pos = np.array([float(pdb_line[30:38]), float(pdb_line[38:46]), float(pdb_line[46:54])]) + + def is_hydrogen(self): + return "H" in self.name + + def shift(self, diff): + self.pos += diff + + def to_pdb(self, chain_identifier, residue_serial, residue_suffix, bfactor): + residue = self.residue + residue_suffix + res = "{:6s}{:5d} {:^4s}{:1s}{:3s} {:1s}{:4d}{:1s} {:8.3f}{:8.3f}{:8.3f}{:6.2f}{:6.2f} {:>2s}{:2s}".format("ATOM", Atom.serial_atom, self.name, " ", residue, chain_identifier, residue_serial, " ", self.pos[0], self.pos[1], self.pos[2], 1.00, bfactor, " ", " ", " ") + Atom.serial_atom += 1 + if Atom.serial_atom > 99999: + Atom.serial_atom = 1 + return res + + def to_mgl(self): + colors = {"C" : "0,1,1", "P" : "1,1,0", "O" : "1,0,0", "H" : "0.5,0.5,0.5", "N" : "0,0,1"} + for c in colors: + if c in self.name: color = colors[c] + r = 0.5 + return "%s %s %s @ %f C[%s]" % (self.pos[0], self.pos[1], self.pos[2], r, color) diff --git a/mrdna/readers/libs/pyquaternion/LICENSE.txt b/mrdna/readers/libs/pyquaternion/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..a5da57dea2d702dba61870792795d900509e2276 --- /dev/null +++ b/mrdna/readers/libs/pyquaternion/LICENSE.txt @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 Kieran Wynn + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/mrdna/readers/libs/pyquaternion/__init__.py b/mrdna/readers/libs/pyquaternion/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5049043149066621c61ad8fef0498a365cf1f0df --- /dev/null +++ b/mrdna/readers/libs/pyquaternion/__init__.py @@ -0,0 +1 @@ +from .quaternion import Quaternion diff --git a/mrdna/readers/libs/pyquaternion/quaternion.py b/mrdna/readers/libs/pyquaternion/quaternion.py new file mode 100644 index 0000000000000000000000000000000000000000..b386a99326852ad28ddb1a1b20c98c78705d949c --- /dev/null +++ b/mrdna/readers/libs/pyquaternion/quaternion.py @@ -0,0 +1,1183 @@ +""" +This file is part of the pyquaternion python module + +Author: Kieran Wynn +Website: https://github.com/KieranWynn/pyquaternion +Documentation: http://kieranwynn.github.io/pyquaternion/ + +Version: 1.0.0 +License: The MIT License (MIT) + +Copyright (c) 2015 Kieran Wynn + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +quaternion.py - This file defines the core Quaternion class + +""" + +from __future__ import absolute_import, division, print_function # Add compatibility for Python 2.7+ + +from math import sqrt, pi, sin, cos, asin, acos, atan2, exp, log +from copy import deepcopy +import numpy as np # Numpy is required for many vector operations + + +class Quaternion: + """Class to represent a 4-dimensional complex number or quaternion. + + Quaternion objects can be used generically as 4D numbers, + or as unit quaternions to represent rotations in 3D space. + + Attributes: + q: Quaternion 4-vector represented as a Numpy array + + """ + + def __init__(self, *args, **kwargs): + """Initialise a new Quaternion object. + + See Object Initialisation docs for complete behaviour: + + http://kieranwynn.github.io/pyquaternion/initialisation/ + + """ + s = len(args) + if s == 0: + # No positional arguments supplied + if kwargs: + # Keyword arguments provided + if ("scalar" in kwargs) or ("vector" in kwargs): + scalar = kwargs.get("scalar", 0.0) + if scalar is None: + scalar = 0.0 + else: + scalar = float(scalar) + + vector = kwargs.get("vector", []) + vector = self._validate_number_sequence(vector, 3) + + self.q = np.hstack((scalar, vector)) + elif ("real" in kwargs) or ("imaginary" in kwargs): + real = kwargs.get("real", 0.0) + if real is None: + real = 0.0 + else: + real = float(real) + + imaginary = kwargs.get("imaginary", []) + imaginary = self._validate_number_sequence(imaginary, 3) + + self.q = np.hstack((real, imaginary)) + elif ("axis" in kwargs) or ("radians" in kwargs) or ("degrees" in kwargs) or ("angle" in kwargs): + try: + axis = self._validate_number_sequence(kwargs["axis"], 3) + except KeyError: + raise ValueError( + "A valid rotation 'axis' parameter must be provided to describe a meaningful rotation." + ) + angle = kwargs.get('radians') or self.to_radians(kwargs.get('degrees')) or kwargs.get('angle') or 0.0 + self.q = Quaternion._from_axis_angle(axis, angle).q + elif "array" in kwargs: + self.q = self._validate_number_sequence(kwargs["array"], 4) + elif "matrix" in kwargs: + optional_args = {key: kwargs[key] for key in kwargs if key in ['rtol', 'atol']} + self.q = Quaternion._from_matrix(kwargs["matrix"], **optional_args).q + else: + keys = sorted(kwargs.keys()) + elements = [kwargs[kw] for kw in keys] + if len(elements) == 1: + r = float(elements[0]) + self.q = np.array([r, 0.0, 0.0, 0.0]) + else: + self.q = self._validate_number_sequence(elements, 4) + + else: + # Default initialisation + self.q = np.array([1.0, 0.0, 0.0, 0.0]) + elif s == 1: + # Single positional argument supplied + if isinstance(args[0], Quaternion): + self.q = args[0].q + return + if args[0] is None: + raise TypeError("Object cannot be initialised from {}".format(type(args[0]))) + try: + r = float(args[0]) + self.q = np.array([r, 0.0, 0.0, 0.0]) + return + except TypeError: + pass # If the single argument is not scalar, it should be a sequence + + self.q = self._validate_number_sequence(args[0], 4) + return + + else: + # More than one positional argument supplied + self.q = self._validate_number_sequence(args, 4) + + def __hash__(self): + return hash(tuple(self.q)) + + def _validate_number_sequence(self, seq, n): + """Validate a sequence to be of a certain length and ensure it's a numpy array of floats. + + Raises: + ValueError: Invalid length or non-numeric value + """ + if seq is None: + return np.zeros(n) + if len(seq) == n: + try: + l = [float(e) for e in seq] + except ValueError: + raise ValueError("One or more elements in sequence <{!r}> cannot be interpreted as a real number".format(seq)) + else: + return np.asarray(l) + elif len(seq) == 0: + return np.zeros(n) + else: + raise ValueError("Unexpected number of elements in sequence. Got: {}, Expected: {}.".format(len(seq), n)) + + # Initialise from matrix + @classmethod + def _from_matrix(cls, matrix, rtol=1e-05, atol=1e-08): + """Initialise from matrix representation + + Create a Quaternion by specifying the 3x3 rotation or 4x4 transformation matrix + (as a numpy array) from which the quaternion's rotation should be created. + + """ + try: + shape = matrix.shape + except AttributeError: + raise TypeError("Invalid matrix type: Input must be a 3x3 or 4x4 numpy array or matrix") + + if shape == (3, 3): + R = matrix + elif shape == (4, 4): + R = matrix[:-1][:,:-1] # Upper left 3x3 sub-matrix + else: + raise ValueError("Invalid matrix shape: Input must be a 3x3 or 4x4 numpy array or matrix") + + # Check matrix properties + if not np.allclose(np.dot(R, R.conj().transpose()), np.eye(3), rtol=rtol, atol=atol): + raise ValueError("Matrix must be orthogonal, i.e. its transpose should be its inverse") + if not np.isclose(np.linalg.det(R), 1.0, rtol=rtol, atol=atol): + raise ValueError("Matrix must be special orthogonal i.e. its determinant must be +1.0") + + def decomposition_method(matrix): + """ Method supposedly able to deal with non-orthogonal matrices - NON-FUNCTIONAL! + Based on this method: http://arc.aiaa.org/doi/abs/10.2514/2.4654 + """ + x, y, z = 0, 1, 2 # indices + K = np.array([ + [R[x, x]-R[y, y]-R[z, z], R[y, x]+R[x, y], R[z, x]+R[x, z], R[y, z]-R[z, y]], + [R[y, x]+R[x, y], R[y, y]-R[x, x]-R[z, z], R[z, y]+R[y, z], R[z, x]-R[x, z]], + [R[z, x]+R[x, z], R[z, y]+R[y, z], R[z, z]-R[x, x]-R[y, y], R[x, y]-R[y, x]], + [R[y, z]-R[z, y], R[z, x]-R[x, z], R[x, y]-R[y, x], R[x, x]+R[y, y]+R[z, z]] + ]) + K = K / 3.0 + + e_vals, e_vecs = np.linalg.eig(K) + print('Eigenvalues:', e_vals) + print('Eigenvectors:', e_vecs) + max_index = np.argmax(e_vals) + principal_component = e_vecs[max_index] + return principal_component + + def trace_method(matrix): + """ + This code uses a modification of the algorithm described in: + https://d3cw3dd2w32x2b.cloudfront.net/wp-content/uploads/2015/01/matrix-to-quat.pdf + which is itself based on the method described here: + http://www.euclideanspace.com/maths/geometry/rotations/conversions/matrixToQuaternion/ + + Altered to work with the column vector convention instead of row vectors + """ + m = matrix.conj().transpose() # This method assumes row-vector and postmultiplication of that vector + if m[2, 2] < 0: + if m[0, 0] > m[1, 1]: + t = 1 + m[0, 0] - m[1, 1] - m[2, 2] + q = [m[1, 2]-m[2, 1], t, m[0, 1]+m[1, 0], m[2, 0]+m[0, 2]] + else: + t = 1 - m[0, 0] + m[1, 1] - m[2, 2] + q = [m[2, 0]-m[0, 2], m[0, 1]+m[1, 0], t, m[1, 2]+m[2, 1]] + else: + if m[0, 0] < -m[1, 1]: + t = 1 - m[0, 0] - m[1, 1] + m[2, 2] + q = [m[0, 1]-m[1, 0], m[2, 0]+m[0, 2], m[1, 2]+m[2, 1], t] + else: + t = 1 + m[0, 0] + m[1, 1] + m[2, 2] + q = [t, m[1, 2]-m[2, 1], m[2, 0]-m[0, 2], m[0, 1]-m[1, 0]] + + q = np.array(q) + q *= 0.5 / sqrt(t) + return q + + return cls(array=trace_method(R)) + + # Initialise from axis-angle + @classmethod + def _from_axis_angle(cls, axis, angle): + """Initialise from axis and angle representation + + Create a Quaternion by specifying the 3-vector rotation axis and rotation + angle (in radians) from which the quaternion's rotation should be created. + + Params: + axis: a valid numpy 3-vector + angle: a real valued angle in radians + """ + mag_sq = np.dot(axis, axis) + if mag_sq == 0.0: + raise ZeroDivisionError("Provided rotation axis has no length") + # Ensure axis is in unit vector form + if (abs(1.0 - mag_sq) > 1e-12): + axis = axis / sqrt(mag_sq) + theta = angle / 2.0 + r = cos(theta) + i = axis * sin(theta) + + return cls(r, i[0], i[1], i[2]) + + @classmethod + def random(cls): + """Generate a random unit quaternion. + + Uniformly distributed across the rotation space + As per: http://planning.cs.uiuc.edu/node198.html + """ + r1, r2, r3 = np.random.random(3) + + q1 = sqrt(1.0 - r1) * (sin(2 * pi * r2)) + q2 = sqrt(1.0 - r1) * (cos(2 * pi * r2)) + q3 = sqrt(r1) * (sin(2 * pi * r3)) + q4 = sqrt(r1) * (cos(2 * pi * r3)) + + return cls(q1, q2, q3, q4) + + # Representation + def __str__(self): + """An informal, nicely printable string representation of the Quaternion object. + """ + return "{:.3f} {:+.3f}i {:+.3f}j {:+.3f}k".format(self.q[0], self.q[1], self.q[2], self.q[3]) + + def __repr__(self): + """The 'official' string representation of the Quaternion object. + + This is a string representation of a valid Python expression that could be used + to recreate an object with the same value (given an appropriate environment) + """ + return "Quaternion({!r}, {!r}, {!r}, {!r})".format(self.q[0], self.q[1], self.q[2], self.q[3]) + + def __format__(self, formatstr): + """Inserts a customisable, nicely printable string representation of the Quaternion object + + The syntax for `format_spec` mirrors that of the built in format specifiers for floating point types. + Check out the official Python [format specification mini-language](https://docs.python.org/3.4/library/string.html#formatspec) for details. + """ + if formatstr.strip() == '': # Defualt behaviour mirrors self.__str__() + formatstr = '+.3f' + + string = \ + "{:" + formatstr +"} " + \ + "{:" + formatstr +"}i " + \ + "{:" + formatstr +"}j " + \ + "{:" + formatstr +"}k" + return string.format(self.q[0], self.q[1], self.q[2], self.q[3]) + + # Type Conversion + def __int__(self): + """Implements type conversion to int. + + Truncates the Quaternion object by only considering the real + component and rounding to the next integer value towards zero. + Note: to round to the closest integer, use int(round(float(q))) + """ + return int(self.q[0]) + + def __float__(self): + """Implements type conversion to float. + + Truncates the Quaternion object by only considering the real + component. + """ + return float(self.q[0]) + + def __complex__(self): + """Implements type conversion to complex. + + Truncates the Quaternion object by only considering the real + component and the first imaginary component. + This is equivalent to a projection from the 4-dimensional hypersphere + to the 2-dimensional complex plane. + """ + return complex(self.q[0], self.q[1]) + + def __bool__(self): + return not (self == Quaternion(0.0)) + + def __nonzero__(self): + return not (self == Quaternion(0.0)) + + def __invert__(self): + return (self == Quaternion(0.0)) + + # Comparison + def __eq__(self, other): + """Returns true if the following is true for each element: + `absolute(a - b) <= (atol + rtol * absolute(b))` + """ + if isinstance(other, Quaternion): + r_tol = 1.0e-13 + a_tol = 1.0e-14 + try: + isEqual = np.allclose(self.q, other.q, rtol=r_tol, atol=a_tol) + except AttributeError: + raise AttributeError("Error in internal quaternion representation means it cannot be compared like a numpy array.") + return isEqual + return self.__eq__(self.__class__(other)) + + # Negation + def __neg__(self): + return self.__class__(array= -self.q) + + # Absolute value + def __abs__(self): + return self.norm + + # Addition + def __add__(self, other): + if isinstance(other, Quaternion): + return self.__class__(array=self.q + other.q) + return self + self.__class__(other) + + def __iadd__(self, other): + return self + other + + def __radd__(self, other): + return self + other + + # Subtraction + def __sub__(self, other): + return self + (-other) + + def __isub__(self, other): + return self + (-other) + + def __rsub__(self, other): + return -(self - other) + + # Multiplication + def __mul__(self, other): + if isinstance(other, Quaternion): + return self.__class__(array=np.dot(self._q_matrix(), other.q)) + return self * self.__class__(other) + + def __imul__(self, other): + return self * other + + def __rmul__(self, other): + return self.__class__(other) * self + + def __matmul__(self, other): + if isinstance(other, Quaternion): + return self.q.__matmul__(other.q) + return self.__matmul__(self.__class__(other)) + + def __imatmul__(self, other): + return self.__matmul__(other) + + def __rmatmul__(self, other): + return self.__class__(other).__matmul__(self) + + # Division + def __div__(self, other): + if isinstance(other, Quaternion): + if other == self.__class__(0.0): + raise ZeroDivisionError("Quaternion divisor must be non-zero") + return self * other.inverse + return self.__div__(self.__class__(other)) + + def __idiv__(self, other): + return self.__div__(other) + + def __rdiv__(self, other): + return self.__class__(other) * self.inverse + + def __truediv__(self, other): + return self.__div__(other) + + def __itruediv__(self, other): + return self.__idiv__(other) + + def __rtruediv__(self, other): + return self.__rdiv__(other) + + # Exponentiation + def __pow__(self, exponent): + # source: https://en.wikipedia.org/wiki/Quaternion#Exponential.2C_logarithm.2C_and_power + exponent = float(exponent) # Explicitly reject non-real exponents + norm = self.norm + if norm > 0.0: + try: + n, theta = self.polar_decomposition + except ZeroDivisionError: + # quaternion is a real number (no vector or imaginary part) + return Quaternion(scalar=self.scalar ** exponent) + return (self.norm ** exponent) * Quaternion(scalar=cos(exponent * theta), vector=(n * sin(exponent * theta))) + return Quaternion(self) + + def __ipow__(self, other): + return self ** other + + def __rpow__(self, other): + return other ** float(self) + + # Quaternion Features + def _vector_conjugate(self): + return np.hstack((self.q[0], -self.q[1:4])) + + def _sum_of_squares(self): + return np.dot(self.q, self.q) + + @property + def conjugate(self): + """Quaternion conjugate, encapsulated in a new instance. + + For a unit quaternion, this is the same as the inverse. + + Returns: + A new Quaternion object clone with its vector part negated + """ + return self.__class__(scalar=self.scalar, vector=-self.vector) + + @property + def inverse(self): + """Inverse of the quaternion object, encapsulated in a new instance. + + For a unit quaternion, this is the inverse rotation, i.e. when combined with the original rotation, will result in the null rotation. + + Returns: + A new Quaternion object representing the inverse of this object + """ + ss = self._sum_of_squares() + if ss > 0: + return self.__class__(array=(self._vector_conjugate() / ss)) + else: + raise ZeroDivisionError("a zero quaternion (0 + 0i + 0j + 0k) cannot be inverted") + + @property + def norm(self): + """L2 norm of the quaternion 4-vector. + + This should be 1.0 for a unit quaternion (versor) + Slow but accurate. If speed is a concern, consider using _fast_normalise() instead + + Returns: + A scalar real number representing the square root of the sum of the squares of the elements of the quaternion. + """ + mag_squared = self._sum_of_squares() + return sqrt(mag_squared) + + @property + def magnitude(self): + return self.norm + + def _normalise(self): + """Object is guaranteed to be a unit quaternion after calling this + operation UNLESS the object is equivalent to Quaternion(0) + """ + if not self.is_unit(): + n = self.norm + if n > 0: + self.q = self.q / n + + def _fast_normalise(self): + """Normalise the object to a unit quaternion using a fast approximation method if appropriate. + + Object is guaranteed to be a quaternion of approximately unit length + after calling this operation UNLESS the object is equivalent to Quaternion(0) + """ + if not self.is_unit(): + mag_squared = np.dot(self.q, self.q) + if (mag_squared == 0): + return + if (abs(1.0 - mag_squared) < 2.107342e-08): + mag = ((1.0 + mag_squared) / 2.0) # More efficient. Pade approximation valid if error is small + else: + mag = sqrt(mag_squared) # Error is too big, take the performance hit to calculate the square root properly + + self.q = self.q / mag + + @property + def normalised(self): + """Get a unit quaternion (versor) copy of this Quaternion object. + + A unit quaternion has a `norm` of 1.0 + + Returns: + A new Quaternion object clone that is guaranteed to be a unit quaternion + """ + q = Quaternion(self) + q._normalise() + return q + + @property + def polar_unit_vector(self): + vector_length = np.linalg.norm(self.vector) + if vector_length <= 0.0: + raise ZeroDivisionError('Quaternion is pure real and does not have a unique unit vector') + return self.vector / vector_length + + @property + def polar_angle(self): + return acos(self.scalar / self.norm) + + @property + def polar_decomposition(self): + """ + Returns the unit vector and angle of a non-scalar quaternion according to the following decomposition + + q = q.norm() * (e ** (q.polar_unit_vector * q.polar_angle)) + + source: https://en.wikipedia.org/wiki/Polar_decomposition#Quaternion_polar_decomposition + """ + return self.polar_unit_vector, self.polar_angle + + @property + def unit(self): + return self.normalised + + def is_unit(self, tolerance=1e-14): + """Determine whether the quaternion is of unit length to within a specified tolerance value. + + Params: + tolerance: [optional] maximum absolute value by which the norm can differ from 1.0 for the object to be considered a unit quaternion. Defaults to `1e-14`. + + Returns: + `True` if the Quaternion object is of unit length to within the specified tolerance value. `False` otherwise. + """ + return abs(1.0 - self._sum_of_squares()) < tolerance # if _sum_of_squares is 1, norm is 1. This saves a call to sqrt() + + def _q_matrix(self): + """Matrix representation of quaternion for multiplication purposes. + """ + return np.array([ + [self.q[0], -self.q[1], -self.q[2], -self.q[3]], + [self.q[1], self.q[0], -self.q[3], self.q[2]], + [self.q[2], self.q[3], self.q[0], -self.q[1]], + [self.q[3], -self.q[2], self.q[1], self.q[0]]]) + + def _q_bar_matrix(self): + """Matrix representation of quaternion for multiplication purposes. + """ + return np.array([ + [self.q[0], -self.q[1], -self.q[2], -self.q[3]], + [self.q[1], self.q[0], self.q[3], -self.q[2]], + [self.q[2], -self.q[3], self.q[0], self.q[1]], + [self.q[3], self.q[2], -self.q[1], self.q[0]]]) + + def _rotate_quaternion(self, q): + """Rotate a quaternion vector using the stored rotation. + + Params: + q: The vector to be rotated, in quaternion form (0 + xi + yj + kz) + + Returns: + A Quaternion object representing the rotated vector in quaternion from (0 + xi + yj + kz) + """ + self._normalise() + return self * q * self.conjugate + + def rotate(self, vector): + """Rotate a 3D vector by the rotation stored in the Quaternion object. + + Params: + vector: A 3-vector specified as any ordered sequence of 3 real numbers corresponding to x, y, and z values. + Some types that are recognised are: numpy arrays, lists and tuples. + A 3-vector can also be represented by a Quaternion object who's scalar part is 0 and vector part is the required 3-vector. + Thus it is possible to call `Quaternion.rotate(q)` with another quaternion object as an input. + + Returns: + The rotated vector returned as the same type it was specified at input. + + Raises: + TypeError: if any of the vector elements cannot be converted to a real number. + ValueError: if `vector` cannot be interpreted as a 3-vector or a Quaternion object. + + """ + if isinstance(vector, Quaternion): + return self._rotate_quaternion(vector) + q = Quaternion(vector=vector) + a = self._rotate_quaternion(q).vector + if isinstance(vector, list): + l = [x for x in a] + return l + elif isinstance(vector, tuple): + l = [x for x in a] + return tuple(l) + else: + return a + + @classmethod + def exp(cls, q): + """Quaternion Exponential. + + Find the exponential of a quaternion amount. + + Params: + q: the input quaternion/argument as a Quaternion object. + + Returns: + A quaternion amount representing the exp(q). See [Source](https://math.stackexchange.com/questions/1030737/exponential-function-of-quaternion-derivation for more information and mathematical background). + + Note: + The method can compute the exponential of any quaternion. + """ + tolerance = 1e-17 + v_norm = np.linalg.norm(q.vector) + vec = q.vector + if v_norm > tolerance: + vec = vec / v_norm + magnitude = exp(q.scalar) + return Quaternion(scalar = magnitude * cos(v_norm), vector = magnitude * sin(v_norm) * vec) + + @classmethod + def log(cls, q): + """Quaternion Logarithm. + + Find the logarithm of a quaternion amount. + + Params: + q: the input quaternion/argument as a Quaternion object. + + Returns: + A quaternion amount representing log(q) := (log(|q|), v/|v|acos(w/|q|)). + + Note: + The method computes the logarithm of general quaternions. See [Source](https://math.stackexchange.com/questions/2552/the-logarithm-of-quaternion/2554#2554) for more details. + """ + v_norm = np.linalg.norm(q.vector) + q_norm = q.norm + tolerance = 1e-17 + if q_norm < tolerance: + # 0 quaternion - undefined + return Quaternion(scalar=-float('inf'), vector=float('nan')*q.vector) + if v_norm < tolerance: + # real quaternions - no imaginary part + return Quaternion(scalar=log(q_norm), vector=[0, 0, 0]) + vec = q.vector / v_norm + return Quaternion(scalar=log(q_norm), vector=acos(q.scalar/q_norm)*vec) + + @classmethod + def exp_map(cls, q, eta): + """Quaternion exponential map. + + Find the exponential map on the Riemannian manifold described + by the quaternion space. + + Params: + q: the base point of the exponential map, i.e. a Quaternion object + eta: the argument of the exponential map, a tangent vector, i.e. a Quaternion object + + Returns: + A quaternion p such that p is the endpoint of the geodesic starting at q + in the direction of eta, having the length equal to the magnitude of eta. + + Note: + The exponential map plays an important role in integrating orientation + variations (e.g. angular velocities). This is done by projecting + quaternion tangent vectors onto the quaternion manifold. + """ + return q * Quaternion.exp(eta) + + @classmethod + def sym_exp_map(cls, q, eta): + """Quaternion symmetrized exponential map. + + Find the symmetrized exponential map on the quaternion Riemannian + manifold. + + Params: + q: the base point as a Quaternion object + eta: the tangent vector argument of the exponential map + as a Quaternion object + + Returns: + A quaternion p. + + Note: + The symmetrized exponential formulation is akin to the exponential + formulation for symmetric positive definite tensors [Source](http://www.academia.edu/7656761/On_the_Averaging_of_Symmetric_Positive-Definite_Tensors) + """ + sqrt_q = q ** 0.5 + return sqrt_q * Quaternion.exp(eta) * sqrt_q + + @classmethod + def log_map(cls, q, p): + """Quaternion logarithm map. + + Find the logarithm map on the quaternion Riemannian manifold. + + Params: + q: the base point at which the logarithm is computed, i.e. + a Quaternion object + p: the argument of the quaternion map, a Quaternion object + + Returns: + A tangent vector having the length and direction given by the + geodesic joining q and p. + """ + return Quaternion.log(q.inverse * p) + + @classmethod + def sym_log_map(cls, q, p): + """Quaternion symmetrized logarithm map. + + Find the symmetrized logarithm map on the quaternion Riemannian manifold. + + Params: + q: the base point at which the logarithm is computed, i.e. + a Quaternion object + p: the argument of the quaternion map, a Quaternion object + + Returns: + A tangent vector corresponding to the symmetrized geodesic curve formulation. + + Note: + Information on the symmetrized formulations given in [Source](https://www.researchgate.net/publication/267191489_Riemannian_L_p_Averaging_on_Lie_Group_of_Nonzero_Quaternions). + """ + inv_sqrt_q = (q ** (-0.5)) + return Quaternion.log(inv_sqrt_q * p * inv_sqrt_q) + + @classmethod + def absolute_distance(cls, q0, q1): + """Quaternion absolute distance. + + Find the distance between two quaternions accounting for the sign ambiguity. + + Params: + q0: the first quaternion + q1: the second quaternion + + Returns: + A positive scalar corresponding to the chord of the shortest path/arc that + connects q0 to q1. + + Note: + This function does not measure the distance on the hypersphere, but + it takes into account the fact that q and -q encode the same rotation. + It is thus a good indicator for rotation similarities. + """ + q0_minus_q1 = q0 - q1 + q0_plus_q1 = q0 + q1 + d_minus = q0_minus_q1.norm + d_plus = q0_plus_q1.norm + if d_minus < d_plus: + return d_minus + else: + return d_plus + + @classmethod + def distance(cls, q0, q1): + """Quaternion intrinsic distance. + + Find the intrinsic geodesic distance between q0 and q1. + + Params: + q0: the first quaternion + q1: the second quaternion + + Returns: + A positive amount corresponding to the length of the geodesic arc + connecting q0 to q1. + + Note: + Although the q0^(-1)*q1 != q1^(-1)*q0, the length of the path joining + them is given by the logarithm of those product quaternions, the norm + of which is the same. + """ + q = Quaternion.log_map(q0, q1) + return q.norm + + @classmethod + def sym_distance(cls, q0, q1): + """Quaternion symmetrized distance. + + Find the intrinsic symmetrized geodesic distance between q0 and q1. + + Params: + q0: the first quaternion + q1: the second quaternion + + Returns: + A positive amount corresponding to the length of the symmetrized + geodesic curve connecting q0 to q1. + + Note: + This formulation is more numerically stable when performing + iterative gradient descent on the Riemannian quaternion manifold. + However, the distance between q and -q is equal to pi, rendering this + formulation not useful for measuring rotation similarities when the + samples are spread over a "solid" angle of more than pi/2 radians + (the spread refers to quaternions as point samples on the unit hypersphere). + """ + q = Quaternion.sym_log_map(q0, q1) + return q.norm + + @classmethod + def slerp(cls, q0, q1, amount=0.5): + """Spherical Linear Interpolation between quaternions. + Implemented as described in https://en.wikipedia.org/wiki/Slerp + + Find a valid quaternion rotation at a specified distance along the + minor arc of a great circle passing through any two existing quaternion + endpoints lying on the unit radius hypersphere. + + This is a class method and is called as a method of the class itself rather than on a particular instance. + + Params: + q0: first endpoint rotation as a Quaternion object + q1: second endpoint rotation as a Quaternion object + amount: interpolation parameter between 0 and 1. This describes the linear placement position of + the result along the arc between endpoints; 0 being at `q0` and 1 being at `q1`. + Defaults to the midpoint (0.5). + + Returns: + A new Quaternion object representing the interpolated rotation. This is guaranteed to be a unit quaternion. + + Note: + This feature only makes sense when interpolating between unit quaternions (those lying on the unit radius hypersphere). + Calling this method will implicitly normalise the endpoints to unit quaternions if they are not already unit length. + """ + # Ensure quaternion inputs are unit quaternions and 0 <= amount <=1 + q0._fast_normalise() + q1._fast_normalise() + amount = np.clip(amount, 0, 1) + + dot = np.dot(q0.q, q1.q) + + # If the dot product is negative, slerp won't take the shorter path. + # Note that v1 and -v1 are equivalent when the negation is applied to all four components. + # Fix by reversing one quaternion + if dot < 0.0: + q0.q = -q0.q + dot = -dot + + # sin_theta_0 can not be zero + if dot > 0.9995: + qr = Quaternion(q0.q + amount * (q1.q - q0.q)) + qr._fast_normalise() + return qr + + theta_0 = np.arccos(dot) # Since dot is in range [0, 0.9995], np.arccos() is safe + sin_theta_0 = np.sin(theta_0) + + theta = theta_0 * amount + sin_theta = np.sin(theta) + + s0 = np.cos(theta) - dot * sin_theta / sin_theta_0 + s1 = sin_theta / sin_theta_0 + qr = Quaternion((s0 * q0.q) + (s1 * q1.q)) + qr._fast_normalise() + return qr + + @classmethod + def intermediates(cls, q0, q1, n, include_endpoints=False): + """Generator method to get an iterable sequence of `n` evenly spaced quaternion + rotations between any two existing quaternion endpoints lying on the unit + radius hypersphere. + + This is a convenience function that is based on `Quaternion.slerp()` as defined above. + + This is a class method and is called as a method of the class itself rather than on a particular instance. + + Params: + q_start: initial endpoint rotation as a Quaternion object + q_end: final endpoint rotation as a Quaternion object + n: number of intermediate quaternion objects to include within the interval + include_endpoints: [optional] if set to `True`, the sequence of intermediates + will be 'bookended' by `q_start` and `q_end`, resulting in a sequence length of `n + 2`. + If set to `False`, endpoints are not included. Defaults to `False`. + + Yields: + A generator object iterating over a sequence of intermediate quaternion objects. + + Note: + This feature only makes sense when interpolating between unit quaternions (those lying on the unit radius hypersphere). + Calling this method will implicitly normalise the endpoints to unit quaternions if they are not already unit length. + """ + step_size = 1.0 / (n + 1) + if include_endpoints: + steps = [i * step_size for i in range(0, n + 2)] + else: + steps = [i * step_size for i in range(1, n + 1)] + for step in steps: + yield cls.slerp(q0, q1, step) + + def derivative(self, rate): + """Get the instantaneous quaternion derivative representing a quaternion rotating at a 3D rate vector `rate` + + Params: + rate: numpy 3-array (or array-like) describing rotation rates about the global x, y and z axes respectively. + + Returns: + A unit quaternion describing the rotation rate + """ + rate = self._validate_number_sequence(rate, 3) + return 0.5 * self * Quaternion(vector=rate) + + def integrate(self, rate, timestep): + """Advance a time varying quaternion to its value at a time `timestep` in the future. + + The Quaternion object will be modified to its future value. + It is guaranteed to remain a unit quaternion. + + Params: + + rate: numpy 3-array (or array-like) describing rotation rates about the + global x, y and z axes respectively. + timestep: interval over which to integrate into the future. + Assuming *now* is `T=0`, the integration occurs over the interval + `T=0` to `T=timestep`. Smaller intervals are more accurate when + `rate` changes over time. + + Note: + The solution is closed form given the assumption that `rate` is constant + over the interval of length `timestep`. + """ + self._fast_normalise() + rate = self._validate_number_sequence(rate, 3) + + rotation_vector = rate * timestep + rotation_norm = np.linalg.norm(rotation_vector) + if rotation_norm > 0: + axis = rotation_vector / rotation_norm + angle = rotation_norm + q2 = Quaternion(axis=axis, angle=angle) + self.q = (self * q2).q + self._fast_normalise() + + + @property + def rotation_matrix(self): + """Get the 3x3 rotation matrix equivalent of the quaternion rotation. + + Returns: + A 3x3 orthogonal rotation matrix as a 3x3 Numpy array + + Note: + This feature only makes sense when referring to a unit quaternion. Calling this method will implicitly normalise the Quaternion object to a unit quaternion if it is not already one. + + """ + self._normalise() + product_matrix = np.dot(self._q_matrix(), self._q_bar_matrix().conj().transpose()) + return product_matrix[1:][:, 1:] + + @property + def transformation_matrix(self): + """Get the 4x4 homogeneous transformation matrix equivalent of the quaternion rotation. + + Returns: + A 4x4 homogeneous transformation matrix as a 4x4 Numpy array + + Note: + This feature only makes sense when referring to a unit quaternion. Calling this method will implicitly normalise the Quaternion object to a unit quaternion if it is not already one. + """ + t = np.array([[0.0], [0.0], [0.0]]) + Rt = np.hstack([self.rotation_matrix, t]) + return np.vstack([Rt, np.array([0.0, 0.0, 0.0, 1.0])]) + + @property + def yaw_pitch_roll(self): + """Get the equivalent yaw-pitch-roll angles aka. intrinsic Tait-Bryan angles following the z-y'-x'' convention + + Returns: + yaw: rotation angle around the z-axis in radians, in the range `[-pi, pi]` + pitch: rotation angle around the y'-axis in radians, in the range `[-pi/2, -pi/2]` + roll: rotation angle around the x''-axis in radians, in the range `[-pi, pi]` + + The resulting rotation_matrix would be R = R_x(roll) R_y(pitch) R_z(yaw) + + Note: + This feature only makes sense when referring to a unit quaternion. Calling this method will implicitly normalise the Quaternion object to a unit quaternion if it is not already one. + """ + + self._normalise() + yaw = np.arctan2(2 * (self.q[0] * self.q[3] - self.q[1] * self.q[2]), + 1 - 2 * (self.q[2] ** 2 + self.q[3] ** 2)) + pitch = np.arcsin(2 * (self.q[0] * self.q[2] + self.q[3] * self.q[1])) + roll = np.arctan2(2 * (self.q[0] * self.q[1] - self.q[2] * self.q[3]), + 1 - 2 * (self.q[1] ** 2 + self.q[2] ** 2)) + + return yaw, pitch, roll + + def _wrap_angle(self, theta): + """Helper method: Wrap any angle to lie between -pi and pi + + Odd multiples of pi are wrapped to +pi (as opposed to -pi) + """ + result = ((theta + pi) % (2 * pi)) - pi + if result == -pi: + result = pi + return result + + def get_axis(self, undefined=np.zeros(3)): + """Get the axis or vector about which the quaternion rotation occurs + + For a null rotation (a purely real quaternion), the rotation angle will + always be `0`, but the rotation axis is undefined. + It is by default assumed to be `[0, 0, 0]`. + + Params: + undefined: [optional] specify the axis vector that should define a null rotation. + This is geometrically meaningless, and could be any of an infinite set of vectors, + but can be specified if the default (`[0, 0, 0]`) causes undesired behaviour. + + Returns: + A Numpy unit 3-vector describing the Quaternion object's axis of rotation. + + Note: + This feature only makes sense when referring to a unit quaternion. + Calling this method will implicitly normalise the Quaternion object to a unit quaternion if it is not already one. + """ + tolerance = 1e-17 + self._normalise() + norm = np.linalg.norm(self.vector) + if norm < tolerance: + # Here there are an infinite set of possible axes, use what has been specified as an undefined axis. + return undefined + else: + return self.vector / norm + + @property + def axis(self): + return self.get_axis() + + @property + def angle(self): + """Get the angle (in radians) describing the magnitude of the quaternion rotation about its rotation axis. + + This is guaranteed to be within the range (-pi:pi) with the direction of + rotation indicated by the sign. + + When a particular rotation describes a 180 degree rotation about an arbitrary + axis vector `v`, the conversion to axis / angle representation may jump + discontinuously between all permutations of `(-pi, pi)` and `(-v, v)`, + each being geometrically equivalent (see Note in documentation). + + Returns: + A real number in the range (-pi:pi) describing the angle of rotation + in radians about a Quaternion object's axis of rotation. + + Note: + This feature only makes sense when referring to a unit quaternion. + Calling this method will implicitly normalise the Quaternion object to a unit quaternion if it is not already one. + """ + self._normalise() + norm = np.linalg.norm(self.vector) + return self._wrap_angle(2.0 * atan2(norm, self.scalar)) + + @property + def degrees(self): + return self.to_degrees(self.angle) + + @property + def radians(self): + return self.angle + + @property + def scalar(self): + """ Return the real or scalar component of the quaternion object. + + Returns: + A real number i.e. float + """ + return self.q[0] + + @property + def vector(self): + """ Return the imaginary or vector component of the quaternion object. + + Returns: + A numpy 3-array of floats. NOT guaranteed to be a unit vector + """ + return self.q[1:4] + + @property + def real(self): + return self.scalar + + @property + def imaginary(self): + return self.vector + + @property + def w(self): + return self.q[0] + + @property + def x(self): + return self.q[1] + + @property + def y(self): + return self.q[2] + + @property + def z(self): + return self.q[3] + + @property + def elements(self): + """ Return all the elements of the quaternion object. + + Returns: + A numpy 4-array of floats. NOT guaranteed to be a unit vector + """ + return self.q + + def __getitem__(self, index): + index = int(index) + return self.q[index] + + def __setitem__(self, index, value): + index = int(index) + self.q[index] = float(value) + + def __copy__(self): + result = self.__class__(self.q) + return result + + def __deepcopy__(self, memo): + result = self.__class__(deepcopy(self.q, memo)) + memo[id(self)] = result + return result + + @staticmethod + def to_degrees(angle_rad): + if angle_rad is not None: + return float(angle_rad) / pi * 180.0 + + @staticmethod + def to_radians(angle_deg): + if angle_deg is not None: + return float(angle_deg) / 180.0 * pi diff --git a/mrdna/readers/libs/reader_lammps_init.py b/mrdna/readers/libs/reader_lammps_init.py new file mode 100644 index 0000000000000000000000000000000000000000..003b29602225e1adcea0b56d4cfaf3a1125e0020 --- /dev/null +++ b/mrdna/readers/libs/reader_lammps_init.py @@ -0,0 +1,193 @@ +import numpy as np +import sys + +SECTIONS = set([ + 'Atoms', + 'Velocities', + 'Ellipsoids', + 'Bonds', +]) + +HEADERS = set([ + 'atoms', + 'bonds', + 'atom types', + 'bond types', + 'ellipsoids', + 'xlo xhi', + 'ylo yhi', + 'zlo zhi', +]) + + +class Lammps_parser(object): + + def __init__(self, filename): + self.filename = filename + + head, sects = self.grab_datafile() + + self.natoms = int(head['atoms']) + self.nbonds = int(head['bonds']) + self.nellipsoids = int(head['ellipsoids']) + x1, x2 = np.float32(head['xlo xhi'].split()) + self.Lx = x2 - x1 + y1, y2 = np.float32(head['ylo yhi'].split()) + self.Ly = y2 - y1 + z1, z2 = np.float32(head['zlo zhi'].split()) + self.Lz = z2 - z1 + + self.parse_Atoms_header(sects['Atoms']) + self.parse_bonds(sects['Bonds']) + self.parse_ellipsoids(sects['Ellipsoids']) + + self.parse_velocities(sects['Velocities']) + + # checking the nucleotides have indexes ordered the same way as bonds and are compatible with strands + for i in range(self.natoms): + next_bond = self.bonds[i][1] + + if next_bond != -1: + #check the consecutive bond is on the same strand + if self.strand[i] != self.strand[next_bond]: + print("Wrong bond arising between two different strands", file=sys.stderr) + #check the right bond is an higher index (except from the circular closure) + if next_bond == i-1: + print("The bonds should be in incremental order (i i+1) except for strand circularization N 0", file=sys.stderr) + if next_bond > i+1: + print("The bonds should be in incremental order (i i+1) except for strand circularization N 0", file=sys.stderr) + if next_bond < i+1: + if self.bonds[next_bond][1] != next_bond + 1: + print("The bonds should be in incremental order (i i+1) except for strand circularization N 0", file=sys.stderr) + + # more check to insert about completely random ordering + + def parse_Atoms_header(self, datalines): + if self.natoms != len(datalines): + raise ValueError("Number of atoms in header %d and in Atoms %d do not coincide" % (self.natoms, len(datalines))) + # Fields per line + if len(datalines[1].split()) < 8: + raise ValueError("Atoms section should be the default one # Atom-ID, type, position, molecule-ID, ellipsoid flag, density with at least 8 columns and not %d" % len(datalines[1].split())) + N = self.natoms + # atom ids aren't necessarily sequential + self.bases = np.zeros(N, dtype=int) + self.strand = np.zeros(N, dtype=int) + self.xyz = np.zeros((N, 3), dtype=float) + for _, line in enumerate(datalines): + line = line.split() + index = int(line[0]) - 1 + self.bases[index] = line[1] + self.strand[index] = line[5] + self.xyz[index, :] = line[2:5] + + self.nstrands = len(np.unique(self.strand)) + + def parse_velocities(self, datalines): + if self.natoms != len(datalines): + raise ValueError("Velocities section do not contain the same amount of particles as number of atoms %d != %d " % self.natoms, len(datalines)) + if len(datalines[1].split()) != 7: + raise ValueError("Velocities section should be the default one # Atom-ID, translational, rotational velocity with 7 columns and not %d" % len(datalines[1].split())) + else: + N = self.natoms + self.v = np.zeros((N, 3), dtype=float) + self.Lv = np.zeros((N, 3), dtype=float) + for _, line in enumerate(datalines): + line = line.split() + index = int(line[0]) - 1 + self.v[index] = line[1:4] + self.Lv[index] = line[4:7] + + def _N_strands_from_bonds(self, bonds): + ''' + This method partition nucleotides into strands according to their nearest neighbours and returns the number of such strands. + ''' + def flip_neighs(bonds, clusters, i): + for neigh in bonds[i]: + if clusters[i] > clusters[neigh]: + clusters[i] = clusters[neigh] + flip_neighs(bonds, clusters, neigh) + + + N = len(bonds) + strands = np.arange(0, N, 1, dtype=np.int_) + + for i in range(N): + flip_neighs(bonds, strands, i) + + return len(np.unique(strands)) + + def parse_bonds(self, datalines): + if len(datalines[1].split()) != 4: + raise ValueError("Bonds section should have 4 columns and not %d" % len(datalines[1].split())) + + if self.nbonds != len(datalines): + raise ValueError("Number of atoms in header %d and in Bonds %d do not coincide" % self.nbonds, len(datalines)) + + # creating a vector indicating for each particle who it is bonded too on its left and right in order of increasing index + natoms = self.natoms + self.bonds = np.ones((natoms, 2), dtype=int) * (-1) + for _, line in enumerate(datalines): + line = line.split() + p1 = int(line[2]) - 1 + p2 = int(line[3]) - 1 + + self.bonds[p1][1] = p2 + self.bonds[p2][0] = p1 + + N_strands = self._N_strands_from_bonds(self.bonds) + if N_strands != self.nstrands: + raise ValueError("There is a mismatch between the number of strands as detected by the Atoms (%d) and Bonds (%d) sections" % (self.nstrands, N_strands)) + + N_ends_3p = 0 + N_ends_5p = 0 + for b in self.bonds: + if b[0] == -1: + N_ends_3p += 1 + elif b[1] == -1: + N_ends_5p += 1 + + if N_ends_3p != N_ends_5p: + raise ValueError("There is a mismatch between the number of 3' ends (%d) and 5' ends (%d)" % (N_ends_3p, N_ends_5p), file=sys.stderr) + + def parse_ellipsoids(self, datalines): + if len(datalines[1].split()) != 8: + raise ValueError("Ellipsoid section should be the default one # Atom-ID, shape, quaternion with 8 columns and not %d" % len(datalines[1].split())) + + if self.nellipsoids != len(datalines): + raise ValueError("Number of ellipsoids in header %d and in Bonds %d do not coincide" % self.nellipsoids, len(datalines)) + + nellipsoids = self.nellipsoids + self.ellipsoids = np.zeros((nellipsoids, 4), dtype=float) + for _, line in enumerate(datalines): + line = line.split() + index = int(line[0]) - 1 + self.ellipsoids[index, :] = line[4:8] + + def iterdata(self): + with open(self.filename) as f: + for line in f: + line = line.partition('#')[0].strip() + if line: + yield line + + def grab_datafile(self): + f = list(self.iterdata()) + starts = [i for i, line in enumerate(f) + if line.split()[0] in SECTIONS] + starts += [None] + + # we save here the lammps init header information (mass, N, etc) + header = {} + for line in f[:starts[0]]: + for token in HEADERS: + if line.endswith(token): + header[token] = line.split(token)[0] + + # we associate to each section the content (Atoms, Bonds, etc) + sects = {f[l]:f[l + 1:starts[i + 1]] for i, l in enumerate(starts[:-1])} + + if 'Atoms' not in sects: + raise ValueError("Data file was missing Atoms section") + + return header, sects + diff --git a/mrdna/readers/libs/readers.py b/mrdna/readers/libs/readers.py new file mode 100644 index 0000000000000000000000000000000000000000..94bbd2bb4cd807b08007a1e99d6e8db1427009ea --- /dev/null +++ b/mrdna/readers/libs/readers.py @@ -0,0 +1,106 @@ +from . import base +import numpy as np +import os.path + +class LorenzoReader: + def __init__(self, topology, configuration): + self._conf = False + + if not os.path.isfile(configuration): + base.Logger.die("Configuration file '%s' is not readable" % configuration) + + if not os.path.isfile(topology): + base.Logger.die("Topology file '%s' is not readable" % topology) + + self._conf = open(configuration, "r") + + f = open(topology, "r") + self.N, self.N_strands = [int(x) for x in f.readline().split()] + self._top_lines = f.readlines() + if len(self._top_lines) != self.N: + raise Exception("The number of nucleotides specified in the topology file header (%d) is different from the number of nucleotide lines found in the same file (%d)" % (self.N, len(self._top_lines))) + + def __del__(self): + if self._conf: self._conf.close() + + def _read(self, only_strand_ends=False, skip=False): + try: + timeline = self._conf.readline() + time = float(timeline.split()[2]) + + box = np.array([float(x) for x in self._conf.readline().split()[2:]]) + self._conf.readline() + except Exception as e: + raise Exception("The header lines of the configuration file are invalid (caught a '%s' exception)" % e) + + if skip: + for tl in self._top_lines: + self._conf.readline() + + return False + + system = base.System(box, time=time) + base.Nucleotide.index = 0 + base.Strand.index = 0 + + s = False + strandid_current = 0 + for i_line, tl in enumerate(self._top_lines): + tls = tl.split() + n3 = int(tls[2]) + n5 = int(tls[3]) + strandid = int(tls[0]) + if (len (tls[1]) == 1): + b = base.base_to_number[tls[1]] + bb = b + else: + try: + tmp = int (tls[1]) + except: + raise Exception("The line n. %d in the topology file contains an incorrect specific base pairing" % i_line) + + if tmp > 0: + b = tmp % 4 + else: + b = (3 - ((3 - tmp) % 4)) + bb = tmp + + if strandid != strandid_current: + # check for circular strand + if n3 != -1: + iscircular = True + else: + iscircular = False + + if s: + system.add_strand(s) + s = base.Strand() + if iscircular: + s.make_circular() + strandid_current = strandid + + ls = self._conf.readline().split() + if len(ls) == 0: + raise Exception("The %d-th nucleotide line in the configuration file is empty" % i_line) + elif len(ls) != 15: + raise Exception("The %d-th nucleotide line in the configuration file is invalid" % i_line) + cm = [float(x) for x in ls[0:3]] + a1 = [float(x) for x in ls[3:6]] + a3 = [float(x) for x in ls[6:9]] + v = [float(x) for x in ls[9:12]] + L = [float(x) for x in ls[12:15]] + if not only_strand_ends or n3 == -1 or n5 == -1: + s.add_nucleotide(base.Nucleotide(cm, a1, a3, b, bb, v, L, n3)) + + system.add_strand(s) + + return system + + + # if only_strand_ends == True then a strand will contain only the first and the last nucleotide + # useful for some analysis like csd for coaxial interactions + def get_system(self, only_strand_ends=False, N_skip=0): + for _ in range(N_skip): + self._read(skip=True) + + return self._read(only_strand_ends=only_strand_ends, skip=False) diff --git a/mrdna/readers/libs/topology.py b/mrdna/readers/libs/topology.py new file mode 100644 index 0000000000000000000000000000000000000000..6a84603a21d07e545571b4c6862c079a1b2d1b31 --- /dev/null +++ b/mrdna/readers/libs/topology.py @@ -0,0 +1,126 @@ +import numpy as np +import math as mt +import numpy.linalg as la + +#angle between vectors (-pi,pi) with a reference direction vplane +def py_ang(v1, v2, vplane): + v1n = v1 / la.norm(v1) + v2n = v2 / la.norm(v2) + + return np.arctan2 (la.norm(np.cross(v1n, v2n)) , np.dot (v1n, v2n)) * np.sign(np.dot(np.cross(v1n, v2n), vplane)) + + +def get_twist(axis, ssdna1): + numrows = len(axis) + distn = np.copy(axis) + dist = np.copy(axis) + p = np.copy(axis) + a = np.copy(axis) + + TW = 0 + + #axis vectors + for c in range(0, numrows): + ind = c + ind1 = (c + 1) % numrows + + dist[ind, :] = axis[ind1, :] - axis[ind, :] + distn[ind, :] = dist[ind, :] / np.sqrt(np.dot(dist[ind, :], dist[ind, :])) + # print ind,ind1, dist[ind,:],axis[ind,:],axis[ind1,:] + + # vector perpendicular to two consecutive axis vectors + for c in range(0, numrows): + ind_1 = (c - 1 + numrows) % numrows + ind = c + + p[ind, :] = np.cross(dist[ind_1, :] , dist[ind, :]) + p[ind, :] /= np.sqrt(np.dot(p[ind, :] , p[ind, :])) + # print p[ind,0], p[ind,1], p[ind,2] + + # axis to base vectors (perpendicular to axis) + weight = 0.5 # 1 #0.5 + for c in range(0, numrows): + a[c, :] = weight * ssdna1[c, :] + (1 - weight) * ssdna1[(c - 1 + numrows) % numrows, :] - axis[c, :] + + for c in range(0, numrows): + proj = np.dot(a[c, :], distn[c, :]) + a[c, :] = a[c, :] - proj * distn[c, :] + + # twist angles + for c in range(0, numrows): + ind_1 = (c - 1 + numrows) % numrows + ind = c + + # the angle should be computed selecting dist as the axis, so we need to choose the right order + + alpha = py_ang(a[ind_1, :] , p[ind, :] , dist[ind_1, :]) + gamma = py_ang(p[ind, :] , a[ind, :] , dist[ind, :]) + + angle = (alpha + gamma + 4 * np.pi) % (2 * np.pi) + # Now we have the angle in 0 - 2pi . if it exceeds pi, let's take angle-2*pi instead + if angle > np.pi: + angle = angle - 2 * np.pi + + # print angle + + TW += angle / (2 * np.pi) + + return TW + + +#curve xyz coordinate as input +def get_writhe(coordxyz): + numrows = len(coordxyz) + dist = np.copy(coordxyz) + # distn=np.copy(coordxyz) + + WR = 0 + + for c in range(0, numrows): + ind = int(c - mt.floor(c / float(numrows)) * numrows) + ind1 = int(c + 1 - mt.floor((c + 1) / float(numrows)) * numrows) + + dist[ind, :] = coordxyz[ind1, :] - coordxyz[ind, :] + # distn[ind,:]=dist[ind,:]/np.sqrt(np.dot(dist[ind,:],dist[ind,:])) + # print ind,ind1, coordxyz[ind,:],coordxyz[ind1,:],dist[ind,:] + + for i in range(1, numrows): + for j in range(0, i): + ind_i = int(i - mt.floor(i / float(numrows)) * numrows) + ind_i1 = int(i + 1 - mt.floor((i + 1) / float(numrows)) * numrows) + ind_j = int(j - mt.floor(j / float(numrows)) * numrows) + ind_j1 = int(j + 1 - mt.floor((j + 1) / float(numrows)) * numrows) + + r12 = dist[ind_i, :] # rii1 + r34 = dist[ind_j, :] # rjj1 + r13 = coordxyz[ind_j, :] - coordxyz[ind_i, :] # rij + r23 = coordxyz[ind_j, :] - coordxyz[ind_i1, :] # ri1j + r24 = coordxyz[ind_j1, :] - coordxyz[ind_i1, :] # ri1j1 + r14 = coordxyz[ind_j1, :] - coordxyz[ind_i, :] # rij1 + + # print ind_i,ind_i1,ind_j,ind_j1,r12,dist[ind_i,:],r34,r13,r23,r24,r14 + + # do action only if 4 points are coplanar (from wolfram), otherwise solid angle is zero + # if(ind_i==ind_j+1): + # if(np.dot( r13 , np.cross(r12,r34) )> 5*mt.exp(-17)): + # print np.dot( r13 , np.cross(r12,r34) ),ind_i,ind_j + if(abs (np.dot(r13 , np.cross(r12, r34))) > 5 * mt.exp(-17)): + n1 = np.cross(r13 , r14) + n1 /= np.sqrt(np.dot(n1, n1)) + n2 = np.cross(r14 , r24) + n2 /= np.sqrt(np.dot(n2, n2)) + n3 = np.cross(r24 , r23) + n3 /= np.sqrt(np.dot(n3, n3)) + n4 = np.cross(r23 , r13) + n4 /= np.sqrt(np.dot(n4, n4)) + + # print ind_i,ind_j, np.dot( r13 , np.cross(r12,r34) ), n1,n2,n3,n4 + + wr_loc = np.arcsin(np.dot(n1, n2)) + np.arcsin(np.dot(n2, n3)) + np.arcsin(np.dot(n3, n4)) + np.arcsin(np.dot(n4, n1)) + + wr_loc = wr_loc * np.sign(np.dot(np.cross(r34 , r12) , r13)) / 4 / np.pi + + WR += 2 * wr_loc + # print wr_loc,WR + + return WR diff --git a/mrdna/readers/libs/utils.py b/mrdna/readers/libs/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0697600c1c4880bd0f2fdce7a3ac5a63cd2ec54e --- /dev/null +++ b/mrdna/readers/libs/utils.py @@ -0,0 +1,85 @@ +import numpy as np +import math + +from .base import FLT_EPSILON + + +def get_angle(a, b): + """ + Get angle between a,b + + >>> a = [0, 1, 0] + >>> b = [0, 0, 1] + >>> round(get_angle(a,b),3) + 1.571 + + """ + ab = np.dot(a, b) + if ab > (1. - FLT_EPSILON): + return 0 + elif ab < (-1. + FLT_EPSILON): + return np.pi + else: + return np.arccos(ab) + + +def get_orthonormalized_base(v1, v2, v3): + v1_norm2 = np.dot(v1, v1) + v2_v1 = np.dot(v2, v1) + + v2 -= (v2_v1 / v1_norm2) * v1 + + v3_v1 = np.dot(v3, v1) + v3_v2 = np.dot(v3, v2) + v2_norm2 = np.dot(v2, v2) + + v3 -= (v3_v1 / v1_norm2) * v1 + (v3_v2 / v2_norm2) * v2 + + v1 /= np.sqrt(v1_norm2) + v2 /= np.sqrt(v2_norm2) + v3 /= np.sqrt(np.dot(v3, v3)) + + return v1, v2, v3 + + +def get_random_vector_in_sphere(r=1): + r2 = r * r + v = np.random.uniform(-r, r, 3) + + while np.dot(v, v) > r2: + v = np.random.uniform(-r, r, 3) + + return v + + +def get_random_vector(): + ransq = 1. + + while ransq >= 1.: + ran1 = 1. - 2. * np.random.random() + ran2 = 1. - 2. * np.random.random() + ransq = ran1 * ran1 + ran2 * ran2 + + ranh = 2. * np.sqrt(1. - ransq) + return np.array([ran1 * ranh, ran2 * ranh, 1. - 2. * ransq]) + + +def get_random_rotation_matrix(): + v1, v2, v3 = get_orthonormalized_base(get_random_vector(), get_random_vector(), get_random_vector()) + + R = np.array([v1, v2, v3]) + # rotations have det == 1 + if np.linalg.det(R) < 0: R = np.array([v2, v1, v3]) + + return R + + +def get_rotation_matrix(axis, angle): + ct = math.cos(angle) + st = math.sin(angle) + olc = 1. - ct + x, y, z = axis / math.sqrt(np.dot(axis, axis)) + + return np.array([[olc * x * x + ct, olc * x * y - st * z, olc * x * z + st * y], + [olc * x * y + st * z, olc * y * y + ct, olc * y * z - st * x], + [olc * x * z - st * y, olc * y * z + st * x, olc * z * z + ct]])