diff --git a/mrdna/readers/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/mrdna/readers/.ipynb_checkpoints/Untitled-checkpoint.ipynb deleted file mode 100644 index 363fcab7ed6e9634e198cf5555ceb88932c9a245..0000000000000000000000000000000000000000 --- a/mrdna/readers/.ipynb_checkpoints/Untitled-checkpoint.ipynb +++ /dev/null @@ -1,6 +0,0 @@ -{ - "cells": [], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/mrdna/readers/Untitled.ipynb b/mrdna/readers/.ipynb_checkpoints/test-checkpoint.ipynb similarity index 73% rename from mrdna/readers/Untitled.ipynb rename to mrdna/readers/.ipynb_checkpoints/test-checkpoint.ipynb index cedb210f5b287e97979e1a0a8f0675a672fe8dcf..8e5ae4386e1950a34b5a3a6ae9d83374f0a296c1 100644 --- a/mrdna/readers/Untitled.ipynb +++ b/mrdna/readers/.ipynb_checkpoints/test-checkpoint.ipynb @@ -2,20 +2,339 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 20, + "id": "ef4ed889-492f-4ba3-8c1d-a0fc607b1f57", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>name</th>\n", + " <th>vstrands</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>test.json</td>\n", + " <td>{'row': 12, 'col': 16, 'num': 0, 'scaf': [[-1,...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>test.json</td>\n", + " <td>{'row': 12, 'col': 15, 'num': 1, 'scaf': [[-1,...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>test.json</td>\n", + " <td>{'row': 13, 'col': 15, 'num': 2, 'scaf': [[-1,...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>test.json</td>\n", + " <td>{'row': 13, 'col': 16, 'num': 3, 'scaf': [[-1,...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>test.json</td>\n", + " <td>{'row': 13, 'col': 17, 'num': 4, 'scaf': [[-1,...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>test.json</td>\n", + " <td>{'row': 12, 'col': 17, 'num': 5, 'scaf': [[5, ...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " name vstrands\n", + "0 test.json {'row': 12, 'col': 16, 'num': 0, 'scaf': [[-1,...\n", + "1 test.json {'row': 12, 'col': 15, 'num': 1, 'scaf': [[-1,...\n", + "2 test.json {'row': 13, 'col': 15, 'num': 2, 'scaf': [[-1,...\n", + "3 test.json {'row': 13, 'col': 16, 'num': 3, 'scaf': [[-1,...\n", + "4 test.json {'row': 13, 'col': 17, 'num': 4, 'scaf': [[-1,...\n", + "5 test.json {'row': 12, 'col': 17, 'num': 5, 'scaf': [[5, ..." + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "\n", + "df=pd.read_json(\"test.json\")\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "051eb3b8-2bf1-4482-9815-19937f66e09d", + "metadata": {}, + "outputs": [], + "source": [ + "class vstrands (object):\n", + "\n", + " def __init__(self):\n", + " self.vhelices = []\n", + "\n", + " def add_vhelix(self, toadd):\n", + " self.vhelices.append(toadd)\n", + "\n", + " def bbox(self):\n", + " rows = []\n", + " cols = []\n", + " lens = []\n", + " for h in self.vhelices:\n", + " rows.append(h.row)\n", + " cols.append(h.col)\n", + " lens.append(len(h.stap))\n", + "\n", + " dr = DIST_SQUARE * (max(rows) - min(rows) + 2)\n", + " dc = DIST_SQUARE * (max(cols) - min(cols) + 2)\n", + " dl = 0.34 * (max(lens) + 2)\n", + " \n", + " return 2 * max([dr, dc, dl]) * BOX_FACTOR\n", + " \n", + " def __str__(self):\n", + " a = '{\\n\"vstrands\":[\\n'\n", + " if len(self.vhelices) > 0:\n", + " for h in self.vhelices:\n", + " a = a + str(h) + ','\n", + " a = a[0:len(a) - 1]\n", + " a = a + '}\\n'\n", + " return a\n", + "class vhelix (object):\n", + "\n", + " def __init__(self):\n", + " self.stapLoop = []\n", + " self.scafLoop = []\n", + " self.skip = []\n", + " self.loop = []\n", + " self.stap_colors = []\n", + " self.row = 0\n", + " self.col = 0\n", + " self.num = 0\n", + " self.stap = []\n", + " self.scaf = []\n", + " self.cad_index = -1\n", + " self.skiploop_bases = 0\n", + "\n", + " def get_length(self):\n", + " return max (len(self.scaf), len(self.stap))\n", + "\n", + " len = property (get_length)\n", + "\n", + " def add_square(self, toadd, which):\n", + " if which == 'stap':\n", + " self.stap.append(toadd)\n", + " elif which == 'scaf':\n", + " self.scaf.append (toadd)\n", + " else:\n", + " base.Logger.log(\"Cannot add square that is not scaf or stap. Dying now\", base.Logger.CRITICAL)\n", + " sys.exit(1)\n", + " \n", + " def __str__(self):\n", + " a = '{\\n'\n", + "\n", + " a = a + '\"stapLoop\":['\n", + " if len(self.stapLoop) > 0:\n", + " for i in self.stapLoop:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " a = a + '\"skip\":['\n", + " if len(self.skip) > 0:\n", + " for e in self.skip:\n", + " a = a + str(e) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"loop\":['\n", + " if len(self.loop) > 0:\n", + " for e in self.loop:\n", + " a = a + str(e) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"stap_colors\":['\n", + " if len (self.stap_colors) > 0:\n", + " for e in self.stap_colors:\n", + " a = a + str(e) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + "\n", + " a = a + '\"row\":' + str(self.row) + ',\\n'\n", + " a = a + '\"col\":' + str(self.col) + ',\\n'\n", + " a = a + '\"num\":' + str(self.num) + ',\\n'\n", + " \n", + " a = a + '\"scafLoop\":['\n", + " if len(self.scafLoop) > 0:\n", + " for i in self.scafLoop:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"stap\":['\n", + " if len(self.stap) > 0:\n", + " for i in self.stap:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"scaf\":['\n", + " if len(self.scaf) > 0:\n", + " for i in self.scaf:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + ']\\n}'\n", + " return a\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "6e88e214-c870-47d1-9a47-504caa411530", + "metadata": {}, + "outputs": [], + "source": [ + "L=[]\n", + "for i in df[\"vstrands\"]:\n", + " L.append(i)\n", + "\n", + "cadsys = vstrands()\n", + "vh = vhelix()\n", + "for s in L:\n", + " \n", + " vh.stap = [ i for i in s[\"scaf\"]]\n", + " vh.scaf = [i for i in s[\"stap\"]]\n", + " vh.skiploop_bases = len(s[\"skip\"]) + sum(s[\"loop\"]) - sum(s[\"skip\"])\n", + " cadsys.add_vhelix(vh)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "326f70c5-6a80-4802-847a-ae0de82f0eda", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'{\\n\"stapLoop\":[],\\n\"skip\":[],\\n\"loop\":[],\\n\"stap_colors\":[],\\n\"row\":0,\\n\"col\":0,\\n\"num\":0,\\n\"scafLoop\":[],\\n\"stap\":[[5, 1, -1, -1],[5, 2, 5, 0],[5, 3, 5, 1],[-1, -1, 5, 2],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[5, 10, 4, 9],[5, 11, 5, 9],[5, 12, 5, 10],[5, 13, 5, 11],[5, 14, 5, 12],[5, 15, 5, 13],[5, 16, 5, 14],[5, 17, 5, 15],[5, 18, 5, 16],[5, 19, 5, 17],[5, 20, 5, 18],[5, 21, 5, 19],[5, 22, 5, 20],[-1, -1, 5, 21],[5, 24, -1, -1],[5, 25, 5, 23],[5, 26, 5, 24],[5, 27, 5, 25],[5, 28, 5, 26],[5, 29, 5, 27],[5, 30, 5, 28],[5, 31, 5, 29],[5, 32, 5, 30],[5, 33, 5, 31],[5, 34, 5, 32],[5, 35, 5, 33],[5, 36, 5, 34],[5, 37, 5, 35],[5, 38, 5, 36],[5, 39, 5, 37],[4, 39, 5, 38],[-1, -1, -1, -1],[-1, -1, -1, -1]],\\n\"scaf\":[[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, 5, 10],[5, 9, 5, 11],[5, 10, 5, 12],[5, 11, 5, 13],[5, 12, 5, 14],[5, 13, 5, 15],[5, 14, 5, 16],[5, 15, 5, 17],[5, 16, 5, 18],[5, 17, 5, 19],[5, 18, 5, 20],[5, 19, 5, 21],[5, 20, 5, 22],[5, 21, 5, 23],[5, 22, 5, 24],[5, 23, 5, 25],[5, 24, 5, 26],[5, 25, 5, 27],[5, 26, 0, 27],[0, 28, 5, 29],[5, 28, 5, 30],[5, 29, 5, 31],[5, 30, 5, 32],[5, 31, 5, 33],[5, 32, 5, 34],[5, 33, 5, 35],[5, 34, 5, 36],[5, 35, 5, 37],[5, 36, 5, 38],[5, 37, 5, 39],[5, 38, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1]]\\n}'" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s0=cadsys.vhelices[0]\n", + "s0.__str__()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "7b552911-d7de-4079-ab4d-e859f2281f36", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[5, 1, -1, -1],\n", + " [5, 2, 5, 0],\n", + " [5, 3, 5, 1],\n", + " [-1, -1, 5, 2],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [5, 10, 4, 9],\n", + " [5, 11, 5, 9],\n", + " [5, 12, 5, 10],\n", + " [5, 13, 5, 11],\n", + " [5, 14, 5, 12],\n", + " [5, 15, 5, 13],\n", + " [5, 16, 5, 14],\n", + " [5, 17, 5, 15],\n", + " [5, 18, 5, 16],\n", + " [5, 19, 5, 17],\n", + " [5, 20, 5, 18],\n", + " [5, 21, 5, 19],\n", + " [5, 22, 5, 20],\n", + " [-1, -1, 5, 21],\n", + " [5, 24, -1, -1],\n", + " [5, 25, 5, 23],\n", + " [5, 26, 5, 24],\n", + " [5, 27, 5, 25],\n", + " [5, 28, 5, 26],\n", + " [5, 29, 5, 27],\n", + " [5, 30, 5, 28],\n", + " [5, 31, 5, 29],\n", + " [5, 32, 5, 30],\n", + " [5, 33, 5, 31],\n", + " [5, 34, 5, 32],\n", + " [5, 35, 5, 33],\n", + " [5, 36, 5, 34],\n", + " [5, 37, 5, 35],\n", + " [5, 38, 5, 36],\n", + " [5, 39, 5, 37],\n", + " [4, 39, 5, 38],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1]]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vh.stap" + ] + }, + { + "cell_type": "code", + "execution_count": 1, "id": "94113dfa", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " _\n", - " _____ ___ _| |___ ___\n", - "| | _| . | | .'|\n", - "|_|_|_|_| |___|_|_|__,| v1.0a.dev74 \n", - "it/its\n", - "\n" + "ename": "ModuleNotFoundError", + "evalue": "No module named 'mrdna'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmrdna\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mreaders\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcadnano_segments\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcadnano\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdocument\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Document\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcadnano\u001b[39;00m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'mrdna'" ] } ], @@ -27,6 +346,14 @@ "import cadnano" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec7f1e22-6a95-41f9-8c23-7603cc165837", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 3, @@ -930,7 +1257,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -944,7 +1271,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.18" + "version": "3.8.19" } }, "nbformat": 4, diff --git a/mrdna/readers/test.ipynb b/mrdna/readers/test.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8e5ae4386e1950a34b5a3a6ae9d83374f0a296c1 --- /dev/null +++ b/mrdna/readers/test.ipynb @@ -0,0 +1,1279 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 20, + "id": "ef4ed889-492f-4ba3-8c1d-a0fc607b1f57", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>name</th>\n", + " <th>vstrands</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>test.json</td>\n", + " <td>{'row': 12, 'col': 16, 'num': 0, 'scaf': [[-1,...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>test.json</td>\n", + " <td>{'row': 12, 'col': 15, 'num': 1, 'scaf': [[-1,...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>test.json</td>\n", + " <td>{'row': 13, 'col': 15, 'num': 2, 'scaf': [[-1,...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>test.json</td>\n", + " <td>{'row': 13, 'col': 16, 'num': 3, 'scaf': [[-1,...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>test.json</td>\n", + " <td>{'row': 13, 'col': 17, 'num': 4, 'scaf': [[-1,...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>test.json</td>\n", + " <td>{'row': 12, 'col': 17, 'num': 5, 'scaf': [[5, ...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " name vstrands\n", + "0 test.json {'row': 12, 'col': 16, 'num': 0, 'scaf': [[-1,...\n", + "1 test.json {'row': 12, 'col': 15, 'num': 1, 'scaf': [[-1,...\n", + "2 test.json {'row': 13, 'col': 15, 'num': 2, 'scaf': [[-1,...\n", + "3 test.json {'row': 13, 'col': 16, 'num': 3, 'scaf': [[-1,...\n", + "4 test.json {'row': 13, 'col': 17, 'num': 4, 'scaf': [[-1,...\n", + "5 test.json {'row': 12, 'col': 17, 'num': 5, 'scaf': [[5, ..." + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "\n", + "df=pd.read_json(\"test.json\")\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "051eb3b8-2bf1-4482-9815-19937f66e09d", + "metadata": {}, + "outputs": [], + "source": [ + "class vstrands (object):\n", + "\n", + " def __init__(self):\n", + " self.vhelices = []\n", + "\n", + " def add_vhelix(self, toadd):\n", + " self.vhelices.append(toadd)\n", + "\n", + " def bbox(self):\n", + " rows = []\n", + " cols = []\n", + " lens = []\n", + " for h in self.vhelices:\n", + " rows.append(h.row)\n", + " cols.append(h.col)\n", + " lens.append(len(h.stap))\n", + "\n", + " dr = DIST_SQUARE * (max(rows) - min(rows) + 2)\n", + " dc = DIST_SQUARE * (max(cols) - min(cols) + 2)\n", + " dl = 0.34 * (max(lens) + 2)\n", + " \n", + " return 2 * max([dr, dc, dl]) * BOX_FACTOR\n", + " \n", + " def __str__(self):\n", + " a = '{\\n\"vstrands\":[\\n'\n", + " if len(self.vhelices) > 0:\n", + " for h in self.vhelices:\n", + " a = a + str(h) + ','\n", + " a = a[0:len(a) - 1]\n", + " a = a + '}\\n'\n", + " return a\n", + "class vhelix (object):\n", + "\n", + " def __init__(self):\n", + " self.stapLoop = []\n", + " self.scafLoop = []\n", + " self.skip = []\n", + " self.loop = []\n", + " self.stap_colors = []\n", + " self.row = 0\n", + " self.col = 0\n", + " self.num = 0\n", + " self.stap = []\n", + " self.scaf = []\n", + " self.cad_index = -1\n", + " self.skiploop_bases = 0\n", + "\n", + " def get_length(self):\n", + " return max (len(self.scaf), len(self.stap))\n", + "\n", + " len = property (get_length)\n", + "\n", + " def add_square(self, toadd, which):\n", + " if which == 'stap':\n", + " self.stap.append(toadd)\n", + " elif which == 'scaf':\n", + " self.scaf.append (toadd)\n", + " else:\n", + " base.Logger.log(\"Cannot add square that is not scaf or stap. Dying now\", base.Logger.CRITICAL)\n", + " sys.exit(1)\n", + " \n", + " def __str__(self):\n", + " a = '{\\n'\n", + "\n", + " a = a + '\"stapLoop\":['\n", + " if len(self.stapLoop) > 0:\n", + " for i in self.stapLoop:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " a = a + '\"skip\":['\n", + " if len(self.skip) > 0:\n", + " for e in self.skip:\n", + " a = a + str(e) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"loop\":['\n", + " if len(self.loop) > 0:\n", + " for e in self.loop:\n", + " a = a + str(e) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"stap_colors\":['\n", + " if len (self.stap_colors) > 0:\n", + " for e in self.stap_colors:\n", + " a = a + str(e) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + "\n", + " a = a + '\"row\":' + str(self.row) + ',\\n'\n", + " a = a + '\"col\":' + str(self.col) + ',\\n'\n", + " a = a + '\"num\":' + str(self.num) + ',\\n'\n", + " \n", + " a = a + '\"scafLoop\":['\n", + " if len(self.scafLoop) > 0:\n", + " for i in self.scafLoop:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"stap\":['\n", + " if len(self.stap) > 0:\n", + " for i in self.stap:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + '],\\n'\n", + " \n", + " a = a + '\"scaf\":['\n", + " if len(self.scaf) > 0:\n", + " for i in self.scaf:\n", + " a = a + str(i) + ','\n", + " a = a[0:len(a) - 1] # remove last comma\n", + " a = a + ']\\n}'\n", + " return a\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "6e88e214-c870-47d1-9a47-504caa411530", + "metadata": {}, + "outputs": [], + "source": [ + "L=[]\n", + "for i in df[\"vstrands\"]:\n", + " L.append(i)\n", + "\n", + "cadsys = vstrands()\n", + "vh = vhelix()\n", + "for s in L:\n", + " \n", + " vh.stap = [ i for i in s[\"scaf\"]]\n", + " vh.scaf = [i for i in s[\"stap\"]]\n", + " vh.skiploop_bases = len(s[\"skip\"]) + sum(s[\"loop\"]) - sum(s[\"skip\"])\n", + " cadsys.add_vhelix(vh)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "326f70c5-6a80-4802-847a-ae0de82f0eda", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'{\\n\"stapLoop\":[],\\n\"skip\":[],\\n\"loop\":[],\\n\"stap_colors\":[],\\n\"row\":0,\\n\"col\":0,\\n\"num\":0,\\n\"scafLoop\":[],\\n\"stap\":[[5, 1, -1, -1],[5, 2, 5, 0],[5, 3, 5, 1],[-1, -1, 5, 2],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[5, 10, 4, 9],[5, 11, 5, 9],[5, 12, 5, 10],[5, 13, 5, 11],[5, 14, 5, 12],[5, 15, 5, 13],[5, 16, 5, 14],[5, 17, 5, 15],[5, 18, 5, 16],[5, 19, 5, 17],[5, 20, 5, 18],[5, 21, 5, 19],[5, 22, 5, 20],[-1, -1, 5, 21],[5, 24, -1, -1],[5, 25, 5, 23],[5, 26, 5, 24],[5, 27, 5, 25],[5, 28, 5, 26],[5, 29, 5, 27],[5, 30, 5, 28],[5, 31, 5, 29],[5, 32, 5, 30],[5, 33, 5, 31],[5, 34, 5, 32],[5, 35, 5, 33],[5, 36, 5, 34],[5, 37, 5, 35],[5, 38, 5, 36],[5, 39, 5, 37],[4, 39, 5, 38],[-1, -1, -1, -1],[-1, -1, -1, -1]],\\n\"scaf\":[[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1],[-1, -1, 5, 10],[5, 9, 5, 11],[5, 10, 5, 12],[5, 11, 5, 13],[5, 12, 5, 14],[5, 13, 5, 15],[5, 14, 5, 16],[5, 15, 5, 17],[5, 16, 5, 18],[5, 17, 5, 19],[5, 18, 5, 20],[5, 19, 5, 21],[5, 20, 5, 22],[5, 21, 5, 23],[5, 22, 5, 24],[5, 23, 5, 25],[5, 24, 5, 26],[5, 25, 5, 27],[5, 26, 0, 27],[0, 28, 5, 29],[5, 28, 5, 30],[5, 29, 5, 31],[5, 30, 5, 32],[5, 31, 5, 33],[5, 32, 5, 34],[5, 33, 5, 35],[5, 34, 5, 36],[5, 35, 5, 37],[5, 36, 5, 38],[5, 37, 5, 39],[5, 38, -1, -1],[-1, -1, -1, -1],[-1, -1, -1, -1]]\\n}'" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s0=cadsys.vhelices[0]\n", + "s0.__str__()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "7b552911-d7de-4079-ab4d-e859f2281f36", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[5, 1, -1, -1],\n", + " [5, 2, 5, 0],\n", + " [5, 3, 5, 1],\n", + " [-1, -1, 5, 2],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1],\n", + " [5, 10, 4, 9],\n", + " [5, 11, 5, 9],\n", + " [5, 12, 5, 10],\n", + " [5, 13, 5, 11],\n", + " [5, 14, 5, 12],\n", + " [5, 15, 5, 13],\n", + " [5, 16, 5, 14],\n", + " [5, 17, 5, 15],\n", + " [5, 18, 5, 16],\n", + " [5, 19, 5, 17],\n", + " [5, 20, 5, 18],\n", + " [5, 21, 5, 19],\n", + " [5, 22, 5, 20],\n", + " [-1, -1, 5, 21],\n", + " [5, 24, -1, -1],\n", + " [5, 25, 5, 23],\n", + " [5, 26, 5, 24],\n", + " [5, 27, 5, 25],\n", + " [5, 28, 5, 26],\n", + " [5, 29, 5, 27],\n", + " [5, 30, 5, 28],\n", + " [5, 31, 5, 29],\n", + " [5, 32, 5, 30],\n", + " [5, 33, 5, 31],\n", + " [5, 34, 5, 32],\n", + " [5, 35, 5, 33],\n", + " [5, 36, 5, 34],\n", + " [5, 37, 5, 35],\n", + " [5, 38, 5, 36],\n", + " [5, 39, 5, 37],\n", + " [4, 39, 5, 38],\n", + " [-1, -1, -1, -1],\n", + " [-1, -1, -1, -1]]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vh.stap" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "94113dfa", + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'mrdna'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmrdna\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mreaders\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcadnano_segments\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcadnano\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdocument\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Document\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcadnano\u001b[39;00m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'mrdna'" + ] + } + ], + "source": [ + "\n", + "\n", + "from mrdna.readers.cadnano_segments import *\n", + "from cadnano.document import Document\n", + "import cadnano" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec7f1e22-6a95-41f9-8c23-7603cc165837", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "40d7cdcf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "json_data=read_json_file(\"test.json\")\n", + "part=decode_cadnano_part(json_data)\n", + "model=cadnano_part(part)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "4b3748a8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found cadnano version 2 file\n" + ] + } + ], + "source": [ + "doc=Document()\n", + "cadnano.fileio.v2decode.decode(doc, json_data)\n", + "parts = [p for p in doc.getParts()]\n", + "part=parts[0]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "aa8a8cd9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Oligo_(0.1[38])_1328\t23\t'None\n", + "Oligo_(2.1[34])_9584\t35\t'None\n", + "Oligo_(1.1[36])_7488\t188\t'None\n", + "Oligo_(4.1[39])_4384\t33\t'None\n", + "Oligo_(5.0[9])_0240\t23\t'None\n", + "Oligo_(1.0[3])_8256\t37\t'None\n", + "Oligo_(3.0[0])_3296\t33\t'None\n", + "Oligo_(0.1[23])_9088\t21\t'None\n", + "VH0\n", + "\t <fwd_StrandSet(0)> \t [(5, 36)] \n", + "\t\t\t\t ['#0066cc']\n", + "\t <rev_StrandSet(0)> \t [(2, 20), (21, 23), (24, 27), (28, 38)] \n", + "\t\t\t\t ['#cc0000', '#b8056c', '#f74308', '#1700de']\n", + "VH1\n", + "\t <fwd_StrandSet(1)> \t [(3, 20), (21, 38)] \n", + "\t\t\t\t ['#cc0000', '#b8056c']\n", + "\t <rev_StrandSet(1)> \t [(5, 18), (19, 36)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "VH2\n", + "\t <fwd_StrandSet(2)> \t [(2, 18), (19, 32)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "\t <rev_StrandSet(2)> \t [(0, 34)] \n", + "\t\t\t\t ['#888888']\n", + "VH3\n", + "\t <fwd_StrandSet(3)> \t [(0, 20), (21, 34)] \n", + "\t\t\t\t ['#cc0000', '#888888']\n", + "\t <rev_StrandSet(3)> \t [(2, 15), (16, 32)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "VH4\n", + "\t <fwd_StrandSet(4)> \t [(9, 15), (16, 39)] \n", + "\t\t\t\t ['#0066cc', '#0066cc']\n", + "\t <rev_StrandSet(4)> \t [(9, 20), (21, 39)] \n", + "\t\t\t\t ['#cc0000', '#888888']\n", + "VH5\n", + "\t <fwd_StrandSet(5)> \t [(9, 27), (28, 39)] \n", + "\t\t\t\t ['#f74308', '#1700de']\n", + "\t <rev_StrandSet(5)> \t [(9, 39)] \n", + "\t\t\t\t ['#0066cc']\n" + ] + } + ], + "source": [ + "part.__dict__.keys()\n", + "\n", + "oligos = part.oligos()\n", + "for oligo in oligos:\n", + " print(\"{0}\\t{1}\\t\\'{2}\".format(oligo,\n", + " oligo.length(),\n", + " oligo.sequence()))\n", + "\n", + "vhs = list(part.getIdNums()) # convert set to list\n", + "for vh_id in vhs: # display first 3 vhs\n", + " fwd_ss, rev_ss = part.getStrandSets(vh_id)\n", + " print('VH{0}'.format(vh_id))\n", + " print('\\t', fwd_ss, '\\t', [s.idxs() for s in fwd_ss.strands()], '\\n\\t\\t\\t\\t',\n", + " [s.getColor() for s in fwd_ss.strands()])\n", + " print('\\t', rev_ss, '\\t', [s.idxs() for s in rev_ss.strands()], '\\n\\t\\t\\t\\t',\n", + " [s.getColor() for s in rev_ss.strands()])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "e0cab15d", + "metadata": {}, + "outputs": [], + "source": [ + "strands5 = [o.strand5p() for o in part.oligos()]\n", + "strands3 = [o.strand3p() for o in part.oligos()]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "9811bf9a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['__class__',\n", + " '__delattr__',\n", + " '__dict__',\n", + " '__dir__',\n", + " '__doc__',\n", + " '__eq__',\n", + " '__format__',\n", + " '__ge__',\n", + " '__getattribute__',\n", + " '__gt__',\n", + " '__hash__',\n", + " '__init__',\n", + " '__init_subclass__',\n", + " '__le__',\n", + " '__lt__',\n", + " '__module__',\n", + " '__ne__',\n", + " '__new__',\n", + " '__reduce__',\n", + " '__reduce_ex__',\n", + " '__repr__',\n", + " '__setattr__',\n", + " '__sizeof__',\n", + " '__slots__',\n", + " '__str__',\n", + " '__subclasshook__',\n", + " '__weakref__',\n", + " '_decrementLength',\n", + " '_incrementLength',\n", + " '_is_circular',\n", + " '_parent',\n", + " '_part',\n", + " '_props',\n", + " '_setColor',\n", + " '_setLength',\n", + " '_setLoop',\n", + " '_setProperty',\n", + " '_signals',\n", + " '_strand5p',\n", + " '_strandMergeUpdate',\n", + " '_strandSplitUpdate',\n", + " 'addToPart',\n", + " 'applyAbstractSequences',\n", + " 'applyColor',\n", + " 'applySequence',\n", + " 'applySequenceCMD',\n", + " 'clearAbstractSequences',\n", + " 'connect',\n", + " 'deleteLater',\n", + " 'destroy',\n", + " 'disconnect',\n", + " 'displayAbstractSequences',\n", + " 'dump',\n", + " 'editable_properties',\n", + " 'getAbsolutePositionAtLength',\n", + " 'getColor',\n", + " 'getModelProperties',\n", + " 'getName',\n", + " 'getNumberOfBasesToEachXover',\n", + " 'getOutlineProperties',\n", + " 'getProperty',\n", + " 'getStrandLengths',\n", + " 'isCircular',\n", + " 'length',\n", + " 'locString',\n", + " 'oligoPropertyChangedSignal',\n", + " 'oligoRemovedSignal',\n", + " 'oligoSelectedChangedSignal',\n", + " 'oligoSequenceAddedSignal',\n", + " 'oligoSequenceClearedSignal',\n", + " 'parent',\n", + " 'part',\n", + " 'refreshLength',\n", + " 'remove',\n", + " 'removeFromPart',\n", + " 'sequence',\n", + " 'sequenceExport',\n", + " 'setParent',\n", + " 'setPart',\n", + " 'setProperty',\n", + " 'setStrand5p',\n", + " 'shallowCopy',\n", + " 'shouldHighlight',\n", + " 'signals',\n", + " 'splitAtAbsoluteLengths',\n", + " 'strand3p',\n", + " 'strand5p',\n", + " 'undoStack']" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "L=[o for o in part.oligos()]\n", + "dir(L[2])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "86403b4b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defaultdict(dict, {0: {}, 1: {}, 2: {}, 3: {}, 4: {}, 5: {}})" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "part.insertions()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "5c7cea80", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'name': 'NaPart1',\n", + " 'color': '#0066cc',\n", + " 'is_visible': True,\n", + " 'active_phos': None,\n", + " 'crossover_span_angle': 45,\n", + " 'max_vhelix_length': 42,\n", + " 'neighbor_active_angle': '',\n", + " 'grid_type': <GridEnum.HONEYCOMB: 2>,\n", + " 'virtual_helix_order': [0, 1, 2, 3, 4, 5],\n", + " 'is_lattice': True,\n", + " <GridEnum.HONEYCOMB: 2>: <GridEnum.NONE: 0>}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "part.getModelProperties()" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "136dcf08", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " _\n", + " _____ ___ _| |___ ___\n", + "| | _| . | | .'|\n", + "|_|_|_|_| |___|_|_|__,| v1.0a.dev74 \n", + "it/its\n", + "\n" + ] + } + ], + "source": [ + "import pdb\n", + "import numpy as np\n", + "import os,sys\n", + "import scipy\n", + "\n", + "from mrdna import logger, devlogger\n", + "from mrdna.segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment\n", + "from mrdna.arbdmodel.coords import quaternion_from_matrix, rotationAboutAxis, quaternion_slerp\n", + "from mrdna import get_resource_path\n", + "\n", + "ref_stack_position = np.array((-2.41851735, -0.259761333, 3.39999978))\n", + "\n", + "def _three_prime_list_to_five_prime(three_prime):\n", + " five_prime = -np.ones(three_prime.shape, dtype=int)\n", + " has_three_prime = np.where(three_prime >= 0)[0]\n", + " five_prime[three_prime[has_three_prime]] = has_three_prime\n", + " return five_prime \n", + "def _primes_list_to_strands(three_prime, five_prime):\n", + " five_prime_ends = np.where(five_prime < 0)[0]\n", + " strands = []\n", + " strand_is_circular = []\n", + " \n", + " idx_to_strand = -np.ones(three_prime.shape, dtype=int)\n", + "\n", + " def build_strand(nt_idx, conditional):\n", + " strand = [nt_idx]\n", + " idx_to_strand[nt_idx] = len(strands)\n", + " while conditional(nt_idx):\n", + " nt_idx = three_prime[nt_idx]\n", + " strand.append(nt_idx)\n", + " idx_to_strand[nt_idx] = len(strands)\n", + " strands.append( np.array(strand, dtype=int) )\n", + "\n", + " for nt_idx in five_prime_ends:\n", + " build_strand(nt_idx,\n", + " lambda nt: three_prime[nt] >= 0)\n", + " strand_is_circular.append(False)\n", + "\n", + " while True:\n", + " ## print(\"WARNING: working on circular strand {}\".format(len(strands)))\n", + " ids = np.where(idx_to_strand < 0)[0]\n", + " if len(ids) == 0: break\n", + " build_strand(ids[0],\n", + " lambda nt: three_prime[nt] >= 0 and \\\n", + " idx_to_strand[three_prime[nt]] < 0)\n", + " strand_is_circular.append(True)\n", + "\n", + " return strands, strand_is_circular\n", + "\n", + "def find_stacks(centers, transforms):\n", + "\n", + " ## Find orientation and center of each nucleotide\n", + " expected_stack_positions = []\n", + " for R,c in zip(transforms,centers):\n", + " expected_stack_positions.append( c + ref_stack_position.dot(R) )\n", + "\n", + " expected_stack_positions = np.array(expected_stack_positions, dtype=np.float32)\n", + "\n", + " dists = scipy.spatial.distance_matrix(expected_stack_positions, centers)\n", + " dists = dists + 5*np.eye(len(dists))\n", + " idx1, idx2 = np.where(dists < 3.5)\n", + "\n", + " ## Convert distances to stacks\n", + " stacks_above = -np.ones(len(centers), dtype=int)\n", + " _z = np.array((0,0,1))\n", + " for i in np.unique(idx1):\n", + " js = idx2[ idx1 == i ]\n", + " with np.errstate(divide='ignore',invalid='ignore'):\n", + " angles = [np.arccos( transforms[j].T.dot( transforms[i].dot(_z) ).dot( _z ) ) for j in js]\n", + " angles = np.array( angles )\n", + " tmp = np.argmin(dists[i][js] + 1.0*angles)\n", + " j = js[tmp]\n", + " stacks_above[i] = j\n", + "\n", + " return stacks_above\n", + "\n", + "def basepairs_and_stacks_to_helixmap(basepairs,stacks_above):\n", + "\n", + " helixmap = -np.ones(basepairs.shape, dtype=int)\n", + " helixrank = -np.ones(basepairs.shape)\n", + " is_fwd = np.ones(basepairs.shape, dtype=int)\n", + " \n", + " ## Remove stacks with nts lacking a basepairs\n", + " nobp = np.where(basepairs < 0)[0]\n", + " stacks_above[nobp] = -1\n", + " stacks_with_nobp = np.in1d(stacks_above, nobp)\n", + " stacks_above[stacks_with_nobp] = -1\n", + "\n", + " end_ids = np.where( (stacks_above < 0)*(basepairs >= 0) )[0]\n", + "\n", + " hid = 0\n", + " for end in end_ids:\n", + " if helixmap[end] >= 0:\n", + " continue\n", + " rank = 0\n", + " nt = basepairs[end]\n", + " bp = basepairs[nt]\n", + " assert( bp == end )\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " continue\n", + " # assert(helixmap[nt] == -1)\n", + " # assert(helixmap[bp] == -1)\n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " rank +=1\n", + "\n", + " _tmp = [(nt,bp)]\n", + " \n", + " while stacks_above[nt] >= 0:\n", + " nt = stacks_above[nt]\n", + " if basepairs[nt] < 0: break\n", + " bp = basepairs[nt]\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " break\n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " _tmp.append((nt,bp))\n", + " rank +=1\n", + "\n", + " hid += 1\n", + "\n", + " ## Create \"helix\" for each circular segment\n", + " intrahelical = []\n", + " processed = set()\n", + " unclaimed_bases = np.where( (basepairs >= 0)*(helixmap == -1) )[0]\n", + " for nt0 in unclaimed_bases:\n", + " if nt0 in processed: continue\n", + "\n", + " nt = nt0\n", + " all_nts = [nt]\n", + "\n", + " rank = 0\n", + " nt = nt0\n", + " bp = basepairs[nt]\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed cylic helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " continue\n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " rank +=1\n", + " processed.add(nt)\n", + " processed.add(bp)\n", + "\n", + " counter = 0\n", + " while stacks_above[nt] >= 0:\n", + " lastnt = nt\n", + " nt = stacks_above[nt]\n", + " bp = basepairs[nt]\n", + " if nt == nt0 or nt == basepairs[nt0]:\n", + " intrahelical.append((lastnt,nt0))\n", + " break\n", + " \n", + " assert( bp >= 0 )\n", + " if helixmap[nt] >= 0 or helixmap[bp] >= 0:\n", + " logger.warning(f'Ill-formed cyclic helix: problematic basepair or stacking data near nucleotide {nt} or {bp}... skipping')\n", + " break\n", + " \n", + " helixmap[nt] = helixmap[bp] = hid\n", + " helixrank[nt] = helixrank[bp] = rank\n", + " is_fwd[bp] = 0\n", + " processed.add(nt)\n", + " processed.add(bp)\n", + " rank +=1\n", + " hid += 1\n", + "\n", + " return helixmap, helixrank, is_fwd, intrahelical\n", + "\n", + "\n", + "def set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation=None):\n", + " maxrank = np.max( hrank[hmap==hid] )\n", + " if maxrank == 0:\n", + " ids = np.where((hmap == hid))[0]\n", + " pos = np.mean( [coordinate[r,:] for r in ids ], axis=0 )\n", + " coords = [pos,pos]\n", + " contours = [0,1]\n", + " if orientation is not None:\n", + " ids = np.where((hmap == hid) * fwd)[0]\n", + " assert( len(ids) == 1 )\n", + " q = quaternion_from_matrix( orientation[ids[0]] )\n", + " quats = [q, q]\n", + " coords[-1] = pos + orientation[ids[0]].dot(np.array((0,0,1)))\n", + "\n", + " else:\n", + " coords,contours,quats = [[],[],[]]\n", + " last_q = None\n", + " for rank in range(int(maxrank)+1):\n", + " ids = np.where((hmap == hid) * (hrank == rank))[0]\n", + " \n", + " coords.append(np.mean( [coordinate[r,:] for r in ids ], axis=0 ))\n", + " contours.append( float(rank+0.5)/(maxrank+1) )\n", + " if orientation is not None:\n", + " ids = np.where((hmap == hid) * (hrank == rank) * fwd)[0]\n", + " assert(len(ids) == 1)\n", + " q = quaternion_from_matrix( orientation[ids[0]] )\n", + "\n", + " if last_q is not None and last_q.dot(q) < 0:\n", + " q = -q\n", + "\n", + " ## Average quaterion with reverse direction\n", + " bp = basepair[ids[0]]\n", + " if bp >= 0:\n", + " bp_o = orientation[bp].dot(rotationAboutAxis(np.array((1,0,0)),180))\n", + " q2 = quaternion_from_matrix( bp_o )\n", + " if q.dot(q2) < 0:\n", + " q2 = -q2\n", + "\n", + " ## probably good enough, but slerp is better: q = (q + q2)*0.5\n", + " q = quaternion_slerp(q,q2,0.5)\n", + "\n", + " quats.append(q)\n", + " last_q = q\n", + "\n", + " coords = np.array(coords)\n", + " seg.set_splines(contours,coords)\n", + " if orientation is not None:\n", + " quats = np.array(quats)\n", + " seg.set_orientation_splines(contours,quats)\n", + "\n", + " seg.start_position = coords[0,:]\n", + " seg.end_position = coords[-1,:]\n", + "\n", + "\n", + "def model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,\n", + " sequence=None, orientation=None,\n", + " max_basepairs_per_bead = 5,\n", + " max_nucleotides_per_bead = 5,\n", + " local_twist = False,\n", + " dimensions=(5000,5000,5000),\n", + " **model_parameters):\n", + " \"\"\" \n", + " Creates a SegmentModel object from lists of each nucleotide's\n", + " basepair, its stack (on 3' side) and its 3'-connected nucleotide\n", + "\n", + " The first argument should be an N-by-3 numpy array containing the\n", + " coordinate of each nucleotide, where N is the number of\n", + " nucleotides. The following three arguments should be integer lists\n", + " where the i-th element corresponds to the i-th nucleotide; the\n", + " list element should the integer index of the corresponding\n", + " basepaired / stacked / phosphodiester-bonded nucleotide. If there\n", + " is no such nucleotide, the value should be -1.\n", + "\n", + " Args:\n", + " basepair: List of each nucleotide's basepair's index\n", + " stack: List containing index of the nucleotide stacked on the 3' of each nucleotide\n", + " three_prime: List of each nucleotide's the 3' end of each nucleotide\n", + "\n", + " Returns:\n", + " SegmentModel\n", + " \"\"\"\n", + "\n", + " \"\"\" Validate Input \"\"\"\n", + " inputs = (basepair,three_prime)\n", + " try:\n", + " basepair,three_prime = [np.array(a,dtype=int) for a in inputs]\n", + " except:\n", + " raise TypeError(\"One or more of the input lists could not be converted into a numpy array\")\n", + " inputs = (basepair,three_prime)\n", + " coordinate = np.array(coordinate)\n", + "\n", + " if np.any( [len(a.shape) > 1 for a in inputs] ):\n", + " raise ValueError(\"One or more of the input lists has the wrong dimensionality\")\n", + "\n", + " if len(coordinate.shape) != 2:\n", + " raise ValueError(\"Coordinate array has the wrong dimensionality\")\n", + "\n", + " inputs = (coordinate,basepair,three_prime)\n", + " if not np.all(np.diff([len(a) for a in inputs]) == 0):\n", + " raise ValueError(\"Inputs are not the same length\")\n", + " \n", + " num_nt = len(basepair)\n", + " if sequence is not None and len(sequence) != num_nt:\n", + " raise ValueError(\"The 'sequence' parameter is the wrong length {} != {}\".format(len(sequence),num_nt))\n", + "\n", + " if orientation is not None:\n", + " orientation = np.array(orientation)\n", + " if len(orientation.shape) != 3:\n", + " raise ValueError(\"The 'orientation' array has the wrong dimensionality (should be Nx3x3)\")\n", + " if orientation.shape != (num_nt,3,3):\n", + " raise ValueError(\"The 'orientation' array is not properly formatted\")\n", + "\n", + " if stack is None:\n", + " if orientation is not None:\n", + " stack = find_stacks(coordinate, orientation)\n", + " else:\n", + " ## Guess stacking based on 3' connectivity\n", + " stack = np.array(three_prime,dtype=int) # Assume nts on 3' ends are stacked\n", + " _stack_below = _three_prime_list_to_five_prime(stack)\n", + " _has_bp = (basepair >= 0)\n", + " _nostack = np.where( (stack == -1)*_has_bp )[0]\n", + " _has_stack_below = _stack_below[basepair[_nostack]] >= 0\n", + " _nostack2 = _nostack[_has_stack_below]\n", + " stack[_nostack2] = basepair[_stack_below[basepair[_nostack2]]]\n", + "\n", + " else:\n", + " try:\n", + " stack = np.array(stack,dtype=int)\n", + " except:\n", + " raise TypeError(\"The 'stack' array could not be converted into a numpy integer array\")\n", + "\n", + " if len(stack.shape) != 1:\n", + " raise ValueError(\"The 'stack' array has the wrong dimensionality\")\n", + "\n", + " if len(stack) != num_nt:\n", + " raise ValueError(\"The length of the 'stack' array does not match other inputs\")\n", + "\n", + " bps = basepair # alias\n", + "\n", + " \"\"\" Fix stacks: require that the stack of a bp of a base's stack is its bp \"\"\"\n", + " _has_bp = (bps >= 0)\n", + " _has_stack = (stack >= 0)\n", + " _stack_has_basepair = (bps[stack] >= 0) * _has_stack\n", + " stack = np.where( (stack[bps[stack]] == bps) * _has_bp * _has_stack * _has_bp,\n", + " stack, -np.ones(len(stack),dtype=int) )\n", + "\n", + " five_prime = _three_prime_list_to_five_prime(three_prime)\n", + "\n", + " \"\"\" Build map of dsDNA helices and strands \"\"\"\n", + " hmap,hrank,fwd,intrahelical = basepairs_and_stacks_to_helixmap(bps,stack)\n", + " double_stranded_helices = np.unique(hmap[hmap >= 0]) \n", + " strands, strand_is_circular = _primes_list_to_strands(three_prime, five_prime)\n", + "\n", + " \"\"\" Add ssDNA to hmap \"\"\"\n", + " if len(double_stranded_helices) > 0:\n", + " hid = double_stranded_helices[-1]+1\n", + " else:\n", + " hid = 0\n", + " ss_residues = hmap < 0\n", + " #\n", + " if np.any(bps[ss_residues] != -1):\n", + " logger.warning(f'{np.sum(bps[ss_residues] != -1)} ssDNA nucleotides appear to have basepairs... ignoring')\n", + " \n", + " for s,c in zip(strands, strand_is_circular):\n", + " strand_segment_ends = [i for i in np.where( np.diff(hmap[s]) != 0 )[0]] + [len(s)-1]\n", + " seg_start = 0\n", + " for i in strand_segment_ends:\n", + " if hmap[s[i]] < 0:\n", + " ## Found single-stranded segment\n", + " ids = s[seg_start:i+1]\n", + " assert( np.all(hmap[ids] == -1) )\n", + " hmap[ids] = hid\n", + " hrank[ids] = np.arange(i+1-seg_start)\n", + " hid+=1\n", + " seg_start = i+1\n", + "\n", + " if len(double_stranded_helices) > 0:\n", + " single_stranded_helices = np.arange(double_stranded_helices[-1]+1,hid)\n", + " else:\n", + " single_stranded_helices = np.arange(hid)\n", + "\n", + " ## Create double-stranded segments\n", + " doubleSegments = []\n", + " for hid in double_stranded_helices:\n", + " seg = DoubleStrandedSegment(name=str(hid),\n", + " num_bp = np.sum(hmap==hid)//2)\n", + " set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation)\n", + "\n", + " assert(hid == len(doubleSegments))\n", + " doubleSegments.append(seg)\n", + "\n", + " ## Create single-stranded segments\n", + " singleSegments = []\n", + " for hid in single_stranded_helices:\n", + " seg = SingleStrandedSegment(name=str(hid),\n", + " num_nt = np.sum(hmap==hid))\n", + " set_splines(seg, coordinate, hid, hmap, hrank, fwd, basepair, orientation)\n", + "\n", + " assert(hid == len(doubleSegments) + len(singleSegments))\n", + " singleSegments.append(seg)\n", + "\n", + " ## Find crossovers and 5prime/3prime ends\n", + " crossovers,prime5,prime3 = [[],[],[]]\n", + " for s,c in zip(strands,strand_is_circular):\n", + " tmp = np.where(np.diff(hmap[s]) != 0)[0]\n", + " for i in tmp:\n", + " crossovers.append( (s[i],s[i+1]) )\n", + " if c:\n", + " if hmap[s[-1]] != hmap[s[0]]:\n", + " crossovers.append( (s[-1],s[0]) )\n", + " else:\n", + " prime5.append(s[0])\n", + " prime3.append(s[-1])\n", + "\n", + " ## Add connections\n", + " allSegments = doubleSegments+singleSegments\n", + "\n", + " for r1,r2 in crossovers:\n", + " seg1,seg2 = [allSegments[hmap[i]] for i in (r1,r2)]\n", + " nt1,nt2 = [hrank[i] for i in (r1,r2)]\n", + " f1,f2 = [fwd[i] for i in (r1,r2)]\n", + "\n", + " ## Handle connections at the ends\n", + " is_terminal1 = (nt1,f1) in ((0,0),(seg1.num_nt-1,1))\n", + " is_terminal2 = (nt2,f2) in ((0,1),(seg2.num_nt-1,0))\n", + "\n", + " print(seg1,seg2, r1, r2, is_terminal1, is_terminal2)\n", + " if is_terminal1 or is_terminal2:\n", + " \"\"\" Ensure that we don't have three-way dsDNA junctions \"\"\"\n", + " if is_terminal1 and (bps[r1] >= 0) and (five_prime[bps[r1]] >= 0) and (three_prime[r1] >= 0):\n", + " if (bps[five_prime[bps[r1]]] >= 0) and (bps[three_prime[r1]] >= 0):\n", + " # is_terminal1 = (three_prime[r1] == bps[five_prime[bps[r1]]])\n", + " is_terminal1 = hmap[five_prime[bps[r1]]] == hmap[three_prime[r1]]\n", + " if is_terminal2 and (bps[r2] >= 0) and (three_prime[bps[r2]] >= 0) and (five_prime[r2] >= 0):\n", + " if (bps[three_prime[bps[r2]]] >= 0) and (bps[five_prime[r2]] >= 0):\n", + " # is_terminal2 = (five_prime[r2] == bps[three_prime[bps[r2]]])\n", + " is_terminal2 = hmap[three_prime[bps[r2]]] == hmap[five_prime[r2]]\n", + " \n", + " \"\"\" Place connection \"\"\"\n", + " if is_terminal1 and is_terminal2:\n", + " end1 = seg1.end3 if f1 else seg1.start3\n", + " end2 = seg2.start5 if f2 else seg2.end5\n", + " seg1._connect_ends( end1, end2, type_='intrahelical')\n", + " else:\n", + " seg1.add_crossover(nt1,seg2,nt2,[f1,f2],type_=\"terminal_crossover\")\n", + " else:\n", + " seg1.add_crossover(nt1,seg2,nt2,[f1,f2])\n", + "\n", + " ## Add 5prime/3prime ends\n", + " for r in prime5:\n", + " seg = allSegments[hmap[r]]\n", + " seg.add_5prime(hrank[r],fwd[r])\n", + " for r in prime3:\n", + " seg = allSegments[hmap[r]]\n", + " seg.add_3prime(hrank[r],fwd[r])\n", + "\n", + " ## Add intrahelical connections to circular helical sections\n", + " for nt0,nt1 in intrahelical:\n", + " seg = allSegments[hmap[nt0]]\n", + " assert( seg is allSegments[hmap[nt1]] )\n", + " if three_prime[nt0] >= 0:\n", + " if hmap[nt0] == hmap[three_prime[nt0]]:\n", + " seg.connect_end3(seg.start5)\n", + "\n", + " bp0,bp1 = [bps[nt] for nt in (nt0,nt1)]\n", + " if three_prime[bp1] >= 0:\n", + " if hmap[bp1] == hmap[three_prime[bp1]]:\n", + " seg.connect_start3(seg.end5)\n", + "\n", + " ## Assign sequence\n", + " if sequence is not None:\n", + " for hid in range(len(allSegments)):\n", + " resids = np.where( (hmap==hid)*(fwd==1) )[0]\n", + " s = allSegments[hid]\n", + " s.sequence = [sequence[r] for r in sorted(resids,key=lambda x: hrank[x])]\n", + "\n", + "\n", + " ## Build model\n", + " model = SegmentModel( allSegments,\n", + " max_basepairs_per_bead = max_basepairs_per_bead,\n", + " max_nucleotides_per_bead = max_nucleotides_per_bead,\n", + " local_twist = local_twist,\n", + " dimensions = dimensions,\n", + " **model_parameters )\n", + "\n", + "\n", + " model._reader_list_coordinates = coordinate\n", + " model._reader_list_basepair = basepair\n", + " model._reader_list_stack = stack\n", + " model._reader_list_three_prime = three_prime\n", + " model._reader_list_five_prime = five_prime\n", + " model._reader_list_sequence = sequence\n", + " model._reader_list_orientation = orientation\n", + " model._reader_list_hmap = hmap\n", + " model._reader_list_fwd = fwd\n", + " model._reader_list_hrank = hrank\n", + "\n", + " if sequence is None:\n", + " for s in model.segments:\n", + " s.randomize_unset_sequence()\n", + "\n", + " return model\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "88f7d20e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<DoubleStrandedSegment'> 1[1]> <DoubleStrandedSegment'> 0[1]> 5 3 True True\n", + "<SingleStrandedSegment'> 2[2]> <DoubleStrandedSegment'> 0[1]> 1 2 True True\n", + "<DoubleStrandedSegment'> 0[1]> <DoubleStrandedSegment'> 1[1]> 2 4 True True\n", + "<DoubleStrandedSegment'> 1[1]> <SingleStrandedSegment'> 3[1]> 4 6 True True\n", + "<SingleStrandedSegment'> 3[1]> <SingleStrandedSegment'> 2[2]> 6 0 True True\n" + ] + } + ], + "source": [ + "coordinate = [(0,0,3.4*i) for i in range(7)]\n", + "three_prime = [ 1, 2, 4,-1, 6, 3, 0]\n", + "basepair = [-1,-1, 3, 2, 5, 4,-1]\n", + "stack = [-1,-1, -1,-1,-1, -1,-1]\n", + "for i in [3,5]:\n", + " coordinate[i] = (1,0,3.4*i)\n", + "\n", + "model = model_from_basepair_stack_3prime(coordinate, basepair, stack, three_prime,\n", + " max_basepairs_per_bead=1,\n", + " max_nucleotides_per_bead=1,\n", + " local_twist=False)\n", + "model.writePsf(\"list.psf\")\n", + "model.writePdb(\"list.pdb\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "69d29f1e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[<SegmentParticle DNA on <DoubleStrandedSegment'> 0[1]>[1.00]>], [<SegmentParticle DNA on <DoubleStrandedSegment'> 1[1]>[0.00]>, <SegmentParticle DNA on <DoubleStrandedSegment'> 1[1]>[1.00]>], [<SegmentParticle NAS on <SingleStrandedSegment'> 2[2]>[0.00]>, <SegmentParticle NAS on <SingleStrandedSegment'> 2[2]>[0.50]>, <SegmentParticle NAS on <SingleStrandedSegment'> 2[2]>[1.00]>], [<SegmentParticle NAS on <SingleStrandedSegment'> 3[1]>[0.50]>]]\n", + "[[<Connection <Location 1.end3[0,on_fwd_strand]>--intrahelical--<Location 0.end5[0,on_fwd_strand]>]>, <Connection <Location 2.end3[1,on_fwd_strand]>--sscrossover--<Location 0.end5[0,on_rev_strand]>]>, <Connection <Location 0.end3[0,on_rev_strand]>--intrahelical--<Location 1.end5[0,on_rev_strand]>]>], [<Connection <Location 1.end3[0,on_fwd_strand]>--intrahelical--<Location 0.end5[0,on_fwd_strand]>]>, <Connection <Location 0.end3[0,on_rev_strand]>--intrahelical--<Location 1.end5[0,on_rev_strand]>]>, <Connection <Location 1.end3[0,on_rev_strand]>--intrahelical--<Location 3.end5[0,on_fwd_strand]>]>], [<Connection <Location 2.end3[1,on_fwd_strand]>--sscrossover--<Location 0.end5[0,on_rev_strand]>]>, <Connection <Location 3.end3[0,on_fwd_strand]>--intrahelical--<Location 2.end5[0,on_fwd_strand]>]>], [<Connection <Location 1.end3[0,on_rev_strand]>--intrahelical--<Location 3.end5[0,on_fwd_strand]>]>, <Connection <Location 3.end3[0,on_fwd_strand]>--intrahelical--<Location 2.end5[0,on_fwd_strand]>]>]]\n" + ] + } + ], + "source": [ + "print([i.children for i in model.children])\n", + "print([i.connections for i in model.children])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e103e8b4", + "metadata": {}, + "outputs": [], + "source": [ + "s=model.children[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a30b582d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.segname" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "dcd8bf8a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(0, 0, 0.0),\n", + " (0, 0, 3.4),\n", + " (0, 0, 6.8),\n", + " (1, 0, 10.2),\n", + " (0, 0, 13.6),\n", + " (1, 0, 17.0),\n", + " (0, 0, 20.4)]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "coordinate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf20c9ef", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}