From c3f9b21550f40bb4bcabfd197b65d7997ae37ca6 Mon Sep 17 00:00:00 2001 From: pinyili2 <pinyili2@illinois.edu> Date: Fri, 2 Aug 2024 20:24:15 -0500 Subject: [PATCH] cad --- mrdna/readers/cadnano_segments.py | 319 +++++++++++++++--------------- 1 file changed, 159 insertions(+), 160 deletions(-) diff --git a/mrdna/readers/cadnano_segments.py b/mrdna/readers/cadnano_segments.py index 47cf148..b39bd2e 100644 --- a/mrdna/readers/cadnano_segments.py +++ b/mrdna/readers/cadnano_segments.py @@ -17,7 +17,166 @@ from ..model.dna_sequence import m13 as m13seq ## - helices that should be stacked across an empty region (crossovers from and end in the helix to another end in the helix) ## - circular constructs +def combineRegionLists(loHi1,loHi2,intersect=False): + + """Combines two lists of (lo,hi) pairs specifying integer + regions a single list of regions. """ + + ## Validate input + for l in (loHi1,loHi2): + ## Assert each region in lists is sorted + for pair in l: + assert(len(pair) == 2) + assert(pair[0] <= pair[1]) + + if len(loHi1) == 0: + if intersect: + return [] + else: + return loHi2 + if len(loHi2) == 0: + if intersect: + return [] + else: + return loHi1 + + ## Break input into lists of compact regions + compactRegions1,compactRegions2 = [[],[]] + for compactRegions,loHi in zip( + [compactRegions1,compactRegions2], + [loHi1,loHi2]): + tmp = [] + lastHi = loHi[0][0]-1 + for lo,hi in loHi: + if lo-1 != lastHi: + compactRegions.append(tmp) + tmp = [] + tmp.append((lo,hi)) + lastHi = hi + if len(tmp) > 0: + compactRegions.append(tmp) + + ## Build result + result = [] + region = [] + i,j = [0,0] + compactRegions1.append([[1e10]]) + compactRegions2.append([[1e10]]) + while i < len(compactRegions1)-1 or j < len(compactRegions2)-1: + cr1 = compactRegions1[i] + cr2 = compactRegions2[j] + + ## initialize region + if len(region) == 0: + if cr1[0][0] <= cr2[0][0]: + region = cr1 + i += 1 + continue + else: + region = cr2 + j += 1 + continue + + if region[-1][-1] >= cr1[0][0]: + region = combineCompactRegionLists(region, cr1, intersect=False) + i+=1 + elif region[-1][-1] >= cr2[0][0]: + region = combineCompactRegionLists(region, cr2, intersect=False) + j+=1 + else: + result.extend(region) + region = [] + + assert( len(region) > 0 ) + result.extend(region) + result = sorted(result) + + # print("loHi1:",loHi1) + # print("loHi2:",loHi2) + # print(result,"\n") + + if intersect: + lo = max( [loHi1[0][0], loHi2[0][0]] ) + hi = min( [loHi1[-1][1], loHi2[-1][1]] ) + result = [r for r in result if r[0] >= lo and r[1] <= hi] + + return result + +def combineCompactRegionLists(loHi1,loHi2,intersect=False): + + """Combines two lists of (lo,hi) pairs specifying regions within a + compact integer set into a single list of regions. + + examples: + loHi1 = [[0,4],[5,7]] + loHi2 = [[2,4],[5,9]] + out = [(0, 1), (2, 4), (5, 7), (8, 9)] + + loHi1 = [[0,3],[5,7]] + loHi2 = [[2,4],[5,9]] + out = [(0, 1), (2, 3), (4, 4), (5, 7), (8, 9)] + """ + + ## Validate input + for l in (loHi1,loHi2): + ## Assert each region in lists is sorted + for pair in l: + assert(len(pair) == 2) + assert(pair[0] <= pair[1]) + ## Assert lists are compact + for pair1,pair2 in zip(l[::2],l[1::2]): + assert(pair1[1]+1 == pair2[0]) + + if len(loHi1) == 0: + if intersect: + return [] + else: + return loHi2 + if len(loHi2) == 0: + if intersect: + return [] + else: + return loHi1 + ## Find the ends of the region + lo = min( [loHi1[0][0], loHi2[0][0]] ) + hi = max( [loHi1[-1][1], loHi2[-1][1]] ) + + ## Make a list of indices where each region will be split + splitAfter = [] + for l,h in loHi2: + if l != lo: + splitAfter.append(l-1) + if h != hi: + splitAfter.append(h) + + for l,h in loHi1: + if l != lo: + splitAfter.append(l-1) + if h != hi: + splitAfter.append(h) + splitAfter = sorted(list(set(splitAfter))) + + # print("splitAfter:",splitAfter) + + split=[] + last = -2 + for s in splitAfter: + split.append(s) + last = s + + # print("split:",split) + returnList = [(i+1,j) if i != j else (i,j) for i,j in zip([lo-1]+split,split+[hi])] + + if intersect: + lo = max( [loHi1[0][0], loHi2[0][0]] ) + hi = min( [loHi1[-1][1], loHi2[-1][1]] ) + returnList = [r for r in returnList if r[0] >= lo and r[1] <= hi] + + # print("loHi1:",loHi1) + # print("loHi2:",loHi2) + # print(returnList,"\n") + return returnList class cadnano_part(SegmentModel): def __init__(self, part, @@ -541,166 +700,6 @@ def read_model(json_data, sequence=None, fill_sequence='T', **kwargs): # pynvml.nvmlShutdown() # gpus = [0,1,2] # print(gpus) -def combineRegionLists(loHi1,loHi2,intersect=False): - - """Combines two lists of (lo,hi) pairs specifying integer - regions a single list of regions. """ - - ## Validate input - for l in (loHi1,loHi2): - ## Assert each region in lists is sorted - for pair in l: - assert(len(pair) == 2) - assert(pair[0] <= pair[1]) - - if len(loHi1) == 0: - if intersect: - return [] - else: - return loHi2 - if len(loHi2) == 0: - if intersect: - return [] - else: - return loHi1 - - ## Break input into lists of compact regions - compactRegions1,compactRegions2 = [[],[]] - for compactRegions,loHi in zip( - [compactRegions1,compactRegions2], - [loHi1,loHi2]): - tmp = [] - lastHi = loHi[0][0]-1 - for lo,hi in loHi: - if lo-1 != lastHi: - compactRegions.append(tmp) - tmp = [] - tmp.append((lo,hi)) - lastHi = hi - if len(tmp) > 0: - compactRegions.append(tmp) - - ## Build result - result = [] - region = [] - i,j = [0,0] - compactRegions1.append([[1e10]]) - compactRegions2.append([[1e10]]) - while i < len(compactRegions1)-1 or j < len(compactRegions2)-1: - cr1 = compactRegions1[i] - cr2 = compactRegions2[j] - - ## initialize region - if len(region) == 0: - if cr1[0][0] <= cr2[0][0]: - region = cr1 - i += 1 - continue - else: - region = cr2 - j += 1 - continue - - if region[-1][-1] >= cr1[0][0]: - region = combineCompactRegionLists(region, cr1, intersect=False) - i+=1 - elif region[-1][-1] >= cr2[0][0]: - region = combineCompactRegionLists(region, cr2, intersect=False) - j+=1 - else: - result.extend(region) - region = [] - - assert( len(region) > 0 ) - result.extend(region) - result = sorted(result) - - # print("loHi1:",loHi1) - # print("loHi2:",loHi2) - # print(result,"\n") - - if intersect: - lo = max( [loHi1[0][0], loHi2[0][0]] ) - hi = min( [loHi1[-1][1], loHi2[-1][1]] ) - result = [r for r in result if r[0] >= lo and r[1] <= hi] - - return result - -def combineCompactRegionLists(loHi1,loHi2,intersect=False): - - """Combines two lists of (lo,hi) pairs specifying regions within a - compact integer set into a single list of regions. - - examples: - loHi1 = [[0,4],[5,7]] - loHi2 = [[2,4],[5,9]] - out = [(0, 1), (2, 4), (5, 7), (8, 9)] - - loHi1 = [[0,3],[5,7]] - loHi2 = [[2,4],[5,9]] - out = [(0, 1), (2, 3), (4, 4), (5, 7), (8, 9)] - """ - - ## Validate input - for l in (loHi1,loHi2): - ## Assert each region in lists is sorted - for pair in l: - assert(len(pair) == 2) - assert(pair[0] <= pair[1]) - ## Assert lists are compact - for pair1,pair2 in zip(l[::2],l[1::2]): - assert(pair1[1]+1 == pair2[0]) - - if len(loHi1) == 0: - if intersect: - return [] - else: - return loHi2 - if len(loHi2) == 0: - if intersect: - return [] - else: - return loHi1 - - ## Find the ends of the region - lo = min( [loHi1[0][0], loHi2[0][0]] ) - hi = max( [loHi1[-1][1], loHi2[-1][1]] ) - - ## Make a list of indices where each region will be split - splitAfter = [] - for l,h in loHi2: - if l != lo: - splitAfter.append(l-1) - if h != hi: - splitAfter.append(h) - - for l,h in loHi1: - if l != lo: - splitAfter.append(l-1) - if h != hi: - splitAfter.append(h) - splitAfter = sorted(list(set(splitAfter))) - - # print("splitAfter:",splitAfter) - - split=[] - last = -2 - for s in splitAfter: - split.append(s) - last = s - - # print("split:",split) - returnList = [(i+1,j) if i != j else (i,j) for i,j in zip([lo-1]+split,split+[hi])] - - if intersect: - lo = max( [loHi1[0][0], loHi2[0][0]] ) - hi = min( [loHi1[-1][1], loHi2[-1][1]] ) - returnList = [r for r in returnList if r[0] >= lo and r[1] <= hi] - - # print("loHi1:",loHi1) - # print("loHi2:",loHi2) - # print(returnList,"\n") - return returnList if __name__ == '__main__': loHi1 = [[0,4],[5,7]] -- GitLab