# Integer code assigned to each unmodified amino acid (one-letter code -> 1..20).
ALPHABET_UNMOD = {
    "A": 1,
    "C": 2,
    "D": 3,
    "E": 4,
    "F": 5,
    "G": 6,
    "H": 7,
    "I": 8,
    "K": 9,
    "L": 10,
    "M": 11,
    "N": 12,
    "P": 13,
    "Q": 14,
    "R": 15,
    "S": 16,
    "T": 17,
    "V": 18,
    "W": 19,
    "Y": 20,
}

# For each supported sequence encoding: (residues trypsin cleaves after,
# residue that blocks cleavage when it immediately follows).  The numerical
# symbols are derived from ALPHABET_UNMOD so the two encodings cannot drift.
_CLEAVAGE_SYMBOLS = {
    "alphabetical": (("K", "R"), "P"),
    "numerical": (
        (ALPHABET_UNMOD["K"], ALPHABET_UNMOD["R"]),
        ALPHABET_UNMOD["P"],
    ),
}


def cut(seq, format):
    """Return the trypsin cleavage positions of ``seq``.

    Trypsin cuts after K or R unless the next residue is P.  A K/R in the
    last position yields no cut because nothing follows it.

    Args:
        seq: Sliceable sequence of residues -- one-letter codes for the
            ``'alphabetical'`` format, ALPHABET_UNMOD integer codes for the
            ``'numerical'`` format.
        format: ``'alphabetical'`` or ``'numerical'``.  (The name shadows
            the builtin but is kept so keyword callers keep working.)

    Returns:
        Ascending list of indices ``i`` such that the cut lies between
        ``seq[i - 1]`` and ``seq[i]``.

    Raises:
        ValueError: if ``format`` is not one of the supported encodings.
    """
    try:
        cleave_after, blocker = _CLEAVAGE_SYMBOLS[format]
    except KeyError:
        raise ValueError(
            "unknown sequence format %r; expected one of %s"
            % (format, sorted(_CLEAVAGE_SYMBOLS))
        ) from None

    # Pair every residue with its successor; the final residue has no
    # successor and therefore can never produce a cut.
    return [
        position + 1
        for position, (residue, following) in enumerate(zip(seq, seq[1:]))
        if residue in cleave_after and following != blocker
    ]


def cut_with_ind(seq, ind_list):
    """Split ``seq`` at the positions listed in ``ind_list``.

    Args:
        seq: Sliceable sequence (string, list, ...).
        ind_list: Cut positions, as returned by :func:`cut`.  It is only
            read; the caller's list is left untouched.

    Returns:
        List of consecutive slices of ``seq``.  With no cut positions the
        result is ``[seq]`` for a non-empty sequence and ``[]`` for an empty
        one.
    """
    size = len(seq)
    cut_points = list(ind_list)  # private copy: never mutate the argument
    if not cut_points and size == 0:
        return []
    boundaries = [0] + cut_points + [size]
    return [seq[start:stop] for start, stop in zip(boundaries, boundaries[1:])]


def digest(seq, format):
    """Digest ``seq`` with trypsin and return the resulting fragments.

    Args:
        seq: Sequence of residues in the encoding named by ``format``.
        format: ``'alphabetical'`` or ``'numerical'`` (see :func:`cut`).

    Returns:
        List of fragments (slices of ``seq``) in their original order.
    """
    return cut_with_ind(seq, cut(seq, format))


res = digest('MNPLLILTFVAAALAAPFDDDDKIVGGYNCEENSVPYQVSLNSGYHFCGGSLINEQWVVSAGHCYKSRIQVRLGEHNIEVLEGNEQFINAAKIIRHPQYDRKTLNNDIMLIKLSSRAVINARVSTISLPTAPPATGTKCLISGWGNTASSGADYPDELQCLDAPVLSQAKCEASYPGKITSNMFCVGFLEGGKDSCQGDSGGPVVCNGQLQGVVSWGDGCAQKNKPGVYTKVYNYVKWIKNTIAANS', 'alphabetical')