Skip to content
Snippets Groups Projects
Commit 48db0218 authored by Schneider Leo's avatar Schneider Leo
Browse files

ref image generation

parent 67873613
No related branches found
No related tags found
No related merge requests found
...@@ -11,13 +11,13 @@ def load_lib(path): ...@@ -11,13 +11,13 @@ def load_lib(path):
return table return table
if __name__ =='__main__':
df1 = load_lib('fasta/steigerwaltii variants/uniparc_proteome_UP000033376_2025_03_14.predicted.parquet')
df2 = load_lib('fasta/steigerwaltii variants/uniparc_proteome_UP000033499_2025_03_14.predicted.parquet')
df1 = load_lib('fasta/steigerwaltii variants/uniparc_proteome_UP000033376_2025_03_14.predicted.parquet') set1 = set(df1['Stripped.Sequence'].to_list())
df2 = load_lib('fasta/steigerwaltii variants/uniparc_proteome_UP000033499_2025_03_14.predicted.parquet') set2 = set(df2['Stripped.Sequence'].to_list())
set1 = set(df1['Stripped.Sequence'].to_list()) venn2((set1, set2), ('Group1', 'Group2'))
set2 = set(df2['Stripped.Sequence'].to_list()) plt.show()
plt.savefig('fasta_similarity_diann.png')
venn2((set1, set2), ('Group1', 'Group2')) \ No newline at end of file
plt.show()
plt.savefig('fasta_similarity_diann.png')
\ No newline at end of file
#TODO REFAIRE UN DATASET https://discuss.pytorch.org/t/upload-a-customize-data-set-for-multi-regression-task/43413?u=ptrblck
"""1er methode load 1 image pour 1 ref
2eme methode : load 1 image et toutes les refs : ok pour l'instant mais a voir comment est ce que cela scale avec l'augmentation du nb de classes
3eme methods 2 datasets différents : plus efficace en stockage mais pas facil a maintenir"""
\ No newline at end of file
...@@ -202,7 +202,7 @@ def build_ref_image(path_fasta, possible_charge, ms1_end_mz, ms1_start_mz, bin_m ...@@ -202,7 +202,7 @@ def build_ref_image(path_fasta, possible_charge, ms1_end_mz, ms1_start_mz, bin_m
return im return im
def build_ref_image_from_diann(path_parqet, ms1_end_mz, ms1_start_mz, bin_mz, max_cycle, rt_pred): def build_ref_image_from_diann(path_parqet, ms1_end_mz, ms1_start_mz, bin_mz, max_cycle, min_rt=None, max_rt=None):
df = load_lib(path_parqet) df = load_lib(path_parqet)
...@@ -212,8 +212,10 @@ def build_ref_image_from_diann(path_parqet, ms1_end_mz, ms1_start_mz, bin_mz, ma ...@@ -212,8 +212,10 @@ def build_ref_image_from_diann(path_parqet, ms1_end_mz, ms1_start_mz, bin_mz, ma
total_ms1_mz = ms1_end_mz - ms1_start_mz total_ms1_mz = ms1_end_mz - ms1_start_mz
n_bin_ms1 = int(total_ms1_mz // bin_mz) n_bin_ms1 = int(total_ms1_mz // bin_mz)
im = np.zeros([max_cycle, n_bin_ms1]) im = np.zeros([max_cycle, n_bin_ms1])
max_rt = np.max(df_unique['RT']) if max_rt is None:
min_rt = np.min(df_unique['RT']) max_rt = np.max(df_unique['RT'])
if min_rt is None:
min_rt = np.min(df_unique['RT'])
total_rt = max_rt - min_rt +1e-3 total_rt = max_rt - min_rt +1e-3
for row in df_unique.iterrows() : for row in df_unique.iterrows() :
if 900 > int(((row[1]['Precursor.Mz']-ms1_start_mz)/total_ms1_mz)*n_bin_ms1) >= 0: if 900 > int(((row[1]['Precursor.Mz']-ms1_start_mz)/total_ms1_mz)*n_bin_ms1) >= 0:
...@@ -230,8 +232,13 @@ if __name__ == '__main__': ...@@ -230,8 +232,13 @@ if __name__ == '__main__':
# mpimg.imsave('test_img.png', im) # mpimg.imsave('test_img.png', im)
df = build_database_ref_peptide() df = build_database_ref_peptide()
df_full = load_lib('fasta/full proteom/steigerwaltii variants/uniparc_proteome_UP000033376_2025_03_14.predicted.parquet')
min_rt = df_full['RT'].min()
max_rt = df_full['RT'].max()
for spe in ['Proteus mirabilis','Klebsiella pneumoniae','Klebsiella oxytoca','Enterobacter hormaechei','Citrobacter freundii']: for spe in ['Proteus mirabilis','Klebsiella pneumoniae','Klebsiella oxytoca','Enterobacter hormaechei','Citrobacter freundii']:
df_spe = df[df['Specie']==spe] im = build_ref_image_from_diann(
with open(spe+'.fasta','w') as f: 'fasta/optimal peptide set/'+spe+'.parquet', ms1_end_mz=1250,
for r in df_spe.iterrows(): ms1_start_mz=350, bin_mz=1, max_cycle=663, min_rt=min_rt, max_rt=max_rt)
f.write(r[1]['Sequence']) plt.clf()
mpimg.imsave(spe+'.png', im)
...@@ -106,7 +106,7 @@ class ResNet(nn.Module): ...@@ -106,7 +106,7 @@ class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
groups=1, width_per_group=64, replace_stride_with_dilation=None, groups=1, width_per_group=64, replace_stride_with_dilation=None,
norm_layer=None): norm_layer=None, in_channels=3):
super(ResNet, self).__init__() super(ResNet, self).__init__()
if norm_layer is None: if norm_layer is None:
norm_layer = nn.BatchNorm2d norm_layer = nn.BatchNorm2d
...@@ -123,7 +123,7 @@ class ResNet(nn.Module): ...@@ -123,7 +123,7 @@ class ResNet(nn.Module):
"or a 3-element tuple, got {}".format(replace_stride_with_dilation)) "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups self.groups = groups
self.base_width = width_per_group self.base_width = width_per_group
self.conv1 = nn.Conv2d(1, self.inplanes, kernel_size=7, stride=2, padding=3, self.conv1 = nn.Conv2d(in_channels, self.inplanes, kernel_size=7, stride=2, padding=3,
bias=False) bias=False)
self.bn1 = norm_layer(self.inplanes) self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
...@@ -266,19 +266,32 @@ class Classification_model(nn.Module): ...@@ -266,19 +266,32 @@ class Classification_model(nn.Module):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.n_class = n_class self.n_class = n_class
if model =='ResNet18': if model =='ResNet18':
self.im_encoder = resnet18(num_classes=self.n_class) self.im_encoder = resnet18(num_classes=self.n_class, in_channels=1)
def forward(self, input): def forward(self, input):
return self.im_encoder(input) return self.im_encoder(input)
class Classification_model_contrastive(nn.Module):
    """Classifier that encodes a sample image stacked with a reference image.

    The backbone is built with ``in_channels=2``: the input image and the
    reference image are concatenated along the channel dimension and fed to
    the encoder as a single 2-channel image.
    """

    def __init__(self, model, n_class, *args, **kwargs):
        """
        Args:
            model (str): backbone identifier; only 'ResNet18' is supported,
                any other value leaves the encoder undefined (matches the
                sibling Classification_model classes).
            n_class (int): number of output classes.
        """
        super().__init__(*args, **kwargs)
        self.n_class = n_class
        if model == 'ResNet18':
            # 2 input channels: one for the sample, one for the reference.
            self.im_encoder = resnet18(num_classes=self.n_class, in_channels=2)

    def forward(self, input, ref):
        """Concatenate sample and reference on the channel axis and classify.

        Args:
            input: sample image tensor, assumed NCHW with C == 1.
            ref: reference image tensor of the same shape as ``input``.

        Returns:
            Class logits from the 2-channel encoder.
        """
        # Bug fix: torch.concat expects a sequence of tensors, and the
        # channel axis of an NCHW batch is dim=1 (dim=2 would stack along
        # height and break the in_channels=2 conv1).
        stacked = torch.concat((input, ref), dim=1)
        return self.im_encoder(stacked)
class Classification_model_duo(nn.Module): class Classification_model_duo(nn.Module):
def __init__(self, model, n_class, *args, **kwargs): def __init__(self, model, n_class, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.n_class = n_class self.n_class = n_class
if model =='ResNet18': if model =='ResNet18':
self.im_encoder = resnet18(num_classes=self.n_class) self.im_encoder = resnet18(num_classes=self.n_class, in_channels=1)
self.predictor = nn.Linear(in_features=self.n_class*2,out_features=self.n_class) self.predictor = nn.Linear(in_features=self.n_class*2,out_features=self.n_class)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment