pseudo_image / Commits

Commit 48db0218, authored 4 weeks ago by Schneider Leo

    ref image generation

Parent: 67873613
Showing 4 changed files with 43 additions and 19 deletions:

  image_ref/analyse_diann_digestion.py   +8  −8
  image_ref/main.py                      +4  −0
  image_ref/utils.py                     +14 −7
  models/model.py                        +17 −4
image_ref/analyse_diann_digestion.py  +8 −8
@@ -11,13 +11,13 @@ def load_lib(path):
     return table


 if __name__ == '__main__':
     df1 = load_lib('fasta/steigerwaltii variants/uniparc_proteome_UP000033376_2025_03_14.predicted.parquet')
     df2 = load_lib('fasta/steigerwaltii variants/uniparc_proteome_UP000033499_2025_03_14.predicted.parquet')
     set1 = set(df1['Stripped.Sequence'].to_list())
     set2 = set(df2['Stripped.Sequence'].to_list())
     venn2((set1, set2), ('Group1', 'Group2'))
     plt.show()
     plt.savefig('fasta_similarity_diann.png')
\ No newline at end of file
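The body of load_lib is outside the visible hunk, so the sketch below is an assumption: it treats the DIA-NN .predicted.parquet library as a plain parquet table read with pandas, uses hypothetical placeholder paths, and saves the figure before plt.show() so the Venn diagram is written before the window is closed (the committed script calls savefig after show, which can produce an empty file on some backends).

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib_venn import venn2

def load_lib(path):
    # Assumption: the .predicted.parquet library is a plain parquet table.
    table = pd.read_parquet(path)
    return table

df1 = load_lib('variant_1.predicted.parquet')   # hypothetical paths
df2 = load_lib('variant_2.predicted.parquet')
set1 = set(df1['Stripped.Sequence'].to_list())
set2 = set(df2['Stripped.Sequence'].to_list())
venn2((set1, set2), ('Group1', 'Group2'))
# Save first, then show, so the rendered diagram ends up in the PNG.
plt.savefig('fasta_similarity_diann.png')
plt.show()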
image_ref/main.py  (new file, mode 100644)  +4 −0
#TODO rebuild the dataset, see https://discuss.pytorch.org/t/upload-a-customize-data-set-for-multi-regression-task/43413?u=ptrblck
"""
1st method: load 1 image for 1 ref
2nd method: load 1 image and all the refs: OK for now, but we need to see how this scales as the number of classes grows
3rd method: 2 separate datasets: more storage-efficient but not easy to maintain
"""
\ No newline at end of file
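The new docstring weighs three ways of feeding (image, reference) pairs to a dataset. Below is a minimal sketch of the 2nd method (one acquisition image plus every reference image per item), assuming images are stored as .npy arrays; the class and argument names (RefImageDataset, image_paths, labels, ref_dir) are hypothetical and not part of the commit.

import os
import numpy as np
import torch
from torch.utils.data import Dataset

class RefImageDataset(Dataset):
    """Hypothetical sketch of 'method 2': each item returns the acquisition
    image together with the full stack of reference images."""

    def __init__(self, image_paths, labels, ref_dir):
        self.image_paths = image_paths
        self.labels = labels
        # Load every reference once; memory grows linearly with the number of
        # classes, which is the scaling concern raised in the docstring.
        self.refs = torch.stack([
            torch.from_numpy(np.load(os.path.join(ref_dir, f))).float()
            for f in sorted(os.listdir(ref_dir))
        ])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        im = torch.from_numpy(np.load(self.image_paths[idx])).float()
        return im, self.refs, self.labels[idx]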
image_ref/utils.py  +14 −7
@@ -202,7 +202,7 @@ def build_ref_image(path_fasta, possible_charge, ms1_end_mz, ms1_start_mz, bin_m
     return im


-def build_ref_image_from_diann(path_parqet, ms1_end_mz, ms1_start_mz, bin_mz, max_cycle, rt_pred):
+def build_ref_image_from_diann(path_parqet, ms1_end_mz, ms1_start_mz, bin_mz, max_cycle, min_rt=None, max_rt=None):
     df = load_lib(path_parqet)

@@ -212,8 +212,10 @@ def build_ref_image_from_diann(path_parqet, ms1_end_mz, ms1_start_mz, bin_mz, ma
     total_ms1_mz = ms1_end_mz - ms1_start_mz
     n_bin_ms1 = int(total_ms1_mz // bin_mz)
     im = np.zeros([max_cycle, n_bin_ms1])
-    max_rt = np.max(df_unique['RT'])
-    min_rt = np.min(df_unique['RT'])
+    if max_rt is None:
+        max_rt = np.max(df_unique['RT'])
+    if min_rt is None:
+        min_rt = np.min(df_unique['RT'])
     total_rt = max_rt - min_rt + 1e-3
     for row in df_unique.iterrows():
         if 900 > int(((row[1]['Precursor.Mz'] - ms1_start_mz) / total_ms1_mz) * n_bin_ms1) >= 0:

@@ -230,8 +232,13 @@ if __name__ == '__main__':
     # mpimg.imsave('test_img.png', im)
     df = build_database_ref_peptide()
+    df_full = load_lib('fasta/full proteom/steigerwaltii variants/uniparc_proteome_UP000033376_2025_03_14.predicted.parquet')
+    min_rt = df_full['RT'].min()
+    max_rt = df_full['RT'].max()
     for spe in ['Proteus mirabilis', 'Klebsiella pneumoniae', 'Klebsiella oxytoca', 'Enterobacter hormaechei', 'Citrobacter freundii']:
         df_spe = df[df['Specie'] == spe]
         with open(spe + '.fasta', 'w') as f:
             for r in df_spe.iterrows():
                 f.write(r[1]['Sequence'])
+        im = build_ref_image_from_diann('fasta/optimal peptide set/' + spe + '.parquet', ms1_end_mz=1250, ms1_start_mz=350, bin_mz=1, max_cycle=663, min_rt=min_rt, max_rt=max_rt)
         plt.clf()
         mpimg.imsave(spe + '.png', im)
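Reading of the change: build_ref_image_from_diann now accepts optional min_rt and max_rt, so the per-species reference images can share the retention-time axis of the full-proteome library instead of each deriving its own from df_unique. A small sketch of the binning this implies is below, assuming the RT-to-cycle mapping mirrors the m/z formula visible in the hunk (the RT line itself is outside the diff, so it is an assumption, and bin_precursor is a hypothetical helper).

import numpy as np

def bin_precursor(mz, rt, ms1_start_mz=350, ms1_end_mz=1250, bin_mz=1,
                  max_cycle=663, min_rt=0.0, max_rt=100.0):
    # m/z binning as in the hunk; the 1e-3 epsilon keeps rt == max_rt inside the image.
    total_ms1_mz = ms1_end_mz - ms1_start_mz
    n_bin_ms1 = int(total_ms1_mz // bin_mz)
    total_rt = max_rt - min_rt + 1e-3
    col = int(((mz - ms1_start_mz) / total_ms1_mz) * n_bin_ms1)
    row = int(((rt - min_rt) / total_rt) * max_cycle)
    return row, col, (max_cycle, n_bin_ms1)

# One precursor lights up one (cycle, m/z-bin) pixel of the reference image.
row, col, shape = bin_precursor(mz=652.3, rt=42.7, min_rt=10.0, max_rt=70.0)
im = np.zeros(shape)
if 0 <= row < shape[0] and 0 <= col < shape[1]:
    im[row, col] += 1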
models/model.py  +17 −4
@@ -106,7 +106,7 @@ class ResNet(nn.Module):
     def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                  groups=1, width_per_group=64, replace_stride_with_dilation=None,
-                 norm_layer=None):
+                 norm_layer=None, in_channels=3):
         super(ResNet, self).__init__()
         if norm_layer is None:
             norm_layer = nn.BatchNorm2d

@@ -123,7 +123,7 @@ class ResNet(nn.Module):
                              "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
         self.groups = groups
         self.base_width = width_per_group
-        self.conv1 = nn.Conv2d(1, self.inplanes, kernel_size=7, stride=2, padding=3,
+        self.conv1 = nn.Conv2d(in_channels, self.inplanes, kernel_size=7, stride=2, padding=3,
                                bias=False)
         self.bn1 = norm_layer(self.inplanes)
         self.relu = nn.ReLU(inplace=True)

@@ -266,19 +266,32 @@ class Classification_model(nn.Module):
         super().__init__(*args, **kwargs)
         self.n_class = n_class
         if model == 'ResNet18':
-            self.im_encoder = resnet18(num_classes=self.n_class)
+            self.im_encoder = resnet18(num_classes=self.n_class, in_channels=1)

     def forward(self, input):
         return self.im_encoder(input)

+class Classification_model_contrastive(nn.Module):
+
+    def __init__(self, model, n_class, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.n_class = n_class
+        if model == 'ResNet18':
+            self.im_encoder = resnet18(num_classes=self.n_class, in_channels=2)
+
+    def forward(self, input, ref):
+        input = torch.concat(input, ref, dim=2)
+        return self.im_encoder(input)
+
 class Classification_model_duo(nn.Module):

     def __init__(self, model, n_class, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.n_class = n_class
         if model == 'ResNet18':
-            self.im_encoder = resnet18(num_classes=self.n_class)
+            self.im_encoder = resnet18(num_classes=self.n_class, in_channels=1)
         self.predictor = nn.Linear(in_features=self.n_class * 2, out_features=self.n_class)
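Reading of the change: resnet18 gains an in_channels argument, the plain classifier keeps 1 input channel, and the new Classification_model_contrastive stacks the acquisition image with its reference before a 2-channel encoder. One caveat worth flagging: torch.concat (an alias of torch.cat) takes a sequence of tensors, and for NCHW batches the channel axis is dim=1, whereas the committed forward passes the two tensors positionally with dim=2. A minimal sketch of a channel-wise concatenation is below; the shapes are hypothetical, chosen to match the reference-image parameters elsewhere in the commit (max_cycle=663, 900 m/z bins).

import torch

input = torch.randn(8, 1, 663, 900)  # batch of single-channel acquisition images (hypothetical shape)
ref = torch.randn(8, 1, 663, 900)    # matching reference images
x = torch.cat((input, ref), dim=1)   # -> (8, 2, 663, 900), fits an encoder built with in_channels=2

A 2-channel resnet18 instance (as constructed in Classification_model_contrastive) would then consume x directly.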