Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
pseudo_image
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Léo Schneider
pseudo_image
Commits
3c8721d0
Commit
3c8721d0
authored
2 months ago
by
Schneider Leo
Browse files
Options
Downloads
Patches
Plain Diff
add : filter data based on charge for image creation
parent
97d68603
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
image_processing/build_image.py
+99
-2
99 additions, 2 deletions
image_processing/build_image.py
image_ref/utils.py
+21
-20
21 additions, 20 deletions
image_ref/utils.py
with
120 additions
and
22 deletions
image_processing/build_image.py
+
99
−
2
View file @
3c8721d0
...
...
@@ -9,7 +9,8 @@ def build_image_ms1_wiff(path, bin_mz):
#load raw data
rawFile
=
WiffFileReader
(
path
)
max_cycle
=
0
for
scanNumber
in
range
(
rawFile
.
GetLastSpectrumNumber
()):
total_scan_number
=
rawFile
.
GetLastSpectrumNumber
()
for
scanNumber
in
range
(
total_scan_number
):
if
rawFile
.
GetMSOrderForScanNum
(
scanNumber
)
==
1
:
ms1_start_mz
=
rawFile
.
source
.
ScanInfos
[
scanNumber
].
LowMz
ms1_end_mz
=
rawFile
.
source
.
ScanInfos
[
scanNumber
].
HighMz
...
...
@@ -23,7 +24,7 @@ def build_image_ms1_wiff(path, bin_mz):
im
=
np
.
zeros
([
max_cycle
,
n_bin_ms1
])
cycle
=
0
for
scanNumber
in
range
(
rawFile
.
GetLastSpectrumN
umber
()
):
for
scanNumber
in
range
(
total_scan_n
umber
):
if
rawFile
.
GetMSOrderForScanNum
(
scanNumber
)
==
1
:
masses
,
intensities
=
rawFile
.
GetCentroidMassListFromScanNum
(
scanNumber
)
line
=
np
.
zeros
(
n_bin_ms1
)
...
...
@@ -35,6 +36,84 @@ def build_image_ms1_wiff(path, bin_mz):
return
im
def
build_image_ms1_wiff_charge_filtered
(
path
,
bin_mz
=
1
,
tolerance
=
0.01
):
#load raw data
rawFile
=
WiffFileReader
(
path
)
max_cycle
=
0
total_scan_number
=
rawFile
.
GetLastSpectrumNumber
()
for
scanNumber
in
range
(
total_scan_number
):
if
rawFile
.
GetMSOrderForScanNum
(
scanNumber
)
==
1
:
ms1_start_mz
=
rawFile
.
source
.
ScanInfos
[
scanNumber
].
LowMz
ms1_end_mz
=
rawFile
.
source
.
ScanInfos
[
scanNumber
].
HighMz
max_cycle
+=
1
print
(
'
start
'
,
ms1_start_mz
,
'
end
'
,
ms1_end_mz
)
total_ms1_mz
=
ms1_end_mz
-
ms1_start_mz
n_bin_ms1
=
int
(
total_ms1_mz
//
bin_mz
)
size_bin_ms1
=
total_ms1_mz
/
n_bin_ms1
im
=
np
.
zeros
([
max_cycle
,
n_bin_ms1
])
cycle
=
0
for
scanNumber
in
range
(
total_scan_number
):
if
rawFile
.
GetMSOrderForScanNum
(
scanNumber
)
==
1
:
masses
,
intensities
=
rawFile
.
GetCentroidMassListFromScanNum
(
scanNumber
)
is_monocharged
=
[
False
for
i
in
range
(
len
(
masses
))]
is_bicharged
=
[
False
for
i
in
range
(
len
(
masses
))]
is_tricharged
=
[
False
for
i
in
range
(
len
(
masses
))]
for
i
in
range
(
len
(
masses
)):
m
=
masses
[
i
]
found_s
=
True
found_b
=
True
found_t
=
True
s
=
1
b
=
1
t
=
1
if
not
is_monocharged
[
i
]:
dec
=
1
while
found_s
:
found_s
=
False
while
(
i
+
dec
)
<
len
(
masses
)
and
masses
[
i
+
dec
]
-
m
<
(
s
+
tolerance
):
if
masses
[
i
+
dec
]
-
m
>
(
1
-
tolerance
):
is_monocharged
[
i
]
=
True
is_monocharged
[
i
+
dec
]
=
True
found_s
=
True
dec
+=
1
s
+=
1
if
not
is_bicharged
[
i
]:
dec
=
1
while
found_b
:
found_b
=
False
while
(
i
+
dec
)
<
len
(
masses
)
and
masses
[
i
+
dec
]
-
m
<
(
b
/
2
+
tolerance
):
if
masses
[
i
+
dec
]
-
m
>
(
1
/
2
-
tolerance
):
is_bicharged
[
i
]
=
True
is_bicharged
[
i
+
dec
]
=
True
found_b
=
True
dec
+=
1
b
+=
1
if
not
is_tricharged
[
i
]:
dec
=
1
while
found_t
:
found_t
=
False
while
(
i
+
dec
)
<
len
(
masses
)
and
masses
[
i
+
dec
]
-
m
<
(
1
/
3
+
tolerance
):
if
masses
[
i
+
dec
]
-
m
>
(
t
/
3
-
tolerance
):
is_tricharged
[
i
]
=
True
is_tricharged
[
i
+
dec
]
=
True
found_t
=
True
dec
+=
1
t
+=
1
line
=
np
.
zeros
(
n_bin_ms1
)
if
len
(
masses
)
>
0
:
for
k
in
range
(
len
(
masses
)):
if
is_bicharged
[
k
]
or
is_tricharged
[
k
]
:
line
[
int
((
masses
[
k
]
-
ms1_start_mz
)
//
size_bin_ms1
)]
+=
intensities
[
k
]
im
[
cycle
,
:]
=
line
cycle
+=
1
rawFile
.
Close
()
return
im
def
build_image_ms1
(
path
,
bin_mz
):
#load raw data
...
...
@@ -79,3 +158,21 @@ def build_image_ms1(path, bin_mz):
im
[
c
,
:]
=
line
return
im
if
__name__
==
'
__main__
'
:
path
=
'
../data/raw_data/SERMAR-10-AER-d200.wiff
'
im_filtered
=
build_image_ms1_wiff_charge_filtered
(
path
,
tolerance
=
0.0001
)
# im_base = build_image_ms1_wiff(path,bin_mz=1)
im_filtered
=
np
.
log
(
im_filtered
+
1
)
# im_base=np.log(im_base+1)
plt
.
imshow
(
im_filtered
,
interpolation
=
'
nearest
'
)
plt
.
savefig
(
'
filtered_image_tol_00001.png
'
)
plt
.
clf
()
# plt.imshow(im_base, interpolation='nearest')
# plt.savefig('base_image.png')
# plt.clf()
\ No newline at end of file
This diff is collapsed.
Click to expand it.
image_ref/utils.py
+
21
−
20
View file @
3c8721d0
...
...
@@ -162,6 +162,7 @@ def build_ref_image_from_diann_global(path_parqet, target_seq, ms1_end_mz, ms1_s
if
__name__
==
'
__main__
'
:
df
=
load_lib
(
'
fasta/global_peptide_list.parquet
'
)
# df = build_database_ref_peptide()
# df.to_csv("dataset_species_ref_peptides.csv", index=False)
# Write fasta file
...
...
@@ -173,24 +174,24 @@ if __name__ == '__main__':
# print(pep)
# f.write(pep+'\n')
#
df_count
=
compute_common_peptide
(
"
dataset_species_ref_peptides.csv
"
,
SPECIES
)
#
# Create ref img
df_full
=
load_lib
(
'
fasta/full proteom/steigerwaltii variants/uniparc_proteome_UP000033376_2025_03_14.predicted.parquet
'
)
min_rt
=
df_full
[
'
RT
'
].
min
()
max_rt
=
df_full
[
'
RT
'
].
max
()
#
df
=
pd
.
read_csv
(
"
dataset_species_ref_peptides.csv
"
)
# df_count = compute_common_peptide("dataset_species_ref_peptides.csv", SPECIES)
#
for
spe
in
SPECIES
:
print
(
spe
)
df_spe
=
df
[
df
[
'
Specie
'
]
==
spe
]
df_spec_no_common
=
df_spe
[
df_spe
[
'
Sequence
'
].
isin
(
df_count
[
df_count
[
'
Count
'
]
<
5
][
'
Sequence
'
])]
im
=
build_ref_image_from_diann_global
(
'
fasta/global_peptide_list.parquet
'
,
target_seq
=
df_spec_no_common
[
'
Sequence
'
].
to_list
(),
ms1_end_mz
=
1250
,
ms1_start_mz
=
350
,
bin_mz
=
1
,
max_cycle
=
663
,
min_rt
=
min_rt
,
max_rt
=
max_rt
)
plt
.
clf
()
mpimg
.
imsave
(
'
img_ref_count_th_5/
'
+
spe
+
'
.png
'
,
im
)
np
.
save
(
'
img_ref_count_th_5/
'
+
spe
+
'
.npy
'
,
im
)
# #
# # Create ref img
# df_full = load_lib(
# 'fasta/full proteom/steigerwaltii variants/uniparc_proteome_UP000033376_2025_03_14.predicted.parquet')
# min_rt = df_full['RT'].min()
# max_rt = df_full['RT'].max()
# #
# df = pd.read_csv("dataset_species_ref_peptides.csv")
# #
# for spe in SPECIES:
# print(spe)
# df_spe = df[df['Specie'] == spe]
# df_spec_no_common = df_spe[df_spe['Sequence'].isin(df_count[df_count['Count']<5]['Sequence'])]
# im = build_ref_image_from_diann_global(
# 'fasta/global_peptide_list.parquet', target_seq=df_spec_no_common['Sequence'].to_list(), ms1_end_mz=1250,
# ms1_start_mz=350, bin_mz=1, max_cycle=663, min_rt=min_rt, max_rt=max_rt)
# plt.clf()
# mpimg.imsave('img_ref_count_th_5/' + spe + '.png', im)
# np.save('img_ref_count_th_5/' + spe + '.npy', im)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment