libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
locationsaver.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/specpeptidoms/locationsaver.cpp
3 * \date 24/03/2025
4 * \author Aurélien Berthier
5 * \brief save protein subsequences for alignment
6 *
7 * C++ implementation of the SpecPeptidOMS algorithm described in :
8 * (1) Benoist, É.; Jean, G.; Rogniaux, H.; Fertin, G.; Tessier, D. SpecPeptidOMS Directly and
9 * Rapidly Aligns Mass Spectra on Whole Proteomes and Identifies Peptides That Are Not Necessarily
10 * Tryptic: Implications for Peptidomics. J. Proteome Res. 2025.
11 * https://doi.org/10.1021/acs.jproteome.4c00870.
12 */
13
14/*
15 * Copyright (c) 2025 Aurélien Berthier
16 * <aurelien.berthier@ls2n.fr>
17 *
18 * This program is free software: you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation, either version 3 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program. If not, see <http://www.gnu.org/licenses/>.
30 */
31
32#include <algorithm>
33#include "locationsaver.h"
37
38namespace pappso
39{
40namespace specpeptidoms
41{
42
43QString
45{
46 std::size_t length2;
47 if((qsizetype)(beginning + length) <= proteinPtr->size())
48 {
49 length2 = length;
50 }
51 else
52 {
53 length2 = proteinPtr->size() - beginning;
54 }
55 return proteinPtr->getSequence().sliced(proteinPtr->size() - beginning - length2, length2);
56}
57
58
60{
61 Location location_zero;
62 location_zero.beginning = 0;
63 location_zero.length = 0;
64 location_zero.proteinPtr = nullptr;
65 location_zero.score = MIN_ALIGNMENT_SCORE;
66 location_zero.tree = -1;
67
68 m_locations_heap.resize(MAX_SAVED_ALIGNMENTS, location_zero);
69 // std::make_heap(m_locations_heap.begin(), m_locations_heap.end(),
70 // LocationSaver::locationCompare); ?
71 // m_tree_scores.reserve ?
72}
73
74bool
76{
77 return loc1.score > loc2.score;
78}
79
80// TODO : check complexity if m_tree_in_heap[tree] == true
81void
83 std::size_t beginning, std::size_t length, int tree, int score, const SpOMSProtein *protein_ptr)
84{
85 try
86 {
87 m_tree_scores.at(tree) = score;
88 if(m_tree_in_heap.at(tree))
89 {
90 for(std::vector<Location>::iterator iter = m_locations_heap.begin();
91 iter != m_locations_heap.end();
92 iter++)
93 {
94 if(iter->tree == tree)
95 {
96 iter->score = score;
97 iter->length = length;
98 }
99 }
100 std::make_heap(
102 }
103 else
104 {
105 if(m_locations_heap.begin()->tree >= 0)
106 {
107 m_tree_in_heap.at(m_locations_heap.begin()->tree) = false;
108 }
109 m_tree_in_heap.at(tree) = true;
110 std::pop_heap(
112 m_locations_heap.pop_back();
113 m_locations_heap.push_back({beginning, length, tree, score, protein_ptr});
114 std::push_heap(
116 }
117 }
118 catch(const std::exception &error)
119 {
121 QObject::tr("addLocation failed std::exception :\n%1").arg(error.what()));
122 }
123}
124
125std::vector<pappso::specpeptidoms::Location>
127{
128 std::vector<Location> locations;
129 locations.reserve(m_locations_heap.size());
130 for(std::vector<Location>::const_iterator iter = m_locations_heap.begin();
131 iter != m_locations_heap.end();
132 iter++)
133 {
134 if(iter->tree >= 0)
135 {
136 locations.push_back(*iter);
137 }
138 }
139 return locations;
140}
141
142std::size_t
144{
146 m_tree_in_heap.push_back(false);
147 return m_tree_scores.size() - 1;
148}
149
150int
152{
153 if(m_tree_scores.size() == 0)
154 {
155 return m_locations_heap.begin()->score;
156 }
157 else
158 {
159 if(tree_id > (int)m_tree_scores.size())
160 {
161
163 QObject::tr("LocationSaver::getMinScore failed :\nout of "
164 "range access %1 with m_tree_scores.size() %2")
165 .arg(tree_id)
166 .arg(m_tree_scores.size()));
167 }
168
169 /*To be stored, an alignment's score must be higher than: its tree's current score, the
170 * current worst stored alignment's score and a score that depends on the current best
171 * alignment's score
172 */
173 return std::max(
174 m_tree_scores.at(tree_id),
175 std::max(m_locations_heap.begin()->score,
176 (int)std::ceil(ALIGNMENT_FILTER *
177 *(std::max_element(m_tree_scores.begin(), m_tree_scores.end())))));
178 }
179}
180
184
185void
187{
188 Location location_zero;
189 location_zero.beginning = 0;
190 location_zero.length = 0;
191 location_zero.proteinPtr = nullptr;
192 location_zero.score = MIN_ALIGNMENT_SCORE;
193 location_zero.tree = -1;
194
195 std::fill(m_locations_heap.begin(), m_locations_heap.end(), location_zero);
196 m_tree_scores.clear();
197 m_tree_in_heap.clear();
198
199
200 // int m_min_score, m_max_score;
201}
202} // namespace specpeptidoms
203} // namespace pappso
int getMinScore(int tree_id) const
Returns the minimum score for a location with the provided tree_id to be saved in the heap.
static bool locationCompare(const Location &loc1, const Location &loc2)
void addLocation(std::size_t beginning, std::size_t length, int tree, int score, const SpOMSProtein *protein_ptr)
Adds a location to the locations heap. If a saved location has the same tree_id, it will replace it....
std::vector< Location > getLocations() const
Returns a vector containing the saved locations.
std::vector< Location > m_locations_heap
std::size_t getNextTree()
Creates a new alignment tree and returns its id.
const uint MAX_SAVED_ALIGNMENTS(5)
const int MIN_ALIGNMENT_SCORE(15)
const float ALIGNMENT_FILTER(0.9)
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::size_t length
length of the subsequence
QString getPeptideString() const
convenient function to get peptide sequence from location
std::size_t beginning
start position of the subsequence
const SpOMSProtein * proteinPtr
Pointer to the protein from which the subsequence is taken (used to read its sequence and size).