libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
msrundatasettree.cpp
Go to the documentation of this file.
1// GPL 3+
2// Filippo Rusconi
3
4#include <map>
5#include <limits>
6
7#include "msrundatasettree.h"
8
10
11#include <QVariant>
12#include <QElapsedTimer>
13
14namespace pappso
15{
16
17
19 : mcsp_msRunId(ms_run_id_csp)
20{
21}
22
24{
25 // qDebug();
26
27 for(auto &&node : m_rootNodes)
28 {
29 // Each node is responsible for freeing its children nodes!
30
31 delete node;
32 }
33
34 m_rootNodes.clear();
35
36 // Beware not to delete the node member of the map, as we have already
37 // destroyed them above!
38 //
39 // for(auto iterator = m_indexNodeMap.begin(); iterator !=
40 // m_indexNodeMap.end();
41 //++iterator)
42 //{
43 // delete(iterator->second);
44 //}
45
46 // qDebug();
47}
48
51 QualifiedMassSpectrumCstSPtr mass_spectrum_csp)
52{
53 // qDebug();
54
55 if(mass_spectrum_csp == nullptr)
56 qFatal("Cannot be nullptr");
57
58 if(mass_spectrum_csp.get() == nullptr)
59 qFatal("Cannot be nullptr");
60
61 // We need to get the precursor spectrum index, in case this spectrum is a
62 // fragmentation index.
63
64 MsRunDataSetTreeNode *new_node_p = nullptr;
65
66 std::size_t precursor_spectrum_index =
67 mass_spectrum_csp->getPrecursorSpectrumIndex();
68
69 // qDebug() << "The precursor_spectrum_index:" << precursor_spectrum_index;
70
71 if(precursor_spectrum_index == std::numeric_limits<std::size_t>::max())
72 {
73 // This spectrum is a full scan spectrum, not a fragmentation spectrum.
74 // Create a new node with no parent and push it back to the root nodes
75 // vector.
76
77 new_node_p = new MsRunDataSetTreeNode(mass_spectrum_csp, nullptr);
78
79 // Since there is no parent in this overload, it is assumed that the node
80 // to be populated with the new node is the root node.
81
82 m_rootNodes.push_back(new_node_p);
83
84 // true: with_data
85 // qDebug().noquote() << "Pushed back to the roots node vector node:"
86 //<< new_node_p->toString(true);
87 }
88 else
89 {
90 // This spectrum is a fragmentation spectrum.
91
92 // Sanity check
93
94 if(mass_spectrum_csp->getMsLevel() <= 1)
95 {
97 "msrundatasettree.cpp -- ERROR the MS level needs to be > 1 in a "
98 "fragmentation spectrum.");
99 }
100
101 // Get the node that contains the precursor ion mass spectrum.
102 MsRunDataSetTreeNode *parent_node_p = findNode(precursor_spectrum_index);
103
104 if(parent_node_p == nullptr)
105 {
107 QString("%1 @ %2 - %3\n")
108 .arg(__FILE__)
109 .arg(__LINE__)
110 .arg("msrundatasettree.cpp -- ERROR could not find "
111 "a tree node matching the index."));
112 }
113
114 // qDebug() << "Fragmentation spectrum"
115 //<< "Found parent node:" << parent_node_p
116 //<< "for precursor index:" << precursor_spectrum_index;
117
118 // At this point, create a new node with the right parent.
119
120 new_node_p = new MsRunDataSetTreeNode(mass_spectrum_csp, parent_node_p);
121
122 parent_node_p->m_children.push_back(new_node_p);
123 }
124
125 // And now document that addition in the node index map.
126 m_indexNodeMap.insert(std::pair<std::size_t, MsRunDataSetTreeNode *>(
127 mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex(), new_node_p));
128
129 // We also want to document the new node relating to the
130 // retention time.
131
133 mass_spectrum_csp->getRtInMinutes(), new_node_p, Enums::DataKind::rt);
134
135 // Likewise for the ion mobility time.
136
137 double ion_mobility_value = -1;
138
139 Enums::MsDataFormat file_format = mcsp_msRunId->getMsDataFormat();
140 bool ok = false;
141
142 if(file_format == Enums::MsDataFormat::brukerTims)
143 {
144 // Start by looking if there is a OneOverK0 valid value, which
145 // means we have effectively handled a genuine mobility scan spectrum.
146 QVariant ion_mobility_variant_value =
147 mass_spectrum_csp->getParameterValue(
149
150 if(ion_mobility_variant_value.isValid())
151 {
152 // Yes, genuine ion mobility scan handled here.
153
154 ion_mobility_value = ion_mobility_variant_value.toDouble(&ok);
155
156 if(!ok)
157 {
158 qFatal(
159 "The data are Bruker timsTOF data but failed to convert valid "
160 "QVariant 1/K0 value to double.");
161 }
162 }
163 else
164 {
165 // We are not handling a genuine single ion mobility scan here.
166 // We must be handling a mass spectrum that correspond to
167 // the combination of all the ion mobility scans for a single
168 // frame. This is when the user asks that ion mobility scans
169 // be flattended. In this case, the OneOverK0 value is not valid
170 // but there are two values that are set:
171 // TimsFrameInvKoBegin and TimsFrameInvKoEnd.
172 // See TimsFramesMsRunReader::readSpectrumCollection2.
173
174 // Test one of these values as a sanity check. But
175 // give the value of -1 for the ion mobility because we do not
176 // have ion mobility data here.
177
178 ion_mobility_variant_value = mass_spectrum_csp->getParameterValue(
180
181 if(!ion_mobility_variant_value.isValid())
182 {
183 qFatal(
184 "The data are Bruker timsTOF data but failed to get correct "
185 "ion mobility data. Inconsistency found.");
186 }
187 }
188 }
189 else
190 {
191 ion_mobility_value = mass_spectrum_csp->getDtInMilliSeconds();
192 }
193
194 if(ion_mobility_value != -1)
195 documentNodeInDtRtMap(ion_mobility_value, new_node_p, Enums::DataKind::dt);
196
198
199 // qDebug() << "New index/node map:"
200 //<< mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex() << "/"
201 //<< new_node_p;
202
203 return new_node_p;
204}
205
206const std::map<std::size_t, MsRunDataSetTreeNode *> &
211
212std::size_t
214{
215 // We have a node and we want to get the matching mass spectrum index.
216
217 if(node == nullptr)
218 throw("Cannot be that the node pointer is nullptr");
219
220 std::map<std::size_t, MsRunDataSetTreeNode *>::const_iterator iterator =
221 std::find_if(
222 m_indexNodeMap.begin(),
223 m_indexNodeMap.end(),
224 [node](const std::pair<std::size_t, MsRunDataSetTreeNode *> pair) {
225 return pair.second == node;
226 });
227
228 if(iterator != m_indexNodeMap.end())
229 return iterator->first;
230
231 return std::numeric_limits<std::size_t>::max();
232}
233
234std::size_t
236 QualifiedMassSpectrumCstSPtr qualified_mass_spectrum_csp) const
237{
238 MsRunDataSetTreeNode *node_p = findNode(qualified_mass_spectrum_csp);
239
240 return massSpectrumIndex(node_p);
241}
242
243const std::vector<MsRunDataSetTreeNode *> &
245{
246 return m_rootNodes;
247}
248
249void
251{
252 // qDebug() << "Going to call node->accept(visitor) for each root node.";
253
254 for(auto &&node : m_rootNodes)
255 {
256 // qDebug() << "Calling accept for root node:" << node;
257
258 if(visitor.shouldStop())
259 break;
260
261 node->accept(visitor);
262 }
263}
264
265void
268 std::vector<MsRunDataSetTreeNode *>::const_iterator nodes_begin_iterator,
269 std::vector<MsRunDataSetTreeNode *>::const_iterator nodes_end_iterator)
270{
271 // qDebug() << "Visitor:" << &visitor << "The distance is between iterators
272 // is:"
273 //<< std::distance(nodes_begin_iterator, nodes_end_iterator);
274
275 using Iterator = std::vector<MsRunDataSetTreeNode *>::const_iterator;
276
277 Iterator iter = nodes_begin_iterator;
278
279 // Inform the visitor of the number of nodes to work on.
280
281 std::size_t node_count =
282 std::distance(nodes_begin_iterator, nodes_end_iterator);
283
284 visitor.setNodesToProcessCount(node_count);
285
286 while(iter != nodes_end_iterator)
287 {
288 // qDebug() << "Visitor:" << &visitor
289 //<< "The distance is between iterators is:"
290 //<< std::distance(nodes_begin_iterator, nodes_end_iterator);
291
292 // qDebug() << "Node visited:" << (*iter)->toString();
293
294 if(visitor.shouldStop())
295 break;
296
297 (*iter)->accept(visitor);
298 ++iter;
299 }
300}
301
304{
305 // qDebug();
306
307 for(auto &node : m_rootNodes)
308 {
309 // qDebug() << "In one node of the root nodes.";
310
311 MsRunDataSetTreeNode *iterNode = node->findNode(mass_spectrum_csp);
312 if(iterNode != nullptr)
313 return iterNode;
314 }
315
316 return nullptr;
317}
318
320MsRunDataSetTree::findNode(std::size_t spectrum_index) const
321{
322 // qDebug();
323
324 for(auto &node : m_rootNodes)
325 {
326 // qDebug() << "In one node of the root nodes.";
327
328 MsRunDataSetTreeNode *iterNode = node->findNode(spectrum_index);
329 if(iterNode != nullptr)
330 return iterNode;
331 }
332
333 return nullptr;
334}
335
336std::vector<MsRunDataSetTreeNode *>
338{
339 // We want to push back all the nodes of the tree in a flat vector of nodes.
340
341 std::vector<MsRunDataSetTreeNode *> nodes;
342
343 for(auto &&node : m_rootNodes)
344 {
345 // The node will store itself and all of its children.
346 node->flattenedView(nodes, true /* with_descendants */);
347 }
348
349 return nodes;
350}
351
352std::vector<MsRunDataSetTreeNode *>
354 bool with_descendants)
355{
356 std::vector<MsRunDataSetTreeNode *> nodes;
357
358 // Logically, ms_level cannot be 0.
359
360 if(!ms_level)
361 {
363 "msrundatasettree.cpp -- ERROR the MS level cannot be 0.");
364
365 return nodes;
366 }
367
368 // The depth of the tree at which we are right at this point is 0, we have not
369 // gone into the children yet.
370
371 std::size_t depth = 0;
372
373 // If ms_level is 1, then that means that we want the nodes starting right at
374 // the root nodes with or without the descendants.
375
376 // std::cout << __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__ << " () "
377 //<< "ms_level: " << ms_level << " depth: " << depth << std::endl;
378
379 if(ms_level == 1)
380 {
381 for(auto &&node : m_rootNodes)
382 {
383 // std::cout << __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__
384 //<< " () "
385 //<< "Handling one of the root nodes at ms_level = 1."
386 //<< std::endl;
387
388 node->flattenedView(nodes, with_descendants);
389 }
390
391 return nodes;
392 }
393
394 // At this point, we know that we want the descendants of the root nodes since
395 // we want ms_level > 1, so we need go to to the children of the root nodes.
396
397 // Let depth to 0, because if we go to the children of the root nodes we will
398 // still be at depth 0, that is MS level 1.
399
400 for(auto &node : m_rootNodes)
401 {
402 // std::cout
403 //<< __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__ << " () "
404 //<< std::setprecision(15)
405 //<< "Requesting a flattened view of the root's child nodes with depth: "
406 //<< depth << std::endl;
407
408 node->flattenedViewMsLevelNodes(ms_level, depth, nodes, with_descendants);
409 }
410
411 return nodes;
412}
413
416 std::size_t product_spectrum_index)
417{
418
419 // qDebug();
420
421 // Find the node that holds the mass spectrum that was acquired as the
422 // precursor that when fragmented gave a spectrum at spectrum_index;
423
424 // Get the node that contains the product_spectrum_index first.
425 MsRunDataSetTreeNode *node = nullptr;
426 node = findNode(product_spectrum_index);
427
428 // Now get the node that contains the precursor_spectrum_index.
429
430 return findNode(node->mcsp_massSpectrum->getPrecursorSpectrumIndex());
431}
432
433std::vector<MsRunDataSetTreeNode *>
435 std::size_t precursor_spectrum_index)
436{
437 std::vector<MsRunDataSetTreeNode *> nodes;
438
439 // First get the node of the precursor spectrum index.
440
441 MsRunDataSetTreeNode *precursor_node = findNode(precursor_spectrum_index);
442
443 if(precursor_node == nullptr)
444 return nodes;
445
446 nodes.assign(precursor_node->m_children.begin(),
447 precursor_node->m_children.end());
448
449 return nodes;
450}
451
452std::vector<MsRunDataSetTreeNode *>
454 PrecisionPtr precision_ptr)
455{
456
457 // Find all the precursor nodes holding a mass spectrum that contained a
458 // precursor mz-value.
459
460 if(precision_ptr == nullptr)
462 "msrundatasettree.cpp -- ERROR precision_ptr cannot be nullptr.");
463
464 std::vector<MsRunDataSetTreeNode *> product_nodes;
465
466 // As a first step, find all the nodes that hold a mass spectrum that was
467 // acquired as a fragmentation spectrum of an ion of mz, that is, search all
468 // the product ion nodes for which precursor was mz.
469
470 for(auto &&node : m_rootNodes)
471 {
472 node->productNodesByPrecursorMz(mz, precision_ptr, product_nodes);
473 }
474
475 // Now, for each node found get the precursor node
476
477 std::vector<MsRunDataSetTreeNode *> precursor_nodes;
478
479 for(auto &&node : product_nodes)
480 {
481 precursor_nodes.push_back(
482 findNode(node->mcsp_massSpectrum->getPrecursorSpectrumIndex()));
483 }
484
485 return precursor_nodes;
486}
487
488bool
490 MsRunDataSetTreeNode *node_p,
491 Enums::DataKind data_kind)
492{
493 // qDebug();
494
495 using NodeVector = std::vector<MsRunDataSetTreeNode *>;
496 using DoubleNodeVectorMap = std::map<double, NodeVector>;
497 using MapPair = std::pair<double, NodeVector>;
498 using MapIterator = DoubleNodeVectorMap::iterator;
499
500 DoubleNodeVectorMap *map_p;
501
502 if(data_kind == Enums::DataKind::rt)
503 {
505 }
506 else if(data_kind == Enums::DataKind::dt)
507 {
509 }
510 else
511 qFatal("Programming error.");
512
513 // There are two possibilities:
514 //
515 // 1. The time was never encountered yet. We won't find it. We need to
516 // allocate a vector of Node's and set it associated to time in the map.
517 //
518 // 2. The time was encountered already, we will find it in the maps, we'll
519 // just push_back the Node in the vector of nodes.
520
521 MapIterator found_iterator = map_p->find(time);
522
523 if(found_iterator != map_p->end())
524 {
525 // The time value was encountered already.
526
527 found_iterator->second.push_back(node_p);
528
529 // qDebug() << "Found iterator for time:" << time;
530 }
531 else
532 {
533 // We need to create a new vector with the node.
534
535 NodeVector node_vector = {node_p};
536
537 map_p->insert(MapPair(time, node_vector));
538
539 // qDebug() << "Inserted new time:node_vector pair.";
540 }
541
542 return true;
543}
544
547 QualifiedMassSpectrumCstSPtr mass_spectrum_csp,
548 MsRunDataSetTreeNode *parent_p)
549{
550 // qDebug();
551
552 // We want to add a mass spectrum. Either the parent_p argument is nullptr or
553 // not. If it is nullptr, then we just append the mass spectrum to the vector
554 // of root nodes. If it is not nullptr, we need to append the mass spectrum to
555 // that node.
556
557 MsRunDataSetTreeNode *new_node_p =
558 new MsRunDataSetTreeNode(mass_spectrum_csp, parent_p);
559
560 if(parent_p == nullptr)
561 {
562 m_rootNodes.push_back(new_node_p);
563
564 // qDebug() << "Pushed back" << new_node << "to root nodes:" <<
565 // &m_rootNodes;
566 }
567 else
568 {
569 parent_p->m_children.push_back(new_node_p);
570
571 // qDebug() << "Pushed back" << new_node << "with parent:" << parent_p;
572 }
573
575
576 // And now document that addition in the node index map.
577 m_indexNodeMap.insert(std::pair<std::size_t, MsRunDataSetTreeNode *>(
578 mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex(), new_node_p));
579
580 // We also want to document the new node relating to the
581 // retention time.
582
584 mass_spectrum_csp->getRtInMinutes(), new_node_p, Enums::DataKind::rt);
585
586 // Likewise for the ion mobility time.
587
588 double ion_mobility_value = -1;
589
590 Enums::MsDataFormat file_format = mcsp_msRunId->getMsDataFormat();
591 bool ok = false;
592
593 if(file_format == Enums::MsDataFormat::brukerTims)
594 {
595 // Start by looking if there is a OneOverK0 valid value, which
596 // means we have effectively handled a genuine mobility scan spectrum.
597 QVariant ion_mobility_variant_value =
598 mass_spectrum_csp->getParameterValue(
600
601 if(ion_mobility_variant_value.isValid())
602 {
603 // Yes, genuine ion mobility scan handled here.
604
605 ion_mobility_value = ion_mobility_variant_value.toDouble(&ok);
606
607 if(!ok)
608 {
609 qFatal(
610 "The data are Bruker timsTOF data but failed to convert valid "
611 "QVariant 1/K0 value to double.");
612 }
613 }
614 else
615 {
616 // We are not handling a genuine single ion mobility scan here.
617 // We must be handling a mass spectrum that correspond to
618 // the combination of all the ion mobility scans for a single
619 // frame. This is when the user asks that ion mobility scans
620 // be flattended. In this case, the OneOverK0 value is not valid
621 // but there are two values that are set:
622 // TimsFrameInvKoBegin and TimsFrameInvKoEnd.
623 // See TimsFramesMsRunReader::readSpectrumCollection2.
624
625 // Test one of these values as a sanity check. But
626 // give the value of -1 for the ion mobility because we do not
627 // have ion mobility data here.
628
629 ion_mobility_variant_value = mass_spectrum_csp->getParameterValue(
631
632 if(!ion_mobility_variant_value.isValid())
633 {
634 qFatal(
635 "The data are Bruker timsTOF data but failed to get correct "
636 "ion mobility data. Inconsistency found.");
637 }
638 }
639 }
640 else
641 {
642 ion_mobility_value = mass_spectrum_csp->getDtInMilliSeconds();
643 }
644
645 if(ion_mobility_value != -1)
646 documentNodeInDtRtMap(ion_mobility_value, new_node_p, Enums::DataKind::dt);
647
648 // qDebug() << "New index/node map:"
649 //<< mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex() << "/"
650 //<< new_node;
651
652 return new_node_p;
653}
654
657 QualifiedMassSpectrumCstSPtr mass_spectrum_csp,
658 std::size_t precursor_spectrum_index)
659{
660 // qDebug();
661
662 // First get the node containing the mass spectrum that was acquired at index
663 // precursor_spectrum_index.
664
665 // qDebug() << "Need to find the precursor's mass spectrum node for precursor
666 // "
667 //"spectrum index:"
668 //<< precursor_spectrum_index;
669
670 MsRunDataSetTreeNode *mass_spec_data_node_p =
671 findNode(precursor_spectrum_index);
672
673 // qDebug() << "Found node" << mass_spec_data_node_p
674 //<< "for precursor index:" << precursor_spectrum_index;
675
676 if(mass_spec_data_node_p == nullptr)
677 {
679 "msrundatasettree.cpp -- ERROR could not find a a "
680 "tree node matching the index.");
681 }
682
683 // qDebug() << "Calling addMassSpectrum with parent node:"
684 //<< mass_spec_data_node_p;
685
686 return addMassSpectrum(mass_spectrum_csp, mass_spec_data_node_p);
687}
688
689std::size_t
691 double start, double end, NodeVector &nodes, Enums::DataKind data_kind) const
692{
693 // qDebug() << "Adding ms run data set tree nodes" << "inside of [" << start
694 // << "-" << end << "]"
695 // << dataKindMap[data_kind] << "range";
696
697 using NodeVector = std::vector<MsRunDataSetTreeNode *>;
698 using DoubleNodeVectorMap = std::map<double, NodeVector>;
699 using MapIterator = DoubleNodeVectorMap::const_iterator;
700
701 const DoubleNodeVectorMap *map_p;
702
703 if(data_kind == Enums::DataKind::rt)
704 {
706 }
707 else if(data_kind == Enums::DataKind::dt)
708 {
710 }
711 else
712 qFatal("Programming error.");
713
714 std::size_t added_nodes = 0;
715
716 // Get the iterator to the map item that has the key greater or equal to
717 // start.
718
719 MapIterator start_iterator = map_p->lower_bound(start);
720
721 if(start_iterator == map_p->end())
722 return 0;
723
724 // Now get the end of the map useful range of items.
725
726 MapIterator end_iterator = map_p->upper_bound(end);
727
728 // Now that we have the iterator range, iterate in it and get the mass spectra
729 // from each item's pair.second node vector.
730
731 for(MapIterator iterator = start_iterator; iterator != end_iterator;
732 ++iterator)
733 {
734 // We are iterating in MapPair items.
735
736 NodeVector node_vector = iterator->second;
737
738 // All the nodes in the node vector need to be copied to the mass_spectra
739 // vector passed as parameter.
740
741 for(auto &&node_p : node_vector)
742 {
743 nodes.push_back(node_p);
744
745 ++added_nodes;
746 }
747 }
748
749 return added_nodes;
750}
751
752std::size_t
754 double start, double end, NodeVector &nodes, Enums::DataKind data_kind) const
755{
756 // qDebug() << "Removing ms run data set tree nodes" << "outside of [" <<
757 // start << "-" << end << "]"
758 // << dataKindMap[data_kind] << "range";
759
760 using NodeVector = std::vector<MsRunDataSetTreeNode *>;
761 using NodeVectorIterator = NodeVector::iterator;
762
763 using DoubleNodeVectorMap = std::map<double, NodeVector>;
764 using MapIterator = DoubleNodeVectorMap::const_iterator;
765
766 const DoubleNodeVectorMap *map_p;
767
768 if(data_kind == Enums::DataKind::rt)
769 {
771 }
772 else if(data_kind == Enums::DataKind::dt)
773 {
775 }
776 else
777 qFatal("Programming error.");
778
779 std::size_t removed_vector_items = 0;
780
781 // We want to remove from the nodes vector all the nodes that contain a mass
782 // spectrum acquired at a time range outside of [ start-end ], that is, the
783 // time values [begin() - start [ and ]end -- end()[.
784
785 // Get the iterator to the map item that has the key less to
786 // start (we want to keep the map item having key == start).
787
788 MapIterator first_end_iterator = (*map_p).upper_bound(start);
789
790 // Now that we have the first_end_iterator, we can iterate between [begin --
791 // first_end_iterator[
792
793 for(MapIterator iterator = map_p->begin(); iterator != first_end_iterator;
794 ++iterator)
795 {
796 // Remove from the nodes vector the nodes.
797
798 // We are iterating in MapPair items.
799
800 NodeVector node_vector = iterator->second;
801
802 // All the nodes in the node vector need to be removed from the
803 // mass_spectra vector passed as parameter if found.
804
805 for(auto &&node_p : node_vector)
806 {
807 NodeVectorIterator iterator =
808 std::find(nodes.begin(), nodes.end(), node_p);
809
810 if(iterator != nodes.end())
811 {
812 // We found the node: remove it.
813
814 nodes.erase(iterator);
815
816 ++removed_vector_items;
817 }
818 }
819 }
820
821 // Now the second begin iterator, so that we can remove all the items
822 // contained in the second range, that is, ]end--end()[.
823
824 // The second_first_iterator will point to the item having its time value less
825 // or equal to end. But we do not want to get items having their time equal to
826 // end, only < end. So, if the iterator is not begin(), we just need to
827 // decrement it once.
828 MapIterator second_first_iterator = map_p->upper_bound(end);
829 if(second_first_iterator != map_p->begin())
830 --second_first_iterator;
831
832 for(MapIterator iterator = second_first_iterator; iterator != map_p->end();
833 ++iterator)
834 {
835 // We are iterating in MapPair items.
836
837 NodeVector node_vector = iterator->second;
838
839 // All the nodes in the node vector need to be removed from the
840 // mass_spectra vector passed as parameter if found.
841
842 for(auto &&node_p : node_vector)
843 {
844 NodeVectorIterator iterator =
845 std::find(nodes.begin(), nodes.end(), node_p);
846
847 if(iterator != nodes.end())
848 {
849 // We found the node: remove it.
850
851 nodes.erase(iterator);
852
853 ++removed_vector_items;
854 }
855 }
856 }
857
858 return removed_vector_items;
859}
860
861std::size_t
863 double start,
864 double end,
865 QualMassSpectraVector &mass_spectra,
866 Enums::DataKind data_kind) const
867{
868 // qDebug() << "Adding spectra" << "inside of [" << start << "-" << end << "]"
869 // << dataKindMap[data_kind] << "range";
870
871 QElapsedTimer timer;
872 timer.start();
873
874 // if(start == end)
875 // qDebug() << "Special case, start and end are equal:" << start;
876
877 // We will use the maps that relate rt | dt to a vector of data tree nodes.
878 // Indeed, we may have more than one mass spectrum acquired for a given rt, in
879 // case of ion mobility mass spectrometry. Same for dt: we will have as many
880 // spectra for each dt as there are retention time values...
881
882 using DoubleNodeVectorMap = std::map<double, NodeVector>;
883 using MapIterator = DoubleNodeVectorMap::const_iterator;
884
885 const DoubleNodeVectorMap *map_p;
886
887 if(data_kind == Enums::DataKind::rt)
888 {
890
891 // qDebug() << "The RT map has size:" << map_p->size() << "start:" <<
892 // start
893 //<< "end:" << end;
894 }
895 else if(data_kind == Enums::DataKind::dt)
896 {
898
899 // qDebug() << "The DT map has size:" << map_p->size() << "start:" <<
900 // start
901 //<< "end:" << end;
902 }
903 else
904 qFatal("Programming error.");
905
906 // QString msg = QString("The %1/mz map has size: %2 with start : %3 and end :
907 // %4\n")
908 // .arg(dataKindMap[data_kind])
909 // .arg(map_p->size())
910 // .arg(start)
911 // .arg(end);
912
913 // qDebug() << msg;
914
915 std::size_t added_mass_spectra = 0;
916
917 // Get the iterator to the map item that has the key greater or equal to
918 // start.
919
920 MapIterator start_iterator = map_p->lower_bound(start);
921
922 if(start_iterator == map_p->end())
923 {
924 // qDebug() << "The start iterator is end()!";
925 return 0;
926 }
927
928 // qDebug() << "The start_iterator points to:" << start_iterator->first << "as
929 // a"
930 // << dataKindMap[data_kind] << "time.";
931
932 // Now get the end of the map's useful range of items.
933
934 // Returns an iterator pointing to the first element in the container whose
935 // key is considered to go after 'end'.
936
937 MapIterator end_iterator = map_p->upper_bound(end);
938
939 // Immediately verify if there is no distance between start and end.
940 if(!std::distance(start_iterator, end_iterator))
941 {
942 // qDebug() << "No range of mass spectra could be selected.";
943 return 0;
944 }
945
946 if(end_iterator == map_p->end())
947 {
948 // qDebug() << "The end_iterator points to the end of the map."
949 // << "The last map item is prev() at" << dataKindMap[data_kind]
950 // << "key value: " << std::prev(end_iterator)->first;
951 }
952 else
953 {
954 // qDebug() << "The end_iterator points to:" << end_iterator->first << "as
955 // a"
956 // << dataKindMap[data_kind] << "time and the accounted key value
957 // is actually"
958 // << std::prev(end_iterator)->first;
959 }
960
961 // qDebug() << "The number of time values to iterate through the" <<
962 // dataKindMap[data_kind]
963 // << "/mz map:" << std::distance(start_iterator, end_iterator)
964 // << "with values: start: " << start_iterator->first
965 // << "and end: " << std::prev(end_iterator)->first;
966
967 // Now that we have the iterator range, iterate in it and get the mass
968 // spectra from each item's pair.second node vector.
969
970 for(MapIterator iterator = start_iterator; iterator != end_iterator;
971 ++iterator)
972 {
973 // We are iterating in DoubleNodeVectorMap MapPair items,
974 // with double = rt or dt and NodeVector being just that.
975
976 NodeVector node_vector = iterator->second;
977
978 // All the nodes' mass spectra in the node vector need to be copied to
979 // the mass_spectra vector passed as parameter.
980
981 for(auto &&node_p : node_vector)
982 {
983 QualifiedMassSpectrumCstSPtr qualified_mass_spectrum_csp =
984 node_p->getQualifiedMassSpectrum();
985
986#if 0
987 // Sanity check only for deep debugging.
988
989 if(qualified_mass_spectrum_csp == nullptr ||
990 qualified_mass_spectrum_csp.get() == nullptr)
991 {
993 "The QualifiedMassSpectrumCstSPtr cannot be nullptr.");
994 }
995 else
996 {
997 //qDebug() << "Current mass spectrum is valid with rt:"
998 //<< qualified_mass_spectrum_csp->getRtInMinutes();
999 }
1000#endif
1001
1002 mass_spectra.push_back(qualified_mass_spectrum_csp);
1003
1004 ++added_mass_spectra;
1005 }
1006 }
1007
1008 // qint64 elapsed = timer.elapsed(); // milliseconds
1009 // qDebug() << "Took" << elapsed << "ms to add" << added_mass_spectra << "mass
1010 // spectra";
1011
1012 return added_mass_spectra;
1013}
1014
1015std::size_t
1017 double start,
1018 double end,
1019 QualMassSpectraVector &mass_spectra,
1020 Enums::DataKind data_kind) const
1021{
1022 // qDebug() << "Removing spectra" << "outside of [" << start << "-" << end <<
1023 // "]"
1024 // << dataKindMap[data_kind] << "range";
1025
1026 QElapsedTimer timer;
1027 timer.start();
1028
1029 using DoubleNodeVectorMap = std::map<double, NodeVector>;
1030 using MapIterator = DoubleNodeVectorMap::const_iterator;
1031
1032 const DoubleNodeVectorMap *map_p;
1033
1034 if(data_kind == Enums::DataKind::rt)
1035 {
1036 map_p = &m_rtDoubleNodeVectorMap;
1037
1038 // qDebug() << "The RT map has size:" << map_p->size() << "start:" <<
1039 // start
1040 //<< "end:" << end;
1041 }
1042 else if(data_kind == Enums::DataKind::dt)
1043 {
1044 map_p = &m_dtDoubleNodeVectorMap;
1045
1046 // qDebug() << "The DT map has size:" << map_p->size() << "start:" <<
1047 // start
1048 //<< "end:" << end;
1049 }
1050 else
1051 qFatal("Programming error.");
1052
1053 std::size_t removed_vector_items = 0;
1054
1055 // We want to remove from the nodes vector all the nodes that contain a mass
1056 // spectrum acquired at a time range outside of [ start-end ], that is, the
1057 // time values [begin() - start [ and ]end -- end()[.
1058
1059 // Looking for an iterator that points to an item having a time < start.
1060
1061 // lower_bound returns an iterator pointing to the first element in the
1062 // range [first, last) that is not less than (i.e. greater or equal to)
1063 // value, or last if no such element is found.
1064
1065 MapIterator first_end_iterator = (*map_p).lower_bound(start);
1066
1067 // first_end_iterator points to the item that has the next time value with
1068 // respect to start. This is fine because we'll not remove that point
1069 // because the for loop below will stop one item short of
1070 // first_end_iterator. That means that we effectively remove all the items
1071 // [begin() -> start[ (start not include). Exactly what we want.
1072
1073 // qDebug() << "lower_bound for start:" << first_end_iterator->first;
1074
1075 // Now that we have the first_end_iterator, we can iterate between [begin --
1076 // first_end_iterator[ and remove all the mass spectra listed in these
1077 // iterated nodes.
1078
1079 QSet<QualifiedMassSpectrumCstSPtr> set_of_mass_spectra_to_remove;
1080
1081 for(MapIterator iterator = map_p->begin(); iterator != first_end_iterator;
1082 ++iterator)
1083 {
1084 // We are iterating in MapPair items.
1085
1086 NodeVector node_vector = iterator->second;
1087
1088 for(std::size_t index = 0; index < node_vector.size(); ++index)
1089 {
1090 set_of_mass_spectra_to_remove.insert(
1091 node_vector.at(index)->getQualifiedMassSpectrum());
1092
1093 ++removed_vector_items;
1094 }
1095 }
1096
1097 // Now the second begin iterator, so that we can remove all the items
1098 // contained in the second range, that is, ]end--end()[.
1099
1100 // The second_first_iterator will point to the item having its time value
1101 // less or equal to end. But we do not want to get items having their time
1102 // equal to end, only < end. So, if the iterator is not begin(), we just
1103 // need to decrement it once.
1104
1105 MapIterator second_first_iterator = map_p->upper_bound(end);
1106
1107 // second_first_iterator now points to the item after the one having time
1108 // end. Which is exactly what we want: we want to remove ]end--end()[ and
1109 // this is exactly what the loop starting a the point after end below.
1110
1111 // qDebug() << "second_first_iterator for end:" <<
1112 // second_first_iterator->first;
1113
1114 for(MapIterator iterator = second_first_iterator; iterator != map_p->end();
1115 ++iterator)
1116 {
1117 // We are iterating in MapPair items.
1118
1119 NodeVector node_vector = iterator->second;
1120
1121
1122 for(std::size_t index = 0; index < node_vector.size(); ++index)
1123 {
1124 set_of_mass_spectra_to_remove.insert(
1125 node_vector.at(index)->getQualifiedMassSpectrum());
1126
1127 ++removed_vector_items;
1128 }
1129 }
1130
1131 // Now that we have the set of all the mass spectra to be removed from
1132 // mass_spectra...
1133
1134 mass_spectra.erase(
1135 std::remove_if(
1136 mass_spectra.begin(),
1137 mass_spectra.end(),
1138 [&set_of_mass_spectra_to_remove](QualifiedMassSpectrumCstSPtr &ptr) {
1139 return set_of_mass_spectra_to_remove.find(ptr) !=
1140 set_of_mass_spectra_to_remove.end();
1141 }),
1142 mass_spectra.end());
1143
1144 // qint64 elapsed = timer.elapsed(); // milliseconds
1145 // qDebug() << "Took" << elapsed << "ms to remove" << removed_vector_items <<
1146 // "mass spectra";
1147
1148 return removed_vector_items;
1149}
1150
1151std::size_t
1153{
1154 // We want to know what is the depth of the tree, that is the highest level
1155 // of MSn, that is, n.
1156
1157 if(!m_rootNodes.size())
1158 return 0;
1159
1160 // qDebug() << "There are" << m_rootNodes.size() << "root nodes";
1161
1162 // By essence, we are at MS0: only if we have at least one root node do we
1163 // know we have MS1 data. So we already know that we have at least one
1164 // child, so start with depth 1.
1165
1166 std::size_t depth = 1;
1167 std::size_t tmp_depth = 0;
1168 std::size_t greatest_depth = 0;
1169
1170 for(auto &node : m_rootNodes)
1171 {
1172 tmp_depth = node->depth(depth);
1173
1174 // qDebug() << "Returned depth:" << tmp_depth;
1175
1176 if(tmp_depth > greatest_depth)
1177 greatest_depth = tmp_depth;
1178 }
1179
1180 return greatest_depth;
1181}
1182
1183std::size_t
1185{
1186
1187 std::size_t cumulative_node_count = 0;
1188
1189 for(auto &node : m_rootNodes)
1190 {
1191 node->size(cumulative_node_count);
1192
1193 // qDebug() << "Returned node_count:" << node_count;
1194 }
1195
1196 return cumulative_node_count;
1197}
1198
1199std::size_t
1201{
1202 return m_indexNodeMap.size();
1203}
1204
1205std::size_t
1210
1211
1212} // namespace pappso
virtual void setNodesToProcessCount(std::size_t)=0
QualifiedMassSpectrumCstSPtr mcsp_massSpectrum
MsRunDataSetTreeNode * findNode(std::size_t spectrum_index)
std::vector< MsRunDataSetTreeNode * > m_children
MsRunDataSetTreeNode * findNode(QualifiedMassSpectrumCstSPtr mass_spectrum_csp) const
const std::vector< MsRunDataSetTreeNode * > & getRootNodes() const
std::vector< QualifiedMassSpectrumCstSPtr > QualMassSpectraVector
std::vector< MsRunDataSetTreeNode * > flattenedViewMsLevel(std::size_t ms_level, bool with_descendants=false)
std::size_t indexNodeMapSize() const
void accept(MsRunDataSetTreeNodeVisitorInterface &visitor)
MsRunDataSetTree(MsRunIdCstSPtr ms_run_id_csp)
std::vector< MsRunDataSetTreeNode * > flattenedView()
std::size_t addDataSetQualMassSpectraInsideDtOrRtRange(double start, double end, QualMassSpectraVector &mass_spectra, Enums::DataKind data_kind) const
bool documentNodeInDtRtMap(double time, MsRunDataSetTreeNode *node_p, Enums::DataKind data_kind)
std::size_t removeDataSetTreeNodesOutsideDtOrRtRange(double start, double end, NodeVector &nodes, Enums::DataKind data_kind) const
std::size_t getSpectrumCount() const
std::map< std::size_t, MsRunDataSetTreeNode * > m_indexNodeMap
std::vector< MsRunDataSetTreeNode * > m_rootNodes
std::map< double, NodeVector > DoubleNodeVectorMap
std::vector< MsRunDataSetTreeNode * > precursorNodesByPrecursorMz(pappso_double mz, PrecisionPtr precision_ptr)
std::vector< MsRunDataSetTreeNode * > NodeVector
MsRunDataSetTreeNode * precursorNodeByProductSpectrumIndex(std::size_t product_spectrum_index)
const std::map< std::size_t, MsRunDataSetTreeNode * > & getIndexNodeMap() const
MsRunDataSetTreeNode * addMassSpectrum(QualifiedMassSpectrumCstSPtr mass_spectrum)
std::size_t massSpectrumIndex(const MsRunDataSetTreeNode *node) const
DoubleNodeVectorMap m_rtDoubleNodeVectorMap
std::vector< MsRunDataSetTreeNode * > productNodesByPrecursorSpectrumIndex(std::size_t precursor_spectrum_index)
DoubleNodeVectorMap m_dtDoubleNodeVectorMap
std::size_t addDataSetTreeNodesInsideDtOrRtRange(double start, double end, NodeVector &nodes, Enums::DataKind data_kind) const
std::size_t removeDataSetQualMassSpectraOutsideDtOrRtRange(double start, double end, QualMassSpectraVector &mass_spectra, Enums::DataKind data_kind) const
@ dt
Drift time.
Definition types.h:252
@ rt
Retention time.
Definition types.h:251
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
double pappso_double
A type definition for doubles.
Definition types.h:60
std::shared_ptr< const QualifiedMassSpectrum > QualifiedMassSpectrumCstSPtr
const PrecisionBase * PrecisionPtr
Definition precision.h:122