SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
csa_bitcompressed.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef INCLUDED_SDSL_CSA_UNCOMPRESSED
9#define INCLUDED_SDSL_CSA_UNCOMPRESSED
10
11#include <iostream>
12#include <stddef.h>
13#include <stdint.h>
14#include <string>
15
16#include <sdsl/cereal.hpp>
17#include <sdsl/config.hpp>
20#include <sdsl/int_vector.hpp>
22#include <sdsl/io.hpp>
23#include <sdsl/iterators.hpp>
27#include <sdsl/util.hpp>
28
29namespace sdsl
30{
31
33
48template <class t_alphabet_strat = byte_alphabet>
50{
52
53public:
54 typedef uint64_t value_type; // STL Container requirement
56 typedef const_iterator iterator; // STL Container requirement
60 typedef const pointer const_pointer;
61 typedef int_vector<>::size_type size_type; // STL Container requirement
63 typedef ptrdiff_t difference_type; // STL Container requirement
72 typedef t_alphabet_strat alphabet_type;
73 typedef typename alphabet_type::char_type char_type; // Note: This is the char type of the CSA not the WT!
74 typedef typename alphabet_type::comp_char_type comp_char_type;
75 typedef typename alphabet_type::string_type string_type;
76 typedef typename alphabet_type::alphabet_category alphabet_category;
78
81
82 enum
83 {
86 };
87
88private:
89 sa_sample_type m_sa; // vector for suffix array values
90 isa_sample_type m_isa; // vector for inverse suffix array values
91 alphabet_type m_alphabet;
92
93public:
94 const typename alphabet_type::char2comp_type & char2comp = m_alphabet.char2comp;
95 const typename alphabet_type::comp2char_type & comp2char = m_alphabet.comp2char;
96 const typename alphabet_type::C_type & C = m_alphabet.C;
97 const typename alphabet_type::sigma_type & sigma = m_alphabet.sigma;
98 const psi_type psi = psi_type(*this);
99 const lf_type lf = lf_type(*this);
100 const bwt_type bwt = bwt_type(*this);
101 const bwt_type L = bwt_type(*this);
102 isa_type const & isa = m_isa;
104 const text_type text = text_type(*this);
105 sa_sample_type const & sa_sample = m_sa;
107
112 csa_bitcompressed(csa_bitcompressed const & csa) : m_sa(csa.m_sa), m_isa(csa.m_isa), m_alphabet(csa.m_alphabet)
113 {}
114
117 {
118 *this = std::move(csa);
119 }
120
123 {
124 std::string text_file = cache_file_name(key_text<alphabet_type::int_width>(), config);
127 size_type n = text_buf.size();
128
129 m_alphabet = alphabet_type(text_buf, n);
130 m_sa = sa_sample_type(config);
131 m_isa = isa_sample_type(config);
132 }
133
135
139 {
140 return m_sa.size();
141 }
142
144
148 {
149 return int_vector<>::max_size();
150 }
151
153
156 bool empty() const
157 {
158 return m_sa.empty();
159 }
160
162
166 {
167 return const_iterator(this, 0);
168 }
169
171
175 {
176 return const_iterator(this, size());
177 }
178
180
185 {
186 return m_sa[i];
187 }
188
190
194 {
195 if (this != &csa)
196 {
197 csa_bitcompressed tmp(csa);
198 *this = std::move(tmp);
199 }
200 return *this;
201 }
202
204
208 {
209 if (this != &csa)
210 {
211 m_sa = std::move(csa.m_sa);
212 m_isa = std::move(csa.m_isa);
213 m_alphabet = std::move(csa.m_alphabet);
214 }
215 return *this;
216 }
217
219 bool operator==(csa_bitcompressed const & other) const noexcept
220 {
221 return (m_sa == other.m_sa) && (m_isa == other.m_isa) && (m_alphabet == other.m_alphabet);
222 }
223
225 bool operator!=(csa_bitcompressed const & other) const noexcept
226 {
227 return !(*this == other);
228 }
229
231
234 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
235 {
236 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
237 size_type written_bytes = 0;
238 written_bytes += m_sa.serialize(out, child, "m_sa");
239 written_bytes += m_isa.serialize(out, child, "m_isa");
240 written_bytes += m_alphabet.serialize(out, child, "m_alphabet");
241 structure_tree::add_size(child, written_bytes);
242 return written_bytes;
243 }
244
245 void load(std::istream & in)
246 {
247 m_sa.load(in);
248 m_isa.load(in);
249 m_alphabet.load(in);
250 }
251
252 template <typename archive_t>
253 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
254 {
255 ar(CEREAL_NVP(m_sa));
256 ar(CEREAL_NVP(m_isa));
257 ar(CEREAL_NVP(m_alphabet));
258 }
259
260 template <typename archive_t>
261 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
262 {
263 ar(CEREAL_NVP(m_sa));
264 ar(CEREAL_NVP(m_isa));
265 ar(CEREAL_NVP(m_alphabet));
266 }
267
269 {
270 return 1;
271 }
272
273private:
274 // Calculates how many symbols c are in the prefix [0..i-1] of the BWT of the original text.
275 /*
276 * \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$.
277 * \param c The symbol to count the occurrences in the prefix.
278 * \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT.
279 * \par Time complexity
280 * \f$ \Order{\log n} \f$
281 */
282 size_type rank_bwt(size_type i, const char_type c) const
283 {
284 // TODO: special case if c == BWT[i-1] we can use LF to get a constant time answer
286 if (cc == 0 and c != 0) // character is not in the text => return 0
287 return 0;
288 // binary search the interval [C[cc]..C[cc+1]-1] for the result
289 size_type lower_b = C[cc],
290 upper_b = C[((size_type)1) + cc]; // lower_b inclusive, upper_b exclusive
291 while (lower_b + 1 < upper_b)
292 {
293 size_type mid = (lower_b + upper_b) / 2;
294 if (psi[mid] >= i)
295 upper_b = mid;
296 else
297 lower_b = mid;
298 }
299 if (lower_b > C[cc])
300 return lower_b - C[cc] + 1;
301 else
302 { // lower_b == m_C[cc]
303 return psi[lower_b] < i; // 1 if psi[lower_b]<i, 0 otherwise
304 }
305 }
306
307 // Calculates the i-th occurrence of symbol c in the BWT of the original text.
308 /*
309 * \param i The i-th occurrence. \f$i\in [1..rank(size(),c)]\f$.
310 * \param c Character c.
311 * \returns The i-th occurrence of c in the BWT or size() if c does
312 * not occur t times in BWT>
313 * \par Time complexity
314 * \f$ \Order{t_{\Psi}} \f$
315 */
316 size_type select_bwt(size_type i, const char_type c) const
317 {
319 if (cc == 0 and c != 0) // character is not in the text => return size()
320 return size();
321 if (C[cc] + i - 1 < C[((size_type)1) + cc])
322 {
323 return psi[C[cc] + i - 1];
324 }
325 return size();
326 }
327};
328
329} // end namespace sdsl
330#endif
cereal.hpp offers cereal support
#define CEREAL_NVP(X)
Definition cereal.hpp:31
A wrapper for the bwt of a compressed suffix array that is based on the function.
A class for the uncompressed suffix array (SA).
bwt_of_csa_psi< csa_bitcompressed > bwt_type
csa_bitcompressed()
Default constructor.
sa_sample_type const & sa_sample
const alphabet_type::char2comp_type & char2comp
bool operator==(csa_bitcompressed const &other) const noexcept
Equality operator.
traverse_csa_saisa< csa_bitcompressed, false > lf_type
const alphabet_type::C_type & C
const alphabet_type::comp2char_type & comp2char
_isa_sampling< csa_bitcompressed, 0 > isa_sample_type
void load(std::istream &in)
_sa_order_sampling< csa_bitcompressed, 0 > sa_sample_type
bool empty() const
Returns if the data structure is empty.
size_type get_sample_dens() const
value_type operator[](size_type i) const
[]-operator
const alphabet_type::sigma_type & sigma
bool operator!=(csa_bitcompressed const &other) const noexcept
Inequality operator.
csa_bitcompressed & operator=(csa_bitcompressed &&csa)
Assignment Move Operator.
size_type size() const
Number of elements in the instance.
alphabet_type::string_type string_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize to a stream.
csa_bitcompressed(csa_bitcompressed &&csa)
Move constructor.
csa_bitcompressed(cache_config &config)
Constructor.
random_access_const_iterator< csa_bitcompressed > const_iterator
traverse_csa_saisa< csa_bitcompressed, true > psi_type
text_of_csa< csa_bitcompressed > text_type
alphabet_type::char_type char_type
const_iterator begin() const
Returns a const_iterator to the first element.
isa_sample_type const & isa_sample
csa_bitcompressed(csa_bitcompressed const &csa)
Copy constructor.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
csa_bitcompressed & operator=(csa_bitcompressed const &csa)
Assignment Operator.
const_iterator end() const
Returns a const_iterator to the element after the last element.
const value_type const_reference
static size_type max_size()
Returns the largest size that csa_bitcompressed can ever have.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
alphabet_type::comp_char_type comp_char_type
first_row_of_csa< csa_bitcompressed > first_row_type
int_vector ::size_type size_type
alphabet_type::alphabet_category alphabet_category
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
static size_type max_size() noexcept
Maximum size of the int_vector.
Generic iterator for a random access container.
Definition iterators.hpp:24
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
A helper class for the function for (compressed) suffix arrays which provide also the inverse suffix...
csa_alphabet_strategy.hpp includes different strategy classes for representing an alphabet of a CSA.
csa_sampling_strategy.hpp includes different strategy classes for suffix array sampling in the CSAs.
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
constexpr char KEY_SA[]
Definition config.hpp:36
Namespace for the succinct data structure library.
std::string cache_file_name(std::string const &key, cache_config const &config)
Returns the file name of the resource.
Definition io.hpp:688
Contains declarations and definitions of data structure concepts.
Helper class for construction process.
Definition config.hpp:66
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
suffix_array_helper.hpp contains some helper classes for CSTs
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.