libstdc++
codecvt.h
Go to the documentation of this file.
1 // Locale support (codecvt) -*- C++ -*-
2 
3 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
4 // 2009, 2010, 2011 Free Software Foundation, Inc.
5 //
6 // This file is part of the GNU ISO C++ Library. This library is free
7 // software; you can redistribute it and/or modify it under the
8 // terms of the GNU General Public License as published by the
9 // Free Software Foundation; either version 3, or (at your option)
10 // any later version.
11 
12 // This library is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
16 
17 // Under Section 7 of GPL version 3, you are granted additional
18 // permissions described in the GCC Runtime Library Exception, version
19 // 3.1, as published by the Free Software Foundation.
20 
21 // You should have received a copy of the GNU General Public License and
22 // a copy of the GCC Runtime Library Exception along with this program;
23 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 // <http://www.gnu.org/licenses/>.
25 
26 /** @file bits/codecvt.h
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{locale}
29  */
30 
31 //
32 // ISO C++ 14882: 22.2.1.5 Template class codecvt
33 //
34 
35 // Written by Benjamin Kosnik <bkoz@redhat.com>
36 
37 #ifndef _CODECVT_H
38 #define _CODECVT_H 1
39 
40 #pragma GCC system_header
41 
42 namespace std _GLIBCXX_VISIBILITY(default)
43 {
44 _GLIBCXX_BEGIN_NAMESPACE_VERSION
45 
46  /// Empty base class for codecvt facet [22.2.1.5]; its only member is the result enumeration returned by the conversion functions.
47  class codecvt_base
48  {
49  public:
50  enum result
51  {
52  ok, // all of the input was converted
53  partial, // input ended early or the output had insufficient space
54  error, // the conversion failed
55  noconv // no conversion was necessary
56  };
57  };
58 
59  /**
60  * @brief Common base for codecvt functions.
61  *
62  * This template class provides implementations of the public functions
63  * that forward to the protected virtual functions.
64  *
65  * This template also provides abstract stubs for the protected virtual
66  * functions.
67  */
68  template<typename _InternT, typename _ExternT, typename _StateT>
70  : public locale::facet, public codecvt_base
71  {
72  public:
73  // Types:
74  typedef codecvt_base::result result;
75  typedef _InternT intern_type;
76  typedef _ExternT extern_type;
77  typedef _StateT state_type;
78 
79  // 22.2.1.5.1 codecvt members
80  /**
81  * @brief Convert from internal to external character set.
82  *
83  * Converts input string of intern_type to output string of
84  * extern_type. This is analogous to wcsrtombs. It does this by
85  * calling codecvt::do_out.
86  *
87  * The source and destination character sets are determined by the
88  * facet's locale, internal and external types.
89  *
90  * The characters in [from,from_end) are converted and written to
91  * [to,to_end). from_next and to_next are set to point to the
92  * character following the last successfully converted character,
93  * respectively. If the result needed no conversion, from_next and
94  * to_next are not affected.
95  *
96  * The @a state argument should be initialized if the input is at the
97  * beginning and carried from a previous call if continuing
98  * conversion. There are no guarantees about how @a state is used.
99  *
100  * The result returned is a member of codecvt_base::result. If
101  * all the input is converted, returns codecvt_base::ok. If no
102  * conversion is necessary, returns codecvt_base::noconv. If
103  * the input ends early or there is insufficient space in the
104  * output, returns codecvt_base::partial. Otherwise the
105  * conversion failed and codecvt_base::error is returned.
106  *
107  * @param state Persistent conversion state data.
108  * @param from Start of input.
109  * @param from_end End of input.
110  * @param from_next Returns start of unconverted data.
111  * @param to Start of output buffer.
112  * @param to_end End of output buffer.
113  * @param to_next Returns start of unused output area.
114  * @return codecvt_base::result.
115  */
116  result
117  out(state_type& __state, const intern_type* __from,
118  const intern_type* __from_end, const intern_type*& __from_next,
119  extern_type* __to, extern_type* __to_end,
120  extern_type*& __to_next) const
121  {
122  return this->do_out(__state, __from, __from_end, __from_next,
123  __to, __to_end, __to_next);
124  }
125 
126  /**
127  * @brief Reset conversion state.
128  *
129  * Writes characters to output that would restore @a state to initial
130  * conditions. The idea is that if a partial conversion occurs, then
131  * the converting the characters written by this function would leave
132  * the state in initial conditions, rather than partial conversion
133  * state. It does this by calling codecvt::do_unshift().
134  *
135  * For example, if 4 external characters always converted to 1 internal
136  * character, and input to in() had 6 external characters with state
137  * saved, this function would write two characters to the output and
138  * set the state to initialized conditions.
139  *
140  * The source and destination character sets are determined by the
141  * facet's locale, internal and external types.
142  *
143  * The result returned is a member of codecvt_base::result. If the
144  * state could be reset and data written, returns codecvt_base::ok. If
145  * no conversion is necessary, returns codecvt_base::noconv. If the
146  * output has insufficient space, returns codecvt_base::partial.
147  * Otherwise the reset failed and codecvt_base::error is returned.
148  *
149  * @param state Persistent conversion state data.
150  * @param to Start of output buffer.
151  * @param to_end End of output buffer.
152  * @param to_next Returns start of unused output area.
153  * @return codecvt_base::result.
154  */
155  result
156  unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
157  extern_type*& __to_next) const
158  { return this->do_unshift(__state, __to,__to_end,__to_next); }
159 
160  /**
161  * @brief Convert from external to internal character set.
162  *
163  * Converts input string of extern_type to output string of
164  * intern_type. This is analogous to mbsrtowcs. It does this by
165  * calling codecvt::do_in.
166  *
167  * The source and destination character sets are determined by the
168  * facet's locale, internal and external types.
169  *
170  * The characters in [from,from_end) are converted and written to
171  * [to,to_end). from_next and to_next are set to point to the
172  * character following the last successfully converted character,
173  * respectively. If the result needed no conversion, from_next and
174  * to_next are not affected.
175  *
176  * The @a state argument should be initialized if the input is at the
177  * beginning and carried from a previous call if continuing
178  * conversion. There are no guarantees about how @a state is used.
179  *
180  * The result returned is a member of codecvt_base::result. If
181  * all the input is converted, returns codecvt_base::ok. If no
182  * conversion is necessary, returns codecvt_base::noconv. If
183  * the input ends early or there is insufficient space in the
184  * output, returns codecvt_base::partial. Otherwise the
185  * conversion failed and codecvt_base::error is returned.
186  *
187  * @param state Persistent conversion state data.
188  * @param from Start of input.
189  * @param from_end End of input.
190  * @param from_next Returns start of unconverted data.
191  * @param to Start of output buffer.
192  * @param to_end End of output buffer.
193  * @param to_next Returns start of unused output area.
194  * @return codecvt_base::result.
195  */
196  result
197  in(state_type& __state, const extern_type* __from,
198  const extern_type* __from_end, const extern_type*& __from_next,
199  intern_type* __to, intern_type* __to_end,
200  intern_type*& __to_next) const
201  {
202  return this->do_in(__state, __from, __from_end, __from_next,
203  __to, __to_end, __to_next);
204  }
205 
206  int
207  encoding() const throw()
208  { return this->do_encoding(); }
209 
210  bool
211  always_noconv() const throw()
212  { return this->do_always_noconv(); }
213 
214  int
215  length(state_type& __state, const extern_type* __from,
216  const extern_type* __end, size_t __max) const
217  { return this->do_length(__state, __from, __end, __max); }
218 
219  int
220  max_length() const throw()
221  { return this->do_max_length(); }
222 
223  protected:
224  explicit
225  __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
226 
227  virtual
228  ~__codecvt_abstract_base() { }
229 
230  /**
231  * @brief Convert from internal to external character set.
232  *
233  * Converts input string of intern_type to output string of
234  * extern_type. This function is a hook for derived classes to change
235  * the value returned. @see out for more information.
236  */
237  virtual result
238  do_out(state_type& __state, const intern_type* __from,
239  const intern_type* __from_end, const intern_type*& __from_next,
240  extern_type* __to, extern_type* __to_end,
241  extern_type*& __to_next) const = 0;
242 
243  virtual result
244  do_unshift(state_type& __state, extern_type* __to,
245  extern_type* __to_end, extern_type*& __to_next) const = 0;
246 
247  virtual result
248  do_in(state_type& __state, const extern_type* __from,
249  const extern_type* __from_end, const extern_type*& __from_next,
250  intern_type* __to, intern_type* __to_end,
251  intern_type*& __to_next) const = 0;
252 
253  virtual int
254  do_encoding() const throw() = 0;
255 
256  virtual bool
257  do_always_noconv() const throw() = 0;
258 
259  virtual int
260  do_length(state_type&, const extern_type* __from,
261  const extern_type* __end, size_t __max) const = 0;
262 
263  virtual int
264  do_max_length() const throw() = 0;
265  };
266 
267 
268 
269  /**
270  * @brief Primary class template codecvt.
271  * @ingroup locales
272  *
273  * NB: Generic, mostly useless implementation.
274  *
275  */
276  template<typename _InternT, typename _ExternT, typename _StateT>
277  class codecvt
278  : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
279  {
280  public:
281  // Types:
282  typedef codecvt_base::result result;
283  typedef _InternT intern_type;
284  typedef _ExternT extern_type;
285  typedef _StateT state_type;
286 
287  protected:
288  __c_locale _M_c_locale_codecvt;
289 
290  public:
291  static locale::id id;
292 
293  explicit
294  codecvt(size_t __refs = 0)
295  : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs) { } // hands __refs on to the abstract base
296 
297  explicit
298  codecvt(__c_locale __cloc, size_t __refs = 0);
299 
300  protected:
301  virtual
302  ~codecvt() { }
303 
304  // Declarations matching each do_* hook of __codecvt_abstract_base;
305  // none of them is defined inside this class body.
306  virtual result
307  do_out(state_type& __state, const intern_type* __from,
308  const intern_type* __from_end, const intern_type*& __from_next,
309  extern_type* __to, extern_type* __to_end,
310  extern_type*& __to_next) const;
311 
312  virtual result
313  do_unshift(state_type& __state, extern_type* __to,
314  extern_type* __to_end, extern_type*& __to_next) const;
315 
316  virtual result
317  do_in(state_type& __state, const extern_type* __from,
318  const extern_type* __from_end, const extern_type*& __from_next,
319  intern_type* __to, intern_type* __to_end,
320  intern_type*& __to_next) const;
321 
322  virtual int
323  do_encoding() const throw();
324 
325  virtual bool
326  do_always_noconv() const throw();
327 
328  virtual int
329  do_length(state_type&, const extern_type* __from,
330  const extern_type* __end, size_t __max) const;
331 
332  virtual int
333  do_max_length() const throw();
334  };
333 
334  template<typename _InternT, typename _ExternT, typename _StateT>
335  locale::id codecvt<_InternT, _ExternT, _StateT>::id; // out-of-class definition of the primary template's static id member
336 
337  /// class codecvt<char, char, mbstate_t> specialization. Declares the constructors, the static facet id and an override for each of the seven do_* hooks of __codecvt_abstract_base; none of them is defined in this listing.
338  template<>
339  class codecvt<char, char, mbstate_t>
340  : public __codecvt_abstract_base<char, char, mbstate_t>
341  {
342  public:
343  // Types: the internal/external character types and the conversion state type of this specialization.
344  typedef char intern_type;
345  typedef char extern_type;
346  typedef mbstate_t state_type;
347 
348  protected:
349  __c_locale _M_c_locale_codecvt; // NOTE(review): presumably the C locale handle used by the conversions - confirm in the out-of-line definitions
350 
351  public:
352  static locale::id id; // declaration only; no definition appears in this listing
353 
354  explicit
355  codecvt(size_t __refs = 0);
356 
357  explicit
358  codecvt(__c_locale __cloc, size_t __refs = 0);
359 
360  protected:
361  virtual
362  ~codecvt(); // protected: not callable from outside the class hierarchy
363 
364  virtual result
365  do_out(state_type& __state, const intern_type* __from,
366  const intern_type* __from_end, const intern_type*& __from_next,
367  extern_type* __to, extern_type* __to_end,
368  extern_type*& __to_next) const;
369 
370  virtual result
371  do_unshift(state_type& __state, extern_type* __to,
372  extern_type* __to_end, extern_type*& __to_next) const;
373 
374  virtual result
375  do_in(state_type& __state, const extern_type* __from,
376  const extern_type* __from_end, const extern_type*& __from_next,
377  intern_type* __to, intern_type* __to_end,
378  intern_type*& __to_next) const;
379 
380  virtual int
381  do_encoding() const throw();
382 
383  virtual bool
384  do_always_noconv() const throw();
385 
386  virtual int
387  do_length(state_type&, const extern_type* __from,
388  const extern_type* __end, size_t __max) const;
389 
390  virtual int
391  do_max_length() const throw();
392  };
393 
394 #ifdef _GLIBCXX_USE_WCHAR_T
395  /// class codecvt<wchar_t, char, mbstate_t> specialization. Declares the constructors, the static facet id and an override for each of the seven do_* hooks of __codecvt_abstract_base; none of them is defined in this listing.
396  template<>
397  class codecvt<wchar_t, char, mbstate_t>
398  : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
399  {
400  public:
401  // Types: wchar_t is the internal character type, char the external one.
402  typedef wchar_t intern_type;
403  typedef char extern_type;
404  typedef mbstate_t state_type;
405 
406  protected:
407  __c_locale _M_c_locale_codecvt; // NOTE(review): presumably the C locale handle used by the conversions - confirm in the out-of-line definitions
408 
409  public:
410  static locale::id id; // declaration only; no definition appears in this listing
411 
412  explicit
413  codecvt(size_t __refs = 0);
414 
415  explicit
416  codecvt(__c_locale __cloc, size_t __refs = 0);
417 
418  protected:
419  virtual
420  ~codecvt(); // protected: not callable from outside the class hierarchy
421 
422  virtual result
423  do_out(state_type& __state, const intern_type* __from,
424  const intern_type* __from_end, const intern_type*& __from_next,
425  extern_type* __to, extern_type* __to_end,
426  extern_type*& __to_next) const;
427 
428  virtual result
429  do_unshift(state_type& __state,
430  extern_type* __to, extern_type* __to_end,
431  extern_type*& __to_next) const;
432 
433  virtual result
434  do_in(state_type& __state,
435  const extern_type* __from, const extern_type* __from_end,
436  const extern_type*& __from_next,
437  intern_type* __to, intern_type* __to_end,
438  intern_type*& __to_next) const;
439 
440  virtual
441  int do_encoding() const throw();
442 
443  virtual
444  bool do_always_noconv() const throw();
445 
446  virtual
447  int do_length(state_type&, const extern_type* __from,
448  const extern_type* __end, size_t __max) const;
449 
450  virtual int
451  do_max_length() const throw();
452  };
453 #endif //_GLIBCXX_USE_WCHAR_T
454 
455  /// class codecvt_byname [22.2.1.6]: a codecvt facet constructed from a locale name.
456  template<typename _InternT, typename _ExternT, typename _StateT>
457  class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
458  {
459  public:
460  explicit
461  codecvt_byname(const char* __s, size_t __refs = 0)
462  : codecvt<_InternT, _ExternT, _StateT>(__refs)
463  {
464  if (__builtin_strcmp(__s, "C") != 0 // for any name other than "C" or "POSIX"...
465  && __builtin_strcmp(__s, "POSIX") != 0)
466  {
467  this->_S_destroy_c_locale(this->_M_c_locale_codecvt); // ...destroy the current handle
468  this->_S_create_c_locale(this->_M_c_locale_codecvt, __s); // ...and create one for the named locale
469  }
470  }
471 
472  protected:
473  virtual
474  ~codecvt_byname() { }
475  };
476 
477  // Inhibit implicit instantiations for required instantiations,
478  // which are defined via explicit instantiations elsewhere.
479 #if _GLIBCXX_EXTERN_TEMPLATE
480  extern template class codecvt_byname<char, char, mbstate_t>;
481 
482  extern template
483  const codecvt<char, char, mbstate_t>&
484  use_facet<codecvt<char, char, mbstate_t> >(const locale&);
485 
486  extern template
487  bool
488  has_facet<codecvt<char, char, mbstate_t> >(const locale&);
489 
490 #ifdef _GLIBCXX_USE_WCHAR_T
491  extern template class codecvt_byname<wchar_t, char, mbstate_t>;
492 
493  extern template
494  const codecvt<wchar_t, char, mbstate_t>&
495  use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
496 
497  extern template
498  bool
499  has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
500 #endif
501 #endif
502 
503 _GLIBCXX_END_NAMESPACE_VERSION
504 } // namespace std
505 
506 #endif // _CODECVT_H
Localization functionality base class. The facet class is the base class for a localization feature...
Extension to use iconv for dealing with character encodings.
result in(state_type &__state, const extern_type *__from, const extern_type *__from_end, const extern_type *&__from_next, intern_type *__to, intern_type *__to_end, intern_type *&__to_next) const
Convert from external to internal character set.
Definition: codecvt.h:197
virtual result do_out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const =0
Convert from internal to external character set.
class codecvt<char, char, mbstate_t> specialization.
Definition: codecvt.h:339
result out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Convert from internal to external character set.
Definition: codecvt.h:117
class codecvt_byname [22.2.1.6].
Definition: codecvt.h:457
Container class for localization functionality. The locale class is first a class wrapper for C librar...
result unshift(state_type &__state, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Reset conversion state.
Definition: codecvt.h:156
Empty base class for codecvt facet [22.2.1.5].
Definition: codecvt.h:47
Primary class template codecvt. NB: Generic, mostly useless implementation.
Definition: codecvt.h:277
Facet ID class. The ID class provides facets with an index used to identify them. Every facet class mu...
Common base for codecvt functions.
Definition: codecvt.h:69
facet(size_t __refs=0)
Facet constructor.
class codecvt<wchar_t, char, mbstate_t> specialization.
Definition: codecvt.h:397