Panda3D
 All Classes Functions Variables Enumerations
stringDecoder.cxx
1 // Filename: stringDecoder.cxx
2 // Created by: drose (11Feb02)
3 //
4 ////////////////////////////////////////////////////////////////////
5 //
6 // PANDA 3D SOFTWARE
7 // Copyright (c) Carnegie Mellon University. All rights reserved.
8 //
9 // All use of this software is subject to the terms of the revised BSD
10 // license. You should have received a copy of this license along
11 // with this source code in a file named "LICENSE."
12 //
13 ////////////////////////////////////////////////////////////////////
14 
15 #include "stringDecoder.h"
16 #include "config_dtoolutil.h"
17 
// Stream that the decoders in this file write their error messages to.
// It starts out pointing at cerr.
18 ostream *StringDecoder::_notify_ptr = &cerr;
19 
20 ////////////////////////////////////////////////////////////////////
21 // Function: StringDecoder::Destructor
22 // Access: Public, Virtual
23 // Description: Intentionally empty; the body releases nothing.
24 ////////////////////////////////////////////////////////////////////
25 StringDecoder::
26 ~StringDecoder() {
27 }
28 
29 ////////////////////////////////////////////////////////////////////
30 // Function: StringDecoder::get_next_character
31 // Access: Public, Virtual
32 // Description: Returns the next byte of _input as an unsigned value
// and advances _p past it, or returns -1 once test_eof()
// reports that nothing is left. No decoding is applied:
// each byte is handed back as one character.
33 ////////////////////////////////////////////////////////////////////
36  if (test_eof()) {
37  return -1;
38  }
39  return (unsigned char)_input[_p++];
40 }
41 
42 ////////////////////////////////////////////////////////////////////
43 // Function: StringDecoder::set_notify_ptr
44 // Access: Public, Static
45 // Description: Sets the ostream that error messages are written
46 // to. This is necessary because of the low-level
47 // placement of this class, before the definition of the
48 // NotifyCategory class, so it cannot specify its own
49 // notify. Passing NULL silences the messages: the
// decoders in this file test the pointer against NULL
// before writing to it.
50 ////////////////////////////////////////////////////////////////////
51 void StringDecoder::
52 set_notify_ptr(ostream *notify_ptr) {
53  _notify_ptr = notify_ptr;
54 }
55 
56 ////////////////////////////////////////////////////////////////////
57 // Function: StringDecoder::get_notify_ptr
58 // Access: Public, Static
59 // Description: Returns the ostream that error messages are currently
60 // written to; this may be NULL. See set_notify_ptr().
61 ////////////////////////////////////////////////////////////////////
62 ostream *StringDecoder::
64  return _notify_ptr;
65 }
66 
67 
68 /*
69 In UTF-8, each 16-bit Unicode character is encoded as a sequence of
70 one, two, or three 8-bit bytes, depending on the value of the
71 character. The following table shows the format of such UTF-8 byte
72 sequences (where the "free bits" shown by x's in the table are
73 combined in the order shown, and interpreted from most significant to
74 least significant):
75 
76  Binary format of bytes in sequence:
77                                   Number of     Maximum expressible
78  1st byte   2nd byte   3rd byte   free bits:    Unicode value:
79 
80  0xxxxxxx                         7             007F hex (127)
81  110xxxxx   10xxxxxx              (5+6)=11      07FF hex (2047)
82  1110xxxx   10xxxxxx   10xxxxxx   (4+6+6)=16    FFFF hex (65535)
83 
84 The value of each individual byte indicates its UTF-8 function, as follows:
85 
86  00 to 7F hex (0 to 127): first and only byte of a sequence.
87  80 to BF hex (128 to 191): continuing byte in a multi-byte sequence.
88  C2 to DF hex (194 to 223): first byte of a two-byte sequence.
89  E0 to EF hex (224 to 239): first byte of a three-byte sequence.
90 */
91 
92 ////////////////////////////////////////////////////////////////////
93 // Function: StringUtf8Decoder::get_next_character
94 // Access: Public, Virtual
95 // Description: Decodes the next one-, two-, or three-byte UTF-8
// sequence from _input and returns its value, advancing
// _p past the bytes consumed. Returns -1 when called at
// end of input, when the sequence is cut short by the end
// of input, or when the lead byte is not a recognized
// one/two/three-byte introducer; the last two cases also
// write a message to _notify_ptr if it is non-NULL.
//
// Limitations visible in the code: the second and third
// bytes are masked with 0x3f but never checked for the
// 10xxxxxx continuation marker, and lead bytes of 0xf0
// and above are rejected rather than decoded. Every path
// through the while body returns, so the loop runs at
// most once per call.
96 ////////////////////////////////////////////////////////////////////
99  unsigned int result;
100  while (!test_eof()) {
101  result = (unsigned char)_input[_p++];
102  if ((result & 0x80) == 0) {
103  // Single-byte form 0xxxxxxx: the byte is the 7-bit ascii value itself.
104  return result;
105 
106  } if ((result & 0xe0) == 0xc0) {
107  // Two-byte form: lead byte 110xxxxx supplies the top 5 bits, the next byte the low 6.
108  unsigned int two = 0;
109  if (test_eof()) {
110  if (_notify_ptr != NULL) {
111  (*_notify_ptr)
112  << "utf-8 encoded string '" << _input << "' ends abruptly.\n";
113  }
114  return -1;
115  }
116  two = (unsigned char)_input[_p++];
117  result = ((result & 0x1f) << 6) | (two & 0x3f);
118  return result;
119 
120  } else if ((result & 0xf0) == 0xe0) {
121  // Three-byte form: lead byte 1110xxxx supplies the top 4 bits, each following byte 6 more.
122  if (test_eof()) {
123  if (_notify_ptr != NULL) {
124  (*_notify_ptr)
125  << "utf-8 encoded string '" << _input << "' ends abruptly.\n";
126  }
127  return -1;
128  }
129  unsigned int two = (unsigned char)_input[_p++];
130  if (test_eof()) {
131  if (_notify_ptr != NULL) {
132  (*_notify_ptr)
133  << "utf-8 encoded string '" << _input << "' ends abruptly.\n";
134  }
135  return -1;
136  }
137  unsigned int three = (unsigned char)_input[_p++];
138  result = ((result & 0x0f) << 12) | ((two & 0x3f) << 6) | (three & 0x3f);
139  return result;
140  }
141 
142  // Otherwise the byte is either a stray continuation byte (10xxxxxx) or
143  // has its top four bits all set (0xf0 and up): report it in hex and fail.
144  if (_notify_ptr != NULL) {
145  (*_notify_ptr)
146  << "Non utf-8 byte in string: 0x" << hex << result << dec
147  << ", string is '" << _input << "'\n";
148  }
149  return -1;
150  }
151 
152  // Only reached when there was nothing left to read on entry.
153  return -1;
154 }
155 
156 ////////////////////////////////////////////////////////////////////
157 // Function: StringUnicodeDecoder::get_next_character
158 // Access: Public, Virtual
159 // Description: Reads the next two bytes of _input and combines them,
// high-order byte first, into a single 16-bit value,
// advancing _p past both. Returns -1 when called at end
// of input. If only one byte remains, that byte is
// consumed, a message is written to _notify_ptr (when
// non-NULL), and -1 is returned.
160 ////////////////////////////////////////////////////////////////////
163  if (test_eof()) {
164  return -1;
165  }
166 
167  unsigned int high = (unsigned char)_input[_p++];
168  if (test_eof()) {
169  if (_notify_ptr != NULL) {
170  (*_notify_ptr)
171  << "Unicode-encoded string has odd number of bytes.\n";
172  }
173  return -1;
174  }
175  unsigned int low = (unsigned char)_input[_p++];
176  return ((high << 8) | low);
177 }
virtual int get_next_character()
Returns the next character in sequence.
virtual int get_next_character()
Returns the next character in sequence.
virtual int get_next_character()
Returns the next character in sequence.
static ostream * get_notify_ptr()
Returns the ostream that is used to write error messages to.
static void set_notify_ptr(ostream *ptr)
Sets the ostream that is used to write error messages to.