LibMusicXML  3.18
bsrBasicTypes.h
1 /*
2  MusicXML Library
3  Copyright (C) Grame 2006-2013
4 
5  This Source Code Form is subject to the terms of the Mozilla Public
6  License, v. 2.0. If a copy of the MPL was not distributed with this
7  file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 
9  Grame Research Laboratory, 11, cours de Verdun Gensoul 69002 Lyon - France
10  research@grame.fr
11 */
12 
13 #ifndef ___bsrBasicTypes___
14 #define ___bsrBasicTypes___
15 
#include <iosfwd>
#include <list>
#include <map>
#include <string>

#include "msrBasicTypes.h"
20 
21 
22 namespace MusicXML2
23 {
24 
25 // cell kinds
26 //______________________________________________________________________________
// Kinds of braille cells.
// The 6-dot enumerators are declared in Unicode code point order, starting
// at U+2800 (kDotsNone). Consequently (kDotsXXX - kDotsNone) is the offset of
// the cell inside the Braille Patterns block, i.e. a bit mask in which
// dot 1 = 0x01, dot 2 = 0x02, dot 3 = 0x04, dot 4 = 0x08, dot 5 = 0x10 and
// dot 6 = 0x20. Do not reorder the enumerators: their implicit values
// depend on the declaration order.
enum bsrCellKind {
  kCellUnknown,

  // non 6dots values
  kCellEOL,     // U+000A
  kCellEOP,     // U+000C

  // 6dots values for Braille music
  kDotsNone,    // U+2800
  kDots1,       // U+2801
  kDots2,       // U+2802
  kDots12,      // U+2803
  kDots3,       // U+2804
  kDots13,      // U+2805
  kDots23,      // U+2806
  kDots123,     // U+2807
  kDots4,       // U+2808
  kDots14,      // U+2809
  kDots24,      // U+280A
  kDots124,     // U+280B
  kDots34,      // U+280C
  kDots134,     // U+280D
  kDots234,     // U+280E
  kDots1234,    // U+280F

  kDots5,       // U+2810
  kDots15,      // U+2811
  kDots25,      // U+2812
  kDots125,     // U+2813
  kDots35,      // U+2814
  kDots135,     // U+2815
  kDots235,     // U+2816
  kDots1235,    // U+2817
  kDots45,      // U+2818
  kDots145,     // U+2819
  kDots245,     // U+281A
  kDots1245,    // U+281B
  kDots345,     // U+281C
  kDots1345,    // U+281D
  kDots2345,    // U+281E
  kDots12345,   // U+281F

  kDots6,       // U+2820
  kDots16,      // U+2821
  kDots26,      // U+2822
  kDots126,     // U+2823
  kDots36,      // U+2824
  kDots136,     // U+2825
  kDots236,     // U+2826
  kDots1236,    // U+2827
  kDots46,      // U+2828
  kDots146,     // U+2829
  kDots246,     // U+282A
  kDots1246,    // U+282B
  kDots346,     // U+282C
  kDots1346,    // U+282D
  kDots2346,    // U+282E
  kDots12346,   // U+282F

  kDots56,      // U+2830
  kDots156,     // U+2831
  kDots256,     // U+2832
  kDots1256,    // U+2833
  kDots356,     // U+2834
  kDots1356,    // U+2835
  kDots2356,    // U+2836
  kDots12356,   // U+2837
  kDots456,     // U+2838
  kDots1456,    // U+2839
  kDots2456,    // U+283A
  kDots12456,   // U+283B
  kDots3456,    // U+283C
  kDots13456,   // U+283D
  kDots23456,   // U+283E
  kDots123456   // U+283F
};
103 
104 //______________________________________________________________________________
105 string bsrCellKindAsShortString (bsrCellKind cellKind);
106 
107 string bsrCellKindAsString (bsrCellKind cellKind);
108 
109 // lower case letters
110 //______________________________________________________________________________
111 const bsrCellKind
112  kCellA = kDots1,
113  kCellB = kDots12,
114  kCellC = kDots14,
115  kCellD = kDots145,
116  kCellE = kDots15,
117  kCellF = kDots124,
118  kCellG = kDots1245,
119  kCellH = kDots125,
120  kCellI = kDots24,
121  kCellJ = kDots245,
122 
123  kCellK = kDots13,
124  kCellL = kDots123,
125  kCellM = kDots134,
126  kCellN = kDots1345,
127  kCellO = kDots135,
128  kCellP = kDots1234,
129  kCellQ = kDots12345,
130  kCellR = kDots1235,
131  kCellS = kDots234,
132  kCellT = kDots2345,
133 
134  kCellU = kDots136,
135  kCellV = kDots1236,
136  kCellW = kDots2456,
137  kCellX = kDots1346,
138  kCellY = kDots13456,
139  kCellZ = kDots1356;
140 
141 // capitals
142 //______________________________________________________________________________
143 const bsrCellKind
144  kCellCapitalsSign = kDots46;
145 // kCellCapitalsSequenceSign, // { kCellCapitalsSign, kCellCapitalsSign };
146 
147 // decimal digits
148 //______________________________________________________________________________
149 const bsrCellKind
150  kCellNumberSign = kDots3456,
151  kCell1 = kCellA,
152  kCell2 = kCellB,
153  kCell3 = kCellC,
154  kCell4 = kCellD,
155  kCell5 = kCellE,
156  kCell6 = kCellF,
157  kCell7 = kCellG,
158  kCell8 = kCellH,
159  kCell9 = kCellI,
160  kCell0 = kCellJ;
161 
162 // lower decimal digits
163 //______________________________________________________________________________
164 const bsrCellKind
165  kCellLower1 = kDots2,
166  kCellLower2 = kDots23,
167  kCellLower3 = kDots25,
168  kCellLower4 = kDots256,
169  kCellLower5 = kDots26,
170  kCellLower6 = kDots235,
171  kCellLower7 = kDots2356,
172  kCellLower8 = kDots236,
173  kCellLower9 = kDots35,
174  kCellLower0 = kDots356;
175 
176 // alterations
177 //______________________________________________________________________________
178 const bsrCellKind
179  kCellFlat = kDots126,
180  kCellNatural = kDots16,
181  kCellSharp = kDots146;
182 
183 // augmentation dots
184 //______________________________________________________________________________
185 const bsrCellKind
186  kCellAugmentationDot = kDots3;
187 
188 // arithmetic operators
189 //______________________________________________________________________________
190 const bsrCellKind
191  kCell_ac_plus = kDots235,
192  kCell_ac_minus = kDots36,
193  kCell_ac_times = kDots35,
194  kCell_ac_dividedBy = kDots25,
195  kCell_ac_equals = kDots2356;
196 
197 // words
198 //______________________________________________________________________________
199 const bsrCellKind
200  kCellWordSign = kDots345,
201 
202  kCellWordApostrophe = kDots6,
203 
204  kCellParenthesis = kDots2356,
205  kCellQuestionMark = kDots26;
206 
207 // braille cells
208 //______________________________________________________________________________
209 void brailleCellKindAsUTF8 (bsrCellKind cellKind, ostream& os);
210 void brailleCellKindAsUTF16 (bsrCellKind cellKind, ostream& os);
211 
212 void brailleCellKind (bsrCellKind cellKind, ostream& os);
213 
214 // braille output kinds
215 //______________________________________________________________________________
// Kinds of braille output.
enum bsrBrailleOutputKind {
  kBrailleOutputAscii, // default value
  kBrailleOutputUTF8,
  kBrailleOutputUTF16
};

// String conversion of a braille output kind (defined outside this header).
std::string bsrBrailleOutputKindAsString (
  bsrBrailleOutputKind brailleOutputKind);

// Global map from a name to a braille output kind.
// NOTE(review): presumably filled by initializeBsrBrailleOutputKindsMap() -
// confirm against the definition.
extern std::map<std::string, bsrBrailleOutputKind>
  gBsrBrailleOutputKindsMap;

// Returns a string built from the known braille output kinds;
// namesListMaxLength presumably bounds the line length - TODO confirm.
std::string existingBsrBrailleOutputKinds (int namesListMaxLength);

void initializeBsrBrailleOutputKindsMap ();
229 
// texts languages
231 //______________________________________________________________________________
// Languages available for texts.
enum bsrTextsLanguageKind {
  kTextsEnglish, // BANA's default
  kTextsGerman,
  kTextsItalian,
  kTextsFrench
};

// String conversion of a texts language kind (defined outside this header).
std::string bsrTextsLanguageKindAsString (
  bsrTextsLanguageKind languageKind);

// Global map from a name to a texts language kind.
// NOTE(review): presumably filled by initializeBsrTextsLanguageKindsMap() -
// confirm against the definition.
extern std::map<std::string, bsrTextsLanguageKind>
  gBsrTextsLanguageKindsMap;

// Returns a string built from the known texts language kinds;
// namesListMaxLength presumably bounds the line length - TODO confirm.
std::string existingBsrTextsLanguageKinds (int namesListMaxLength);

void initializeBsrTextsLanguageKindsMap ();
245 
246 
247 /*
248 //______________________________________________________________________________
249 // brailling numbers
250 wstring braille (int n);
251 
252 //______________________________________________________________________________
253 // brailling characters and strings
254 bsrDot6Cell braille (char ch);
255 
256 wstring braille (string str);
257 
258 //______________________________________________________________________________
259 // writing UTF-16 to ostreams
260 void write_bsrDot6Cell (ostream& os, bsrDot6Cell cell);
261 
262 void write_bsrDot6Cell ( bsrDot6Cell cell );
263 
264 EXP ostream& operator<< (ostream& os, const bsrDot6Cell cell);
265 
266 void write_wstring (ostream& os, wstring wstr );
267 
268 EXP ostream& operator<< (ostream& os, const wstring& wstr);
269 */
270 
271 // warnings and errors
272 //______________________________________________________________________________
273 EXP void bsrMusicXMLWarning (
274  std::string inputSourceName,
275  int inputLineNumber,
276  std::string message);
277 
278 EXP void bsrInternalError (
279  std::string inputSourceName,
280  int inputLineNumber,
281  std::string sourceCodeFileName,
282  int sourceCodeLineNumber,
283  std::string message);
284 
285 // initialization
286 //______________________________________________________________________________
// Initialization entry point for the BSR basic types.
// NOTE(review): presumably calls the initialize...Map() functions declared
// in this header - confirm against the definition.
void initializeBSRBasicTypes ();
288 
289 
290 } // namespace MusicXML2
291 
292 
293 #endif
294 
295 
296 /*
297 from https://brltty.app/pipermail/brltty/2012-October/009556.html :
298 
299 [BRLTTY] Braille Code
300 
301 Dave Mielke dave at mielke.cc
302 Wed Oct 10 14:15:08 EDT 2012
303 Previous message: [BRLTTY] Braille Code
304 Next message: [BRLTTY] Braille Code
305 Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
306 [quoted lines by Dave Mielke on 2012/10/10 at 14:13 -0400]
307 
308 >Each language has its own mapping. That being said, the NABCC (North American
309 >Braille Computer Code) is probably as good a place to start as any. You can
310 >find a table for that mapping within the file Tables/en-nabcc.ttb in brltty's
311 >source tree. I've attached it to this message, as well.
312 
313 I forgot to attach the table. Here it is.
314 
315 --
316 Dave Mielke | 2213 Fox Crescent | The Bible is the very Word of God.
317 Phone: 1-613-726-0014 | Ottawa, Ontario | 2011 May 21 is the End of Salvation.
318 EMail: dave at mielke.cc | Canada K2A 1H7 | http://Mielke.cc/now.html
319 http://FamilyRadio.com/ | http://Mielke.cc/bible/
320 -------------- next part --------------
321 ###############################################################################
322 # BRLTTY - A background process providing access to the console screen (when in
323 # text mode) for a blind person using a refreshable braille display.
324 #
325 # Copyright (C) 1995-2012 by The BRLTTY Developers.
326 #
327 # BRLTTY comes with ABSOLUTELY NO WARRANTY.
328 #
329 # This is free software, placed under the terms of the
330 # GNU Lesser General Public License, as published by the Free Software
331 # Foundation; either version 2.1 of the License, or (at your option) any
332 # later version. Please see the file LICENSE-LGPL for details.
333 #
334 # Web Page: http://mielke.cc/brltty/
335 #
336 # This software is maintained by Dave Mielke <dave at mielke.cc>.
337 ###############################################################################
338 
339 # BRLTTY Text Table - English (North American Braille Computer Code)
340 
341 # This is a description of the default text table used by BRLTTY.
342 # It's based on the North American Braille Computer Code, but defines the full
343 # Latin1 (ISO-8859-1) character set.
344 
345 # The 95 printable characters of the standard 7-bit US-ASCII character set
346 # (32-126) are identical to their representations within the North American
347 # Braille Computer Code (these are the only characters which the NABCC actually
348 # defines). Characters from literary braille, symbols from The Nemeth Braille
349 # Code for Mathematics and Science Notation, and a bit of human imagination
350 # have all been combined to create an easy-to-remember, one-to-one mapping
351 # between each character and its braille counterpart. All possible combinations
352 # involving only the original 6 braille dots are used, but that only allows for
353 # 64 out of the required 95 character representations. The presence or absence
354 # of dot 7 is used to differentiate between pairs of characters which either
355 # are very closely related in meaning or, in a few cases where a more intuitive
356 # reason couldn't be found, have a very close logical relationship within the
357 # US-ASCII code. Dot 8 isn't used at all.
358 
359 # The space and the 26 lowercase letters (a-z) are the same as in literary
360 # braille:
361 
362  #Hex Dots Dec Char Description
363 char \X20 ( ) # 32 space
364 char \X61 (1 ) # 97 a latin small letter a
365 char \X62 (12 ) # 98 b latin small letter b
366 char \X63 (1 4 ) # 99 c latin small letter c
367 char \X64 (1 45 ) # 100 d latin small letter d
368 char \X65 (1 5 ) # 101 e latin small letter e
369 char \X66 (12 4 ) # 102 f latin small letter f
370 char \X67 (12 45 ) # 103 g latin small letter g
371 char \X68 (12 5 ) # 104 h latin small letter h
372 char \X69 ( 2 4 ) # 105 i latin small letter i
373 char \X6A ( 2 45 ) # 106 j latin small letter j
374 char \X6B (1 3 ) # 107 k latin small letter k
375 char \X6C (123 ) # 108 l latin small letter l
376 char \X6D (1 34 ) # 109 m latin small letter m
377 char \X6E (1 345 ) # 110 n latin small letter n
378 char \X6F (1 3 5 ) # 111 o latin small letter o
379 char \X70 (1234 ) # 112 p latin small letter p
380 char \X71 (12345 ) # 113 q latin small letter q
381 char \X72 (123 5 ) # 114 r latin small letter r
382 char \X73 ( 234 ) # 115 s latin small letter s
383 char \X74 ( 2345 ) # 116 t latin small letter t
384 char \X75 (1 3 6 ) # 117 u latin small letter u
385 char \X76 (123 6 ) # 118 v latin small letter v
386 char \X77 ( 2 456 ) # 119 w latin small letter w
387 char \X78 (1 34 6 ) # 120 x latin small letter x
388 char \X79 (1 3456 ) # 121 y latin small letter y
389 char \X7A (1 3 56 ) # 122 z latin small letter z
390 
391 # The 26 uppercase letters (A-Z) are the same as their lowercase counterparts
392 # except that dot 7 is added:
393 
394  #Hex Dots Dec Char Description
395 char \X41 (1 7 ) # 65 A latin capital letter a
396 char \X42 (12 7 ) # 66 B latin capital letter b
397 char \X43 (1 4 7 ) # 67 C latin capital letter c
398 char \X44 (1 45 7 ) # 68 D latin capital letter d
399 char \X45 (1 5 7 ) # 69 E latin capital letter e
400 char \X46 (12 4 7 ) # 70 F latin capital letter f
401 char \X47 (12 45 7 ) # 71 G latin capital letter g
402 char \X48 (12 5 7 ) # 72 H latin capital letter h
403 char \X49 ( 2 4 7 ) # 73 I latin capital letter i
404 char \X4A ( 2 45 7 ) # 74 J latin capital letter j
405 char \X4B (1 3 7 ) # 75 K latin capital letter k
406 char \X4C (123 7 ) # 76 L latin capital letter l
407 char \X4D (1 34 7 ) # 77 M latin capital letter m
408 char \X4E (1 345 7 ) # 78 N latin capital letter n
409 char \X4F (1 3 5 7 ) # 79 O latin capital letter o
410 char \X50 (1234 7 ) # 80 P latin capital letter p
411 char \X51 (12345 7 ) # 81 Q latin capital letter q
412 char \X52 (123 5 7 ) # 82 R latin capital letter r
413 char \X53 ( 234 7 ) # 83 S latin capital letter s
414 char \X54 ( 2345 7 ) # 84 T latin capital letter t
415 char \X55 (1 3 67 ) # 85 U latin capital letter u
416 char \X56 (123 67 ) # 86 V latin capital letter v
417 char \X57 ( 2 4567 ) # 87 W latin capital letter w
418 char \X58 (1 34 67 ) # 88 X latin capital letter x
419 char \X59 (1 34567 ) # 89 Y latin capital letter y
420 char \X5A (1 3 567 ) # 90 Z latin capital letter z
421 
422 # The 10 decimal digits (0-9) are the same as in the Nemeth Code:
423 
424  #Hex Dots Dec Char Description
425 char \X30 ( 3 56 ) # 48 0 digit zero
426 char \X31 ( 2 ) # 49 1 digit one
427 char \X32 ( 23 ) # 50 2 digit two
428 char \X33 ( 2 5 ) # 51 3 digit three
429 char \X34 ( 2 56 ) # 52 4 digit four
430 char \X35 ( 2 6 ) # 53 5 digit five
431 char \X36 ( 23 5 ) # 54 6 digit six
432 char \X37 ( 23 56 ) # 55 7 digit seven
433 char \X38 ( 23 6 ) # 56 8 digit eight
434 char \X39 ( 3 5 ) # 57 9 digit nine
435 
436 # Common symbols used within mathematical expressions by popular computer
437 # programming languages are the same as in the Nemeth Code:
438 
439  #Hex Dots Dec Char Description
440 char \X2E ( 4 6 ) # 46 . full stop
441 char \X2B ( 34 6 ) # 43 + plus sign
442 char \X2D ( 3 6 ) # 45 - hyphen-minus
443 char \X2A (1 6 ) # 42 * asterisk
444 char \X2F ( 34 ) # 47 / solidus
445 char \X28 (123 56 ) # 40 ( left parenthesis
446 char \X29 ( 23456 ) # 41 ) right parenthesis
447 
448 # With all of these major considerations having been taken into account,
449 # convenient representations were still available, and are used, for some of
450 # the remaining characters:
451 
452  #Hex Dots Dec Char Description
453 char \X26 (1234 6 ) # 38 & ampersand
454 char \X23 ( 3456 ) # 35 # number sign
455 
456 # The remaining characters are what they are. Dot 7 isn't used either within
457 # the number block (32-63) or, with the exception of the DEL control character
458 # (127), within the lowercase block (96-127). With the exception of the
459 # underscore (95), dot 7 is used for every character within the uppercase block
460 # (64-95). Adding dot 7 to any character within the lowercase block (96-127)
461 # yields its corresponding character within the uppercase block (64-95) except
462 # that removing dot 7 from the DEL control character yields the underscore.
463 
464  #Hex Dots Dec Char Description
465 char \X2C ( 6 ) # 44 , comma
466 char \X3B ( 56 ) # 59 ; semicolon
467 char \X3A (1 56 ) # 58 : colon
468 char \X21 ( 234 6 ) # 33 ! exclamation mark
469 char \X3F (1 456 ) # 63 ? question mark
470 char \X22 ( 5 ) # 34 " quotation mark
471 char \X27 ( 3 ) # 39 ' apostrophe
472 char \X60 ( 4 ) # 96 ` grave accent
473 char \X5E ( 45 7 ) # 94 ^ circumflex accent
474 char \X7E ( 45 ) # 126 ~ tilde
475 char \X5B ( 2 4 67 ) # 91 [ left square bracket
476 char \X5D (12 4567 ) # 93 ] right square bracket
477 char \X7B ( 2 4 6 ) # 123 { left curly bracket
478 char \X7D (12 456 ) # 125 } right curly bracket
479 char \X3D (123456 ) # 61 = equals sign
480 char \X3C (12 6 ) # 60 < less-than sign
481 char \X3E ( 345 ) # 62 > greater-than sign
482 char \X24 (12 4 6 ) # 36 $ dollar sign
483 char \X25 (1 4 6 ) # 37 % percent sign
484 char \X40 ( 4 7 ) # 64 @ commercial at
485 char \X7C (12 56 ) # 124 | vertical line
486 char \X5C (12 567 ) # 92 \ reverse solidus
487 char \X5F ( 456 ) # 95 _ low line
488 
489 # Each of the characters within the basic control character block (0-31) is the
490 # same as its corresponding character within both the uppercase block (64-95)
491 # and the lowercase block (96-127) except that dots 7 and 8 are both used.
492 
493  #Hex Dots Dec Char Description
494 char \X00 ( 4 78) # 0 ^@ null
495 char \X01 (1 78) # 1 ^A start of heading
496 char \X02 (12 78) # 2 ^B start of text
497 char \X03 (1 4 78) # 3 ^C end of text
498 char \X04 (1 45 78) # 4 ^D end of transmission
499 char \X05 (1 5 78) # 5 ^E enquiry
500 char \X06 (12 4 78) # 6 ^F acknowledge
501 char \X07 (12 45 78) # 7 ^G bell
502 char \X08 (12 5 78) # 8 ^H backspace
503 char \X09 ( 2 4 78) # 9 ^I horizontal tabulation
504 char \X0A ( 2 45 78) # 10 ^J line feed
505 char \X0B (1 3 78) # 11 ^K vertical tabulation
506 char \X0C (123 78) # 12 ^L form feed
507 char \X0D (1 34 78) # 13 ^M carriage return
508 char \X0E (1 345 78) # 14 ^N shift out
509 char \X0F (1 3 5 78) # 15 ^O shift in
510 char \X10 (1234 78) # 16 ^P data link escape
511 char \X11 (12345 78) # 17 ^Q device control one
512 char \X12 (123 5 78) # 18 ^R device control two
513 char \X13 ( 234 78) # 19 ^S device control three
514 char \X14 ( 2345 78) # 20 ^T device control four
515 char \X15 (1 3 678) # 21 ^U negative acknowledge
516 char \X16 (123 678) # 22 ^V synchronous idle
517 char \X17 ( 2 45678) # 23 ^W end of transmission block
518 char \X18 (1 34 678) # 24 ^X cancel
519 char \X19 (1 345678) # 25 ^Y end of medium
520 char \X1A (1 3 5678) # 26 ^Z substitute
521 char \X1B ( 2 4 678) # 27 ^[ escape
522 char \X1C (12 5678) # 28 ^\ file separator
523 char \X1D (12 45678) # 29 ^] group separator
524 char \X1E ( 45 78) # 30 ^^ record separator
525 char \X1F ( 45678) # 31 ^_ unit separator
526 
527 # Each of the characters within the extended control character block (128-159)
528 # is the same as its corresponding character within the basic control character
529 # block (0-31) except that only dot 8 is used.
530 
531  #Hex Dots Dec Char Description
532 char \X80 ( 4 8) # 128 ~@ <control>
533 char \X81 (1 8) # 129 ~A <control>
534 char \X82 (12 8) # 130 ~B break permitted here
535 char \X83 (1 4 8) # 131 ~C no break here
536 char \X84 (1 45 8) # 132 ~D <control>
537 char \X85 (1 5 8) # 133 ~E next line
538 char \X86 (12 4 8) # 134 ~F start of selected area
539 char \X87 (12 45 8) # 135 ~G end of selected area
540 char \X88 (12 5 8) # 136 ~H character tabulation set
541 char \X89 ( 2 4 8) # 137 ~I character tabulation with justification
542 char \X8A ( 2 45 8) # 138 ~J line tabulation set
543 char \X8B (1 3 8) # 139 ~K partial line down
544 char \X8C (123 8) # 140 ~L partial line up
545 char \X8D (1 34 8) # 141 ~M reverse line feed
546 char \X8E (1 345 8) # 142 ~N single shift two
547 char \X8F (1 3 5 8) # 143 ~O single shift three
548 char \X90 (1234 8) # 144 ~P device control string
549 char \X91 (12345 8) # 145 ~Q private use one
550 char \X92 (123 5 8) # 146 ~R private use two
551 char \X93 ( 234 8) # 147 ~S set transmit state
552 char \X94 ( 2345 8) # 148 ~T cancel character
553 char \X95 (1 3 6 8) # 149 ~U message waiting
554 char \X96 (123 6 8) # 150 ~V start of guarded area
555 char \X97 ( 2 456 8) # 151 ~W end of guarded area
556 char \X98 (1 34 6 8) # 152 ~X start of string
557 char \X99 (1 3456 8) # 153 ~Y <control>
558 char \X9A (1 3 56 8) # 154 ~Z single character introducer
559 char \X9B ( 2 4 6 8) # 155 ~[ control sequence introducer
560 char \X9C (12 56 8) # 156 ~\ string terminator
561 char \X9D (12 456 8) # 157 ~] operating system command
562 char \X9E ( 45 8) # 158 ~^ privacy message
563 char \X9F ( 456 8) # 159 ~_ application program command
564 
565 # Representations for the uppercase accented letters are drawn from the
566 # remaining combinations which use both dots 7 and 8. The representation for a
567 # lowercase accented letter is the same as its uppercase counterpart except
568 # that dot 7 isn't used. This scheme retains the use of dot 7 as the modifier
569 # for a capitalized letter. The only exception to these rules is that, due to
570 # the nature of the Latin1 character set, the German lowercase double-s is
571 # treated as though it were an uppercase y-dieresis (neither has an uppercase
572 # definition). These representations have been gathered, as much as possible,
573 # into logical groupings.
574 
575 # The 5 letters with a circumflex accent (^) use the [1-5] dot combinations:
576 
577  #Hex Dots Dec Char Description
578 char \XC2 ( 2 78) # 194 ? latin capital letter a with circumflex
579 char \XCA ( 23 78) # 202 ? latin capital letter e with circumflex
580 char \XCE ( 2 5 78) # 206 ? latin capital letter i with circumflex
581 char \XD4 ( 2 5678) # 212 ? latin capital letter o with circumflex
582 char \XDB ( 2 678) # 219 ? latin capital letter u with circumflex
583 char \XE2 ( 2 8) # 226 ? latin small letter a with circumflex
584 char \XEA ( 23 8) # 234 ? latin small letter e with circumflex
585 char \XEE ( 2 5 8) # 238 ? latin small letter i with circumflex
586 char \XF4 ( 2 56 8) # 244 ? latin small letter o with circumflex
587 char \XFB ( 2 6 8) # 251 ? latin small letter u with circumflex
588 
589 # The 5 letters with a grave accent (`) use the [6-0] dot combinations:
590 
591  #Hex Dots Dec Char Description
592 char \XC0 ( 23 5 78) # 192 ? latin capital letter a with grave
593 char \XC8 ( 23 5678) # 200 ? latin capital letter e with grave
594 char \XCC ( 23 678) # 204 ? latin capital letter i with grave
595 char \XD2 ( 3 5 78) # 210 ? latin capital letter o with grave
596 char \XD9 ( 3 5678) # 217 ? latin capital letter u with grave
597 char \XE0 ( 23 5 8) # 224 ? latin small letter a with grave
598 char \XE8 ( 23 56 8) # 232 ? latin small letter e with grave
599 char \XEC ( 23 6 8) # 236 ? latin small letter i with grave
600 char \XF2 ( 3 5 8) # 242 ? latin small letter o with grave
601 char \XF9 ( 3 56 8) # 249 ? latin small letter u with grave
602 
603 # The 6 letters with an acute accent (') use the [a-f] dot combinations with
604 # dots 3 and 6 added:
605 
606  #Hex Dots Dec Char Description
607 char \XC1 (1 678) # 193 ? latin capital letter a with acute
608 char \XC9 (12 678) # 201 ? latin capital letter e with acute
609 char \XCD (1 4 678) # 205 ? latin capital letter i with acute
610 char \XD3 (1 45678) # 211 ? latin capital letter o with acute
611 char \XDA (1 5678) # 218 ? latin capital letter u with acute
612 char \XDD (12 4 678) # 221 ? latin capital letter y with acute
613 char \XE1 (1 6 8) # 225 ? latin small letter a with acute
614 char \XE9 (12 6 8) # 233 ? latin small letter e with acute
615 char \XED (1 4 6 8) # 237 ? latin small letter i with acute
616 char \XF3 (1 456 8) # 243 ? latin small letter o with acute
617 char \XFA (1 56 8) # 250 ? latin small letter u with acute
618 char \XFD (12 4 6 8) # 253 ? latin small letter y with acute
619 
620 # The 6 letters with a dieresis accent (") use the [f-j] dot combinations with
621 # dots 3 and 6 added, and the number sign (because it fits the sequence
622 # reasonably well):
623 
624  #Hex Dots Dec Char Description
625 char \XC4 (1234 678) # 196 ? latin capital letter a with diaeresis
626 char \XCB (12345678) # 203 ? latin capital letter e with diaeresis
627 char \XCF (123 5678) # 207 ? latin capital letter i with diaeresis
628 char \XD6 ( 234 678) # 214 ? latin capital letter o with diaeresis
629 char \XDC ( 2345678) # 220 ? latin capital letter u with diaeresis
630 char \XE4 (1234 6 8) # 228 ? latin small letter a with diaeresis
631 char \XEB (123456 8) # 235 ? latin small letter e with diaeresis
632 char \XEF (123 56 8) # 239 ? latin small letter i with diaeresis
633 char \XF6 ( 234 6 8) # 246 ? latin small letter o with diaeresis
634 char \XFC ( 23456 8) # 252 ? latin small letter u with diaeresis
635 char \XFF ( 3456 8) # 255 ? latin small letter y with diaeresis
636 
637 # There is no uppercase y-dieresis in the Latin1 character set. The German
638 # lowercase double-s, which also doesn't have an uppercase counterpart in the
639 # Latin1 character set, uses its representation:
640 
641  #Hex Dots Dec Char Description
642 char \XDF ( 345678) # 223 ? latin small letter sharp s
643 
644 # The remaining accented letters are:
645 
646  #Hex Dots Dec Char Description
647 char \XC3 ( 5 78) # 195 ? latin capital letter a with tilde
648 char \XD1 ( 4 678) # 209 ? latin capital letter n with tilde
649 char \XD5 ( 5678) # 213 ? latin capital letter o with tilde
650 char \XC5 ( 345 78) # 197 ? latin capital letter a with ring above
651 char \XC7 ( 34 678) # 199 ? latin capital letter c with cedilla
652 char \XD8 ( 34 78) # 216 ? latin capital letter o with stroke
653 char \XC6 ( 3 78) # 198 ? latin capital letter ae
654 char \XD0 ( 678) # 208 ? latin capital letter eth
655 char \XDE ( 3 678) # 222 ? latin capital letter thorn
656 char \XE3 ( 5 8) # 227 ? latin small letter a with tilde
657 char \XF1 ( 4 6 8) # 241 ? latin small letter n with tilde
658 char \XF5 ( 56 8) # 245 ? latin small letter o with tilde
659 char \XE5 ( 345 8) # 229 ? latin small letter a with ring above
660 char \XE7 ( 34 6 8) # 231 ? latin small letter c with cedilla
661 char \XF8 ( 34 8) # 248 ? latin small letter o with stroke
662 char \XE6 ( 3 8) # 230 ? latin small letter ae
663 char \XF0 ( 6 8) # 240 ? latin small letter eth
664 char \XFE ( 3 6 8) # 254 ? latin small letter thorn
665 
666 # Some characters are the same as other characters which they resemble but with
667 # dot 7 added:
668 
669  #Hex Dots Dec Char Description
670 char \XAD ( 3 67 ) # 173 ? soft hyphen
671 char \XAB (12 67 ) # 171 ? left-pointing double angle quotation mark
672 char \XBB ( 345 7 ) # 187 ? right-pointing double angle quotation mark
673 char \XA6 (1 567 ) # 166 ? broken bar
674 char \XB9 ( 2 7 ) # 185 ? superscript one
675 char \XB2 ( 23 7 ) # 178 ? superscript two
676 char \XB3 ( 2 5 7 ) # 179 ? superscript three
677 char \XB1 ( 34 67 ) # 177 ? plus-minus sign
678 char \XD7 (1 67 ) # 215 ? multiplication sign
679 char \XF7 ( 34 7 ) # 247 ? division sign
680 char \XB7 ( 4 67 ) # 183 ? middle dot
681 char \XA1 ( 234 67 ) # 161 ? inverted exclamation mark
682 char \XBF (1 4567 ) # 191 ? inverted question mark
683 char \XA2 (12 4 67 ) # 162 ? cent sign
684 char \XA3 ( 34567 ) # 163 ? pound sign
685 
686 # A few more characters follow this same convention but their relationships
687 # to their base characters is a bit obscure:
688 
689  #Hex Dots Dec Char Description
690 char \XA4 (1 4 67 ) # 164 ? currency sign
691 char \XA5 (1234 67 ) # 165 ? yen sign
692 
693 # Some characters are represented by the first letters of their names lowered
694 # by one row of dots:
695 
696  #Hex Dots Dec Char Description
697 char \XAC ( 2 567 ) # 172 ? not sign
698 char \XB6 ( 23 5 7 ) # 182 ? pilcrow sign
699 char \XA9 ( 23 567 ) # 169 ? copyright sign
700 char \XAE ( 23 67 ) # 174 ? registered sign
701 char \XA7 ( 3 5 7 ) # 167 ? section sign
702 char \XB0 ( 3 567 ) # 176 ? degree sign
703 
704 # The three fraction characters use combinations of dots 1 and 4 (which
705 # progress from left to right as the value of the fraction increases) together
706 # with dots 2,3,5,6,7:
707 
708  #Hex Dots Dec Char Description
709 char \XBC (123 567 ) # 188 ? vulgar fraction one quarter
710 char \XBD (1234567 ) # 189 ? vulgar fraction one half
711 char \XBE ( 234567 ) # 190 ? vulgar fraction three quarters
712 
713 # Each of the three extended accent characters is the same as its conventional
714 # compose character but with dot 7 added:
715 
716  #Hex Dots Dec Char Description
717 char \XB4 ( 3 7 ) # 180 ? acute accent
718 char \XB8 ( 67 ) # 184 ? cedilla
719 char \XA8 ( 5 7 ) # 168 ? diaeresis
720 
721 # The two gender symbols are:
722 
723  #Hex Dots Dec Char Description
724 char \XBA ( 7 ) # 186 ? masculine ordinal indicator
725 char \XAA ( 8) # 170 ? feminine ordinal indicator
726 
727 # The three remaining characters are:
728 
729  #Hex Dots Dec Char Description
730 char \XAF ( 2 67 ) # 175 ? macron
731 char \XB5 ( 567 ) # 181 ? micro sign
732 char \XA0 ( 78) # 160 ~ no-break space
733 
734 # The nonbreaking space is dots 7 and 8 because this presents a sequence of
735 # nonbreaking spaces as a smooth low line segment.
736 
737  #Hex Dots Dec Char Description
738 char \X7F ( 4567 ) # 127 ^? delete
739 
740 */
741 
742 
743 //______________________________________________________________________________
744 /*
745 enum bsrCellKind2 {
746  // lower decimal digits
747  kCellLower1, // kDots2,
748  kCellLower2, // kDots23,
749  kCellLower3, // kDots25,
750  kCellLower4, // kDots256,
751  kCellLower5, // kDots26,
752  kCellLower6, // kDots235,
753  kCellLower7, // kDots2356,
754  kCellLower8, // kDots236,
755  kCellLower9, // kDots35,
756  kCellLower0, // kDots356;
757 
758 
759  // punctuation
760  kCellDot , // kDots256,
761  kCellComma , // kDots2,
762  kCellQuestionMark , // kDots26,
763  kCellSemicolon , // kDots23,
764  kCellColon , // kDots25,
765  kCellExclamationMark , // kDots235,
766  kCellLeftParenthesis , // kDots236,
767  kCellRightParenthesis, // kDots356,
768  kCellDoubleQuote , // kDots2356,
769  kCellDash , // kDots36,
770  kCellQuote , // kDots3;
771 
772  // other symbols
773  kCellSlash , // kDots34,
774  kCellVerseEnd, // kDots345,
775  kCellItalics , // kDots456,
776  kCellAsterisk, // kDots35,
777  kCellExponent, // kDots4;
778 
779  // intervals
780  kCellSecond , // kDots34,
781  kCellThird , // kDots346,
782  kCellFourth , // kDots3456,
783  kCellFifth , // kDots35,
784  kCellSixth , // kDots356,
785  kCellSeventh, // kDots25,
786  kCellEighth , // kDots36;
787 
788  // triplets
789  kCellTriplet , // kDots23;
790 
791  // keyboard hands
792  kCellRightHand, // { kDots46, kDots345 },
793  kCellLeftHand, // { kDots456, kDots345 };
794 
795  // bars
796  kCellFinalDoubleBar , // { kDots126, kDots13 },
797  kCellSectionalDoubleBar, // { kDots126, kDots13, kDots3 };
798 
799  // measure divisions
800  kCellMeasureDivisionSign, // { kDots46, kDots13 };
801 
802  // words
803  kCellWordSign , // kDots345,
804  kCellWordApostrophe, // kDots6;
805 
806  // capitals
807  kCellCapitalsSign, //, // kDots46;
808  kCellCapitalsSequenceSign, // { kCellCapitalsSign, kCellCapitalsSign };
809 
810  // parentheses
811  kCellLiteraryLeftParenthesis , //{ kDots5, kDots126 },
812  kCellLiteraryRightParenthesis , //{ kDots5, kDots345 },
813  kCellMusicParentheses , //{ kDots6, kDots3 },
814  kCellSpecialParentheses , //{ kDots2356, kDots2356 };
815 
816  // other symbols
817  kCellParagraph , //{ kDots5, kDots1234 },
818  kCellAmpersand , //{ kDots5, kDots123456 },
819  kCellUpsilon , //{ kDots45, kDots13456 }, // better name JMI ???
820  kCellEuro , //{ kDots45, kDots15 },
821  kCellDollar , //{ kDots45, kDots234 },
822  kCellPound , //{ kDots45, kDots123 },
823  kCellCopyright , //{ kDots5, kDots14 },
824  kCellRegisteredTradeMark , //{ kDots5, kDots1235 },
825  kCellTradeMark , //{ kDots5, kDots2345 },
826  kCellPercent , //{ kDots5, kDots346 },
827  kCellPerthousand , //{ kDots5, kDots346, kDots346 },
828  kCellPertenthousand , //{ kDots5, kDots346, kDots346, kDots346 };
829 
830  // fermatas
831  kCellFermataOnANote , //{ kDots146, kDots126, kDots123 },
832  kCellFermataBetweenNotes , //{ kDots5, kDots126, kDots123 },
833  kCellFermataOverABarline , //{ kDots456, kDots126, kDots123 };
834 };
835 */
836 
837 /*
838 U+2800 ⠀ e2 a0 80 BRAILLE PATTERN BLANK
839 U+2801 ⠁ e2 a0 81 BRAILLE PATTERN DOTS-1
840 U+2802 ⠂ e2 a0 82 BRAILLE PATTERN DOTS-2
841 U+2803 ⠃ e2 a0 83 BRAILLE PATTERN DOTS-12
842 U+2804 ⠄ e2 a0 84 BRAILLE PATTERN DOTS-3
843 U+2805 ⠅ e2 a0 85 BRAILLE PATTERN DOTS-13
844 U+2806 ⠆ e2 a0 86 BRAILLE PATTERN DOTS-23
845 U+2807 ⠇ e2 a0 87 BRAILLE PATTERN DOTS-123
846 U+2808 ⠈ e2 a0 88 BRAILLE PATTERN DOTS-4
847 U+2809 ⠉ e2 a0 89 BRAILLE PATTERN DOTS-14
848 U+280A ⠊ e2 a0 8a BRAILLE PATTERN DOTS-24
849 U+280B ⠋ e2 a0 8b BRAILLE PATTERN DOTS-124
850 U+280C ⠌ e2 a0 8c BRAILLE PATTERN DOTS-34
851 U+280D ⠍ e2 a0 8d BRAILLE PATTERN DOTS-134
852 U+280E ⠎ e2 a0 8e BRAILLE PATTERN DOTS-234
853 U+280F ⠏ e2 a0 8f BRAILLE PATTERN DOTS-1234
854 U+2810 ⠐ e2 a0 90 BRAILLE PATTERN DOTS-5
855 U+2811 ⠑ e2 a0 91 BRAILLE PATTERN DOTS-15
856 U+2812 ⠒ e2 a0 92 BRAILLE PATTERN DOTS-25
857 U+2813 ⠓ e2 a0 93 BRAILLE PATTERN DOTS-125
858 U+2814 ⠔ e2 a0 94 BRAILLE PATTERN DOTS-35
859 U+2815 ⠕ e2 a0 95 BRAILLE PATTERN DOTS-135
860 U+2816 ⠖ e2 a0 96 BRAILLE PATTERN DOTS-235
861 U+2817 ⠗ e2 a0 97 BRAILLE PATTERN DOTS-1235
862 U+2818 ⠘ e2 a0 98 BRAILLE PATTERN DOTS-45
863 U+2819 ⠙ e2 a0 99 BRAILLE PATTERN DOTS-145
864 U+281A ⠚ e2 a0 9a BRAILLE PATTERN DOTS-245
865 U+281B ⠛ e2 a0 9b BRAILLE PATTERN DOTS-1245
866 U+281C ⠜ e2 a0 9c BRAILLE PATTERN DOTS-345
867 U+281D ⠝ e2 a0 9d BRAILLE PATTERN DOTS-1345
868 U+281E ⠞ e2 a0 9e BRAILLE PATTERN DOTS-2345
869 U+281F ⠟ e2 a0 9f BRAILLE PATTERN DOTS-12345
870 U+2820 ⠠ e2 a0 a0 BRAILLE PATTERN DOTS-6
871 U+2821 ⠡ e2 a0 a1 BRAILLE PATTERN DOTS-16
872 U+2822 ⠢ e2 a0 a2 BRAILLE PATTERN DOTS-26
873 U+2823 ⠣ e2 a0 a3 BRAILLE PATTERN DOTS-126
874 U+2824 ⠤ e2 a0 a4 BRAILLE PATTERN DOTS-36
875 U+2825 ⠥ e2 a0 a5 BRAILLE PATTERN DOTS-136
876 U+2826 ⠦ e2 a0 a6 BRAILLE PATTERN DOTS-236
877 U+2827 ⠧ e2 a0 a7 BRAILLE PATTERN DOTS-1236
878 U+2828 ⠨ e2 a0 a8 BRAILLE PATTERN DOTS-46
879 U+2829 ⠩ e2 a0 a9 BRAILLE PATTERN DOTS-146
880 U+282A ⠪ e2 a0 aa BRAILLE PATTERN DOTS-246
881 U+282B ⠫ e2 a0 ab BRAILLE PATTERN DOTS-1246
882 U+282C ⠬ e2 a0 ac BRAILLE PATTERN DOTS-346
883 U+282D ⠭ e2 a0 ad BRAILLE PATTERN DOTS-1346
884 U+282E ⠮ e2 a0 ae BRAILLE PATTERN DOTS-2346
885 U+282F ⠯ e2 a0 af BRAILLE PATTERN DOTS-12346
886 U+2830 ⠰ e2 a0 b0 BRAILLE PATTERN DOTS-56
887 U+2831 ⠱ e2 a0 b1 BRAILLE PATTERN DOTS-156
888 U+2832 ⠲ e2 a0 b2 BRAILLE PATTERN DOTS-256
889 U+2833 ⠳ e2 a0 b3 BRAILLE PATTERN DOTS-1256
890 U+2834 ⠴ e2 a0 b4 BRAILLE PATTERN DOTS-356
891 U+2835 ⠵ e2 a0 b5 BRAILLE PATTERN DOTS-1356
892 U+2836 ⠶ e2 a0 b6 BRAILLE PATTERN DOTS-2356
893 U+2837 ⠷ e2 a0 b7 BRAILLE PATTERN DOTS-12356
894 U+2838 ⠸ e2 a0 b8 BRAILLE PATTERN DOTS-456
895 U+2839 ⠹ e2 a0 b9 BRAILLE PATTERN DOTS-1456
896 U+283A ⠺ e2 a0 ba BRAILLE PATTERN DOTS-2456
897 U+283B ⠻ e2 a0 bb BRAILLE PATTERN DOTS-12456
898 U+283C ⠼ e2 a0 bc BRAILLE PATTERN DOTS-3456
899 U+283D ⠽ e2 a0 bd BRAILLE PATTERN DOTS-13456
900 U+283E ⠾ e2 a0 be BRAILLE PATTERN DOTS-23456
901 U+283F ⠿ e2 a0 bf BRAILLE PATTERN DOTS-123456
902 
903 
904 FROM http://unicode.org/faq/utf_bom.html#BOM :
905 
906 
907 Q: How do I write a UTF converter?
908 
909 A: The freely available open source project International Components for Unicode (ICU) has UTF conversion built into it. The latest version may be downloaded from the ICU Project web site.
910 *
911 http://site.icu-project.org
912 
913 
914 Bytes Encoding Form
915 00 00 FE FF UTF-32, big-endian
916 FF FE 00 00 UTF-32, little-endian
917 FE FF UTF-16, big-endian
918 FF FE UTF-16, little-endian
919 EF BB BF UTF-8
920 
921 
922 Q: Is there a standard method to package a Unicode character so it fits an 8-Bit ASCII stream?
923 
924 A: There are three or four options for making Unicode fit into an 8-bit format.
925 
926 a) Use UTF-8. This preserves ASCII, but not Latin-1, because the characters >127 are different from Latin-1. UTF-8 uses the bytes in the ASCII only for ASCII characters. Therefore, it works well in any environment where ASCII characters have a significance as syntax characters, e.g. file name syntaxes, markup languages, etc., but where the all other characters may use arbitrary bytes.
927 Example: “Latin Small Letter s with Acute” (015B) would be encoded as two bytes: C5 9B.
928 
929 b) Use Java or C style escapes, of the form \uXXXXX or \xXXXXX. This format is not standard for text files, but well defined in the framework of the languages in question, primarily for source files.
930 Example: The Polish word “wyjście” with character “Latin Small Letter s with Acute” (015B) in the middle (ś is one character) would look like: “wyj\u015Bcie”.
931 
932 c) Use the &#xXXXX; or &#DDDDD; numeric character escapes as in HTML or XML. Again, these are not standard for plain text files, but well defined within the framework of these markup languages.
933 Example: “wyjście” would look like “wyj&#x015B;cie”
934 
935 d) Use SCSU. This format compresses Unicode into 8-bit format, preserving most of ASCII, but using some of the control codes as commands for the decoder. However, while ASCII text will look like ASCII text after being encoded in SCSU, other characters may occasionally be encoded with the same byte values, making SCSU unsuitable for 8-bit channels that blindly interpret any of the bytes as ASCII characters.
936 Example: “<SC2> wyjÛcie” where <SC2> indicates the byte 0x12 and “Û” corresponds to byte 0xDB. [AF]
937 
938 
939 A: The following table summarizes some of the properties of each of the UTFs.
940 
941 Name UTF-8 UTF-16 UTF-16BE UTF-16LE UTF-32 UTF-32BE UTF-32LE
942 Smallest code point 0000 0000 0000 0000 0000 0000 0000
943 Largest code point 10FFFF 10FFFF 10FFFF 10FFFF 10FFFF 10FFFF 10FFFF
944 Code unit size 8 bits 16 bits 16 bits 16 bits 32 bits 32 bits 32 bits
945 Byte order N/A <BOM> big-endian little-endian <BOM> big-endian little-endian
946 Fewest bytes per character 1 2 2 2 4 4 4
947 Most bytes per character 4 4 4 4 4 4 4
948 
949 
950 
951 Q: What’s the algorithm to convert from UTF-16 to character codes?
952 
953 A: The Unicode Standard used to contain a short algorithm, now there is just a bit distribution table. Here are three short code snippets that translate the information from the bit distribution table into C code that will convert to and from UTF-16.
954 
955 Using the following type definitions
956 
957 typedef unsigned int16 UTF16;
958 typedef unsigned int32 UTF32;
959 the first snippet calculates the high (or leading) surrogate from a character code C.
960 
961 const UTF16 HI_SURROGATE_START = 0xD800
962 UTF16 X = (UTF16) C;
963 UTF32 U = (C >> 16) & ((1 << 5) - 1);
964 UTF16 W = (UTF16) U - 1;
965 UTF16 HiSurrogate = HI_SURROGATE_START | (W << 6) | X >> 10;
966 where X, U and W correspond to the labels used in Table 3-5 UTF-16 Bit Distribution. The next snippet does the same for the low surrogate.
967 
968 const UTF16 LO_SURROGATE_START = 0xDC00
969 UTF16 X = (UTF16) C;
970 UTF16 LoSurrogate = (UTF16) (LO_SURROGATE_START | X & ((1 << 10) - 1));
971 Finally, the reverse, where hi and lo are the high and low surrogate, and C the resulting character
972 
973 UTF32 X = (hi & ((1 << 6) -1)) << 10 | lo & ((1 << 10) -1);
974 UTF32 W = (hi >> 6) & ((1 << 5) - 1);
975 UTF32 U = W + 1;
976 UTF32 C = U << 16 | X;
977 A caller would need to ensure that C, hi, and lo are in the appropriate ranges. [AF]
978 
979 Q: Isn’t there a simpler way to do this?
980 
981 A: There is a much simpler computation that does not try to follow the bit distribution table.
982 
983 // constants
984 const UTF32 LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
985 const UTF32 SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;
986 
987 // computations
988 UTF16 lead = LEAD_OFFSET + (codepoint >> 10);
989 UTF16 trail = 0xDC00 + (codepoint & 0x3FF);
990 
991 UTF32 codepoint = (lead << 10) + trail + SURROGATE_OFFSET;
992 
993 
994 */