LibMusicXML 3.18
bsrBasicTypes.h
1/*
2 MusicXML Library
3 Copyright (C) Grame 2006-2013
4
5 This Source Code Form is subject to the terms of the Mozilla Public
6 License, v. 2.0. If a copy of the MPL was not distributed with this
7 file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
9 Grame Research Laboratory, 11, cours de Verdun Gensoul 69002 Lyon - France
10 research@grame.fr
11*/
12
13#ifndef ___bsrBasicTypes___
14#define ___bsrBasicTypes___
15
#include <list>
#include <map>
#include <ostream>
#include <string>

#include "msrBasicTypes.h"
20
21
22namespace MusicXML2
23{
24
25// cell kinds
26//______________________________________________________________________________
/// Kinds of braille cells handled by the BSR representation.
///
/// After the three leading non-6-dots values, the 64 six-dot cells are listed
/// in the order of the Unicode code points U+2800 .. U+283F, so that
/// (value - kDotsNone) is the offset of the cell from U+2800.
/// The enumerator order therefore must not be changed.
enum bsrCellKind {
  kCellUnknown,

  // non 6dots values
  kCellEOL, // U+000A
  kCellEOP, // U+000C

  // 6dots values for Braille music, eight cells per row

  // U+2800 .. U+2807
  kDotsNone, kDots1, kDots2, kDots12, kDots3, kDots13, kDots23, kDots123,
  // U+2808 .. U+280F
  kDots4, kDots14, kDots24, kDots124, kDots34, kDots134, kDots234, kDots1234,

  // U+2810 .. U+2817
  kDots5, kDots15, kDots25, kDots125, kDots35, kDots135, kDots235, kDots1235,
  // U+2818 .. U+281F
  kDots45, kDots145, kDots245, kDots1245, kDots345, kDots1345, kDots2345, kDots12345,

  // U+2820 .. U+2827
  kDots6, kDots16, kDots26, kDots126, kDots36, kDots136, kDots236, kDots1236,
  // U+2828 .. U+282F
  kDots46, kDots146, kDots246, kDots1246, kDots346, kDots1346, kDots2346, kDots12346,

  // U+2830 .. U+2837
  kDots56, kDots156, kDots256, kDots1256, kDots356, kDots1356, kDots2356, kDots12356,
  // U+2838 .. U+283F
  kDots456, kDots1456, kDots2456, kDots12456, kDots3456, kDots13456, kDots23456, kDots123456
};
103
104//______________________________________________________________________________
105string bsrCellKindAsShortString (bsrCellKind cellKind);
106
107string bsrCellKindAsString (bsrCellKind cellKind);
108
109// lower case letters
110//______________________________________________________________________________
111const bsrCellKind
112 kCellA = kDots1,
113 kCellB = kDots12,
114 kCellC = kDots14,
115 kCellD = kDots145,
116 kCellE = kDots15,
117 kCellF = kDots124,
118 kCellG = kDots1245,
119 kCellH = kDots125,
120 kCellI = kDots24,
121 kCellJ = kDots245,
122
123 kCellK = kDots13,
124 kCellL = kDots123,
125 kCellM = kDots134,
126 kCellN = kDots1345,
127 kCellO = kDots135,
128 kCellP = kDots1234,
129 kCellQ = kDots12345,
130 kCellR = kDots1235,
131 kCellS = kDots234,
132 kCellT = kDots2345,
133
134 kCellU = kDots136,
135 kCellV = kDots1236,
136 kCellW = kDots2456,
137 kCellX = kDots1346,
138 kCellY = kDots13456,
139 kCellZ = kDots1356;
140
141// capitals
142//______________________________________________________________________________
143const bsrCellKind
144 kCellCapitalsSign = kDots46;
145// kCellCapitalsSequenceSign, // { kCellCapitalsSign, kCellCapitalsSign };
146
147// decimal digits
148//______________________________________________________________________________
149const bsrCellKind
150 kCellNumberSign = kDots3456,
151 kCell1 = kCellA,
152 kCell2 = kCellB,
153 kCell3 = kCellC,
154 kCell4 = kCellD,
155 kCell5 = kCellE,
156 kCell6 = kCellF,
157 kCell7 = kCellG,
158 kCell8 = kCellH,
159 kCell9 = kCellI,
160 kCell0 = kCellJ;
161
162// lower decimal digits
163//______________________________________________________________________________
164const bsrCellKind
165 kCellLower1 = kDots2,
166 kCellLower2 = kDots23,
167 kCellLower3 = kDots25,
168 kCellLower4 = kDots256,
169 kCellLower5 = kDots26,
170 kCellLower6 = kDots235,
171 kCellLower7 = kDots2356,
172 kCellLower8 = kDots236,
173 kCellLower9 = kDots35,
174 kCellLower0 = kDots356;
175
176// alterations
177//______________________________________________________________________________
178const bsrCellKind
179 kCellFlat = kDots126,
180 kCellNatural = kDots16,
181 kCellSharp = kDots146;
182
183// augmentation dots
184//______________________________________________________________________________
185const bsrCellKind
186 kCellAugmentationDot = kDots3;
187
188// arithmetic operators
189//______________________________________________________________________________
190const bsrCellKind
191 kCell_ac_plus = kDots235,
192 kCell_ac_minus = kDots36,
193 kCell_ac_times = kDots35,
194 kCell_ac_dividedBy = kDots25,
195 kCell_ac_equals = kDots2356;
196
197// words
198//______________________________________________________________________________
199const bsrCellKind
200 kCellWordSign = kDots345,
201
202 kCellWordApostrophe = kDots6,
203
204 kCellParenthesis = kDots2356,
205 kCellQuestionMark = kDots26;
206
207// braille cells
208//______________________________________________________________________________
209void brailleCellKindAsUTF8 (bsrCellKind cellKind, ostream& os);
210void brailleCellKindAsUTF16 (bsrCellKind cellKind, ostream& os);
211
212void brailleCellKind (bsrCellKind cellKind, ostream& os);
213
214// braille output kinds
215//______________________________________________________________________________
/// Encodings available for the generated braille output.
enum bsrBrailleOutputKind {
  kBrailleOutputAscii, // default value
  kBrailleOutputUTF8,
  kBrailleOutputUTF16
};
219
220string bsrBrailleOutputKindAsString (
221 bsrBrailleOutputKind brailleOutputKind);
222
223extern map<string, bsrBrailleOutputKind>
224 gBsrBrailleOutputKindsMap;
225
226string existingBsrBrailleOutputKinds (int namesListMaxLength);
227
228void initializeBsrBrailleOutputKindsMap ();
229
// texts languages
231//______________________________________________________________________________
/// Languages available for the texts in the braille output.
enum bsrTextsLanguageKind {
  kTextsEnglish, // BANA's default
  kTextsGerman,
  kTextsItalian,
  kTextsFrench
};
235
236string bsrTextsLanguageKindAsString (
237 bsrTextsLanguageKind languageKind);
238
239extern map<string, bsrTextsLanguageKind>
240 gBsrTextsLanguageKindsMap;
241
242string existingBsrTextsLanguageKinds (int namesListMaxLength);
243
244void initializeBsrTextsLanguageKindsMap ();
245
246
247/*
248//______________________________________________________________________________
249// brailling numbers
250wstring braille (int n);
251
252//______________________________________________________________________________
253// brailling characters and strings
254bsrDot6Cell braille (char ch);
255
256wstring braille (string str);
257
258//______________________________________________________________________________
259// writing UTF-16 to ostreams
260void write_bsrDot6Cell (ostream& os, bsrDot6Cell cell);
261
262void write_bsrDot6Cell ( bsrDot6Cell cell );
263
264EXP ostream& operator<< (ostream& os, const bsrDot6Cell cell);
265
266void write_wstring (ostream& os, wstring wstr );
267
268EXP ostream& operator<< (ostream& os, const wstring& wstr);
269*/
270
271// warnings and errors
272//______________________________________________________________________________
273EXP void bsrMusicXMLWarning (
274 std::string inputSourceName,
275 int inputLineNumber,
276 std::string message);
277
278EXP void bsrInternalError (
279 std::string inputSourceName,
280 int inputLineNumber,
281 std::string sourceCodeFileName,
282 int sourceCodeLineNumber,
283 std::string message);
284
285// initialization
286//______________________________________________________________________________
/// Initializes the BSR basic types.
/// NOTE(review): presumably sets up the kinds maps declared in this header;
/// confirm against the implementation.
void initializeBSRBasicTypes();
288
289
290} // namespace MusicXML2
291
292
293#endif
294
295
296/*
297from https://brltty.app/pipermail/brltty/2012-October/009556.html :
298
299[BRLTTY] Braille Code
300
301Dave Mielke dave at mielke.cc
302Wed Oct 10 14:15:08 EDT 2012
303Previous message: [BRLTTY] Braille Code
304Next message: [BRLTTY] Braille Code
305Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
306[quoted lines by Dave Mielke on 2012/10/10 at 14:13 -0400]
307
308>Each language has its own mapping. That being said, the NABCC (North American
309>Braille Computer Code) is probably as good a place to start as any. You can
310>find a table for that mapping within the file Tables/en-nabcc.ttb in brltty's
311>source tree. I've attached it to this message, as well.
312
313I forgot to attach the table. Here it is.
314
315--
316Dave Mielke | 2213 Fox Crescent | The Bible is the very Word of God.
317Phone: 1-613-726-0014 | Ottawa, Ontario | 2011 May 21 is the End of Salvation.
318EMail: dave at mielke.cc | Canada K2A 1H7 | http://Mielke.cc/now.html
319http://FamilyRadio.com/ | http://Mielke.cc/bible/
320-------------- next part --------------
321###############################################################################
322# BRLTTY - A background process providing access to the console screen (when in
323# text mode) for a blind person using a refreshable braille display.
324#
325# Copyright (C) 1995-2012 by The BRLTTY Developers.
326#
327# BRLTTY comes with ABSOLUTELY NO WARRANTY.
328#
329# This is free software, placed under the terms of the
330# GNU Lesser General Public License, as published by the Free Software
331# Foundation; either version 2.1 of the License, or (at your option) any
332# later version. Please see the file LICENSE-LGPL for details.
333#
334# Web Page: http://mielke.cc/brltty/
335#
336# This software is maintained by Dave Mielke <dave at mielke.cc>.
337###############################################################################
338
339# BRLTTY Text Table - English (North American Braille Computer Code)
340
341# This is a description of the default text table used by BRLTTY.
342# It's based on the North American Braille Computer Code, but defines the full
343# Latin1 (ISO-8859-1) character set.
344
345# The 95 printable characters of the standard 7-bit US-ASCII character set
346# (32-126) are identical to their representations within the North American
347# Braille Computer Code (these are the only characters which the NABCC actually
348# defines). Characters from literary braille, symbols from The Nemeth Braille
349# Code for Mathematics and Science Notation, and a bit of human imagination
350# have all been combined to create an easy-to-remember, one-to-one mapping
351# between each character and its braille counterpart. All possible combinations
352# involving only the original 6 braille dots are used, but that only allows for
353# 64 out of the required 95 character representations. The presence or absence
354# of dot 7 is used to differentiate between pairs of characters which either
355# are very closely related in meaning or, in a few cases where a more intuitive
356# reason couldn't be found, have a very close logical relationship within the
357# US-ASCII code. Dot 8 isn't used at all.
358
359# The space and the 26 lowercase letters (a-z) are the same as in literary
360# braille:
361
362 #Hex Dots Dec Char Description
363char \X20 ( ) # 32 space
364char \X61 (1 ) # 97 a latin small letter a
365char \X62 (12 ) # 98 b latin small letter b
366char \X63 (1 4 ) # 99 c latin small letter c
367char \X64 (1 45 ) # 100 d latin small letter d
368char \X65 (1 5 ) # 101 e latin small letter e
369char \X66 (12 4 ) # 102 f latin small letter f
370char \X67 (12 45 ) # 103 g latin small letter g
371char \X68 (12 5 ) # 104 h latin small letter h
372char \X69 ( 2 4 ) # 105 i latin small letter i
373char \X6A ( 2 45 ) # 106 j latin small letter j
374char \X6B (1 3 ) # 107 k latin small letter k
375char \X6C (123 ) # 108 l latin small letter l
376char \X6D (1 34 ) # 109 m latin small letter m
377char \X6E (1 345 ) # 110 n latin small letter n
378char \X6F (1 3 5 ) # 111 o latin small letter o
379char \X70 (1234 ) # 112 p latin small letter p
380char \X71 (12345 ) # 113 q latin small letter q
381char \X72 (123 5 ) # 114 r latin small letter r
382char \X73 ( 234 ) # 115 s latin small letter s
383char \X74 ( 2345 ) # 116 t latin small letter t
384char \X75 (1 3 6 ) # 117 u latin small letter u
385char \X76 (123 6 ) # 118 v latin small letter v
386char \X77 ( 2 456 ) # 119 w latin small letter w
387char \X78 (1 34 6 ) # 120 x latin small letter x
388char \X79 (1 3456 ) # 121 y latin small letter y
389char \X7A (1 3 56 ) # 122 z latin small letter z
390
391# The 26 uppercase letters (A-Z) are the same as their lowercase counterparts
392# except that dot 7 is added:
393
394 #Hex Dots Dec Char Description
395char \X41 (1 7 ) # 65 A latin capital letter a
396char \X42 (12 7 ) # 66 B latin capital letter b
397char \X43 (1 4 7 ) # 67 C latin capital letter c
398char \X44 (1 45 7 ) # 68 D latin capital letter d
399char \X45 (1 5 7 ) # 69 E latin capital letter e
400char \X46 (12 4 7 ) # 70 F latin capital letter f
401char \X47 (12 45 7 ) # 71 G latin capital letter g
402char \X48 (12 5 7 ) # 72 H latin capital letter h
403char \X49 ( 2 4 7 ) # 73 I latin capital letter i
404char \X4A ( 2 45 7 ) # 74 J latin capital letter j
405char \X4B (1 3 7 ) # 75 K latin capital letter k
406char \X4C (123 7 ) # 76 L latin capital letter l
407char \X4D (1 34 7 ) # 77 M latin capital letter m
408char \X4E (1 345 7 ) # 78 N latin capital letter n
409char \X4F (1 3 5 7 ) # 79 O latin capital letter o
410char \X50 (1234 7 ) # 80 P latin capital letter p
411char \X51 (12345 7 ) # 81 Q latin capital letter q
412char \X52 (123 5 7 ) # 82 R latin capital letter r
413char \X53 ( 234 7 ) # 83 S latin capital letter s
414char \X54 ( 2345 7 ) # 84 T latin capital letter t
415char \X55 (1 3 67 ) # 85 U latin capital letter u
416char \X56 (123 67 ) # 86 V latin capital letter v
417char \X57 ( 2 4567 ) # 87 W latin capital letter w
418char \X58 (1 34 67 ) # 88 X latin capital letter x
419char \X59 (1 34567 ) # 89 Y latin capital letter y
420char \X5A (1 3 567 ) # 90 Z latin capital letter z
421
422# The 10 decimal digits (0-9) are the same as in the Nemeth Code:
423
424 #Hex Dots Dec Char Description
425char \X30 ( 3 56 ) # 48 0 digit zero
426char \X31 ( 2 ) # 49 1 digit one
427char \X32 ( 23 ) # 50 2 digit two
428char \X33 ( 2 5 ) # 51 3 digit three
429char \X34 ( 2 56 ) # 52 4 digit four
430char \X35 ( 2 6 ) # 53 5 digit five
431char \X36 ( 23 5 ) # 54 6 digit six
432char \X37 ( 23 56 ) # 55 7 digit seven
433char \X38 ( 23 6 ) # 56 8 digit eight
434char \X39 ( 3 5 ) # 57 9 digit nine
435
436# Common symbols used within mathematical expressions by popular computer
437# programming languages are the same as in the Nemeth Code:
438
439 #Hex Dots Dec Char Description
440char \X2E ( 4 6 ) # 46 . full stop
441char \X2B ( 34 6 ) # 43 + plus sign
442char \X2D ( 3 6 ) # 45 - hyphen-minus
443char \X2A (1 6 ) # 42 * asterisk
444char \X2F ( 34 ) # 47 / solidus
445char \X28 (123 56 ) # 40 ( left parenthesis
446char \X29 ( 23456 ) # 41 ) right parenthesis
447
448# With all of these major considerations having been taken into account,
449# convenient representations were still available, and are used, for some of
450# the remaining characters:
451
452 #Hex Dots Dec Char Description
453char \X26 (1234 6 ) # 38 & ampersand
454char \X23 ( 3456 ) # 35 # number sign
455
456# The remaining characters are what they are. Dot 7 isn't used either within
457# the number block (32-63) or, with the exception of the DEL control character
458# (127), within the lowercase block (96-127). With the exception of the
459# underscore (95), dot 7 is used for every character within the uppercase block
460# (64-95). Adding dot 7 to any character within the lowercase block (96-127)
461# yields its corresponding character within the uppercase block (64-95) except
462# that removing dot 7 from the DEL control character yields the underscore.
463
464 #Hex Dots Dec Char Description
465char \X2C ( 6 ) # 44 , comma
466char \X3B ( 56 ) # 59 ; semicolon
467char \X3A (1 56 ) # 58 : colon
468char \X21 ( 234 6 ) # 33 ! exclamation mark
469char \X3F (1 456 ) # 63 ? question mark
470char \X22 ( 5 ) # 34 " quotation mark
471char \X27 ( 3 ) # 39 ' apostrophe
472char \X60 ( 4 ) # 96 ` grave accent
473char \X5E ( 45 7 ) # 94 ^ circumflex accent
474char \X7E ( 45 ) # 126 ~ tilde
475char \X5B ( 2 4 67 ) # 91 [ left square bracket
476char \X5D (12 4567 ) # 93 ] right square bracket
477char \X7B ( 2 4 6 ) # 123 { left curly bracket
478char \X7D (12 456 ) # 125 } right curly bracket
479char \X3D (123456 ) # 61 = equals sign
480char \X3C (12 6 ) # 60 < less-than sign
481char \X3E ( 345 ) # 62 > greater-than sign
482char \X24 (12 4 6 ) # 36 $ dollar sign
483char \X25 (1 4 6 ) # 37 % percent sign
484char \X40 ( 4 7 ) # 64 @ commercial at
485char \X7C (12 56 ) # 124 | vertical line
486char \X5C (12 567 ) # 92 \ reverse solidus
487char \X5F ( 456 ) # 95 _ low line
488
489# Each of the characters within the basic control character block (0-31) is the
490# same as its corresponding character within both the uppercase block (64-95)
491# and the lowercase block (96-127) except that dots 7 and 8 are both used.
492
493 #Hex Dots Dec Char Description
494char \X00 ( 4 78) # 0 ^@ null
495char \X01 (1 78) # 1 ^A start of heading
496char \X02 (12 78) # 2 ^B start of text
497char \X03 (1 4 78) # 3 ^C end of text
498char \X04 (1 45 78) # 4 ^D end of transmission
499char \X05 (1 5 78) # 5 ^E enquiry
500char \X06 (12 4 78) # 6 ^F acknowledge
501char \X07 (12 45 78) # 7 ^G bell
502char \X08 (12 5 78) # 8 ^H backspace
503char \X09 ( 2 4 78) # 9 ^I horizontal tabulation
504char \X0A ( 2 45 78) # 10 ^J line feed
505char \X0B (1 3 78) # 11 ^K vertical tabulation
506char \X0C (123 78) # 12 ^L form feed
507char \X0D (1 34 78) # 13 ^M carriage return
508char \X0E (1 345 78) # 14 ^N shift out
509char \X0F (1 3 5 78) # 15 ^O shift in
510char \X10 (1234 78) # 16 ^P data link escape
511char \X11 (12345 78) # 17 ^Q device control one
512char \X12 (123 5 78) # 18 ^R device control two
513char \X13 ( 234 78) # 19 ^S device control three
514char \X14 ( 2345 78) # 20 ^T device control four
515char \X15 (1 3 678) # 21 ^U negative acknowledge
516char \X16 (123 678) # 22 ^V synchronous idle
517char \X17 ( 2 45678) # 23 ^W end of transmission block
518char \X18 (1 34 678) # 24 ^X cancel
519char \X19 (1 345678) # 25 ^Y end of medium
520char \X1A (1 3 5678) # 26 ^Z substitute
521char \X1B ( 2 4 678) # 27 ^[ escape
522char \X1C (12 5678) # 28 ^\ file separator
523char \X1D (12 45678) # 29 ^] group separator
524char \X1E ( 45 78) # 30 ^^ record separator
525char \X1F ( 45678) # 31 ^_ unit separator
526
527# Each of the characters within the extended control character block (128-159)
528# is the same as its corresponding character within the basic control character
529# block (0-31) except that only dot 8 is used.
530
531 #Hex Dots Dec Char Description
532char \X80 ( 4 8) # 128 ~@ <control>
533char \X81 (1 8) # 129 ~A <control>
534char \X82 (12 8) # 130 ~B break permitted here
535char \X83 (1 4 8) # 131 ~C no break here
536char \X84 (1 45 8) # 132 ~D <control>
537char \X85 (1 5 8) # 133 ~E next line
538char \X86 (12 4 8) # 134 ~F start of selected area
539char \X87 (12 45 8) # 135 ~G end of selected area
540char \X88 (12 5 8) # 136 ~H character tabulation set
541char \X89 ( 2 4 8) # 137 ~I character tabulation with justification
542char \X8A ( 2 45 8) # 138 ~J line tabulation set
543char \X8B (1 3 8) # 139 ~K partial line down
544char \X8C (123 8) # 140 ~L partial line up
545char \X8D (1 34 8) # 141 ~M reverse line feed
546char \X8E (1 345 8) # 142 ~N single shift two
547char \X8F (1 3 5 8) # 143 ~O single shift three
548char \X90 (1234 8) # 144 ~P device control string
549char \X91 (12345 8) # 145 ~Q private use one
550char \X92 (123 5 8) # 146 ~R private use two
551char \X93 ( 234 8) # 147 ~S set transmit state
552char \X94 ( 2345 8) # 148 ~T cancel character
553char \X95 (1 3 6 8) # 149 ~U message waiting
554char \X96 (123 6 8) # 150 ~V start of guarded area
555char \X97 ( 2 456 8) # 151 ~W end of guarded area
556char \X98 (1 34 6 8) # 152 ~X start of string
557char \X99 (1 3456 8) # 153 ~Y <control>
558char \X9A (1 3 56 8) # 154 ~Z single character introducer
559char \X9B ( 2 4 6 8) # 155 ~[ control sequence introducer
560char \X9C (12 56 8) # 156 ~\ string terminator
561char \X9D (12 456 8) # 157 ~] operating system command
562char \X9E ( 45 8) # 158 ~^ privacy message
563char \X9F ( 456 8) # 159 ~_ application program command
564
565# Representations for the uppercase accented letters are drawn from the
566# remaining combinations which use both dots 7 and 8. The representation for a
567# lowercase accented letter is the same as its uppercase counterpart except
568# that dot 7 isn't used. This scheme retains the use of dot 7 as the modifier
569# for a capitalized letter. The only exception to these rules is that, due to
570# the nature of the Latin1 character set, the German lowercase double-s is
571# treated as though it were an uppercase y-dieresis (neither has an uppercase
572# definition). These representations have been gathered, as much as possible,
573# into logical groupings.
574
575# The 5 letters with a circumflex accent (^) use the [1-5] dot combinations:
576
577 #Hex Dots Dec Char Description
578char \XC2 ( 2 78) # 194 ? latin capital letter a with circumflex
579char \XCA ( 23 78) # 202 ? latin capital letter e with circumflex
580char \XCE ( 2 5 78) # 206 ? latin capital letter i with circumflex
581char \XD4 ( 2 5678) # 212 ? latin capital letter o with circumflex
582char \XDB ( 2 678) # 219 ? latin capital letter u with circumflex
583char \XE2 ( 2 8) # 226 ? latin small letter a with circumflex
584char \XEA ( 23 8) # 234 ? latin small letter e with circumflex
585char \XEE ( 2 5 8) # 238 ? latin small letter i with circumflex
586char \XF4 ( 2 56 8) # 244 ? latin small letter o with circumflex
587char \XFB ( 2 6 8) # 251 ? latin small letter u with circumflex
588
589# The 5 letters with a grave accent (`) use the [6-0] dot combinations:
590
591 #Hex Dots Dec Char Description
592char \XC0 ( 23 5 78) # 192 ? latin capital letter a with grave
593char \XC8 ( 23 5678) # 200 ? latin capital letter e with grave
594char \XCC ( 23 678) # 204 ? latin capital letter i with grave
595char \XD2 ( 3 5 78) # 210 ? latin capital letter o with grave
596char \XD9 ( 3 5678) # 217 ? latin capital letter u with grave
597char \XE0 ( 23 5 8) # 224 ? latin small letter a with grave
598char \XE8 ( 23 56 8) # 232 ? latin small letter e with grave
599char \XEC ( 23 6 8) # 236 ? latin small letter i with grave
600char \XF2 ( 3 5 8) # 242 ? latin small letter o with grave
601char \XF9 ( 3 56 8) # 249 ? latin small letter u with grave
602
603# The 6 letters with an acute accent (') use the [a-f] dot combinations with
604# dots 3 and 6 added:
605
606 #Hex Dots Dec Char Description
607char \XC1 (1 678) # 193 ? latin capital letter a with acute
608char \XC9 (12 678) # 201 ? latin capital letter e with acute
609char \XCD (1 4 678) # 205 ? latin capital letter i with acute
610char \XD3 (1 45678) # 211 ? latin capital letter o with acute
611char \XDA (1 5678) # 218 ? latin capital letter u with acute
612char \XDD (12 4 678) # 221 ? latin capital letter y with acute
613char \XE1 (1 6 8) # 225 ? latin small letter a with acute
614char \XE9 (12 6 8) # 233 ? latin small letter e with acute
615char \XED (1 4 6 8) # 237 ? latin small letter i with acute
616char \XF3 (1 456 8) # 243 ? latin small letter o with acute
617char \XFA (1 56 8) # 250 ? latin small letter u with acute
618char \XFD (12 4 6 8) # 253 ? latin small letter y with acute
619
620# The 6 letters with a dieresis accent (") use the [f-j] dot combinations with
621# dots 3 and 6 added, and the number sign (because it fits the sequence
622# reasonably well):
623
624 #Hex Dots Dec Char Description
625char \XC4 (1234 678) # 196 ? latin capital letter a with diaeresis
626char \XCB (12345678) # 203 ? latin capital letter e with diaeresis
627char \XCF (123 5678) # 207 ? latin capital letter i with diaeresis
628char \XD6 ( 234 678) # 214 ? latin capital letter o with diaeresis
629char \XDC ( 2345678) # 220 ? latin capital letter u with diaeresis
630char \XE4 (1234 6 8) # 228 ? latin small letter a with diaeresis
631char \XEB (123456 8) # 235 ? latin small letter e with diaeresis
632char \XEF (123 56 8) # 239 ? latin small letter i with diaeresis
633char \XF6 ( 234 6 8) # 246 ? latin small letter o with diaeresis
634char \XFC ( 23456 8) # 252 ? latin small letter u with diaeresis
635char \XFF ( 3456 8) # 255 ? latin small letter y with diaeresis
636
637# There is no uppercase y-dieresis in the Latin1 character set. The German
638# lowercase double-s, which also doesn't have an uppercase counterpart in the
639# Latin1 character set, uses its representation:
640
641 #Hex Dots Dec Char Description
642char \XDF ( 345678) # 223 ? latin small letter sharp s
643
644# The remaining accented letters are:
645
646 #Hex Dots Dec Char Description
647char \XC3 ( 5 78) # 195 ? latin capital letter a with tilde
648char \XD1 ( 4 678) # 209 ? latin capital letter n with tilde
649char \XD5 ( 5678) # 213 ? latin capital letter o with tilde
650char \XC5 ( 345 78) # 197 ? latin capital letter a with ring above
651char \XC7 ( 34 678) # 199 ? latin capital letter c with cedilla
652char \XD8 ( 34 78) # 216 ? latin capital letter o with stroke
653char \XC6 ( 3 78) # 198 ? latin capital letter ae
654char \XD0 ( 678) # 208 ? latin capital letter eth
655char \XDE ( 3 678) # 222 ? latin capital letter thorn
656char \XE3 ( 5 8) # 227 ? latin small letter a with tilde
657char \XF1 ( 4 6 8) # 241 ? latin small letter n with tilde
658char \XF5 ( 56 8) # 245 ? latin small letter o with tilde
659char \XE5 ( 345 8) # 229 ? latin small letter a with ring above
660char \XE7 ( 34 6 8) # 231 ? latin small letter c with cedilla
661char \XF8 ( 34 8) # 248 ? latin small letter o with stroke
662char \XE6 ( 3 8) # 230 ? latin small letter ae
663char \XF0 ( 6 8) # 240 ? latin small letter eth
664char \XFE ( 3 6 8) # 254 ? latin small letter thorn
665
666# Some characters are the same as other characters which they resemble but with
667# dot 7 added:
668
669 #Hex Dots Dec Char Description
670char \XAD ( 3 67 ) # 173 ? soft hyphen
671char \XAB (12 67 ) # 171 ? left-pointing double angle quotation mark
672char \XBB ( 345 7 ) # 187 ? right-pointing double angle quotation mark
673char \XA6 (1 567 ) # 166 ? broken bar
674char \XB9 ( 2 7 ) # 185 ? superscript one
675char \XB2 ( 23 7 ) # 178 ? superscript two
676char \XB3 ( 2 5 7 ) # 179 ? superscript three
677char \XB1 ( 34 67 ) # 177 ? plus-minus sign
678char \XD7 (1 67 ) # 215 ? multiplication sign
679char \XF7 ( 34 7 ) # 247 ? division sign
680char \XB7 ( 4 67 ) # 183 ? middle dot
681char \XA1 ( 234 67 ) # 161 ? inverted exclamation mark
682char \XBF (1 4567 ) # 191 ? inverted question mark
683char \XA2 (12 4 67 ) # 162 ? cent sign
684char \XA3 ( 34567 ) # 163 ? pound sign
685
686# A few more characters follow this same convention but their relationships
687# to their base characters is a bit obscure:
688
689 #Hex Dots Dec Char Description
690char \XA4 (1 4 67 ) # 164 ? currency sign
691char \XA5 (1234 67 ) # 165 ? yen sign
692
693# Some characters are represented by the first letters of their names lowered
694# by one row of dots:
695
696 #Hex Dots Dec Char Description
697char \XAC ( 2 567 ) # 172 ? not sign
698char \XB6 ( 23 5 7 ) # 182 ? pilcrow sign
699char \XA9 ( 23 567 ) # 169 ? copyright sign
700char \XAE ( 23 67 ) # 174 ? registered sign
701char \XA7 ( 3 5 7 ) # 167 ? section sign
702char \XB0 ( 3 567 ) # 176 ? degree sign
703
704# The three fraction characters use combinations of dots 1 and 4 (which
705# progress from left to right as the value of the fraction increases) together
706# with dots 2,3,5,6,7:
707
708 #Hex Dots Dec Char Description
709char \XBC (123 567 ) # 188 ? vulgar fraction one quarter
710char \XBD (1234567 ) # 189 ? vulgar fraction one half
711char \XBE ( 234567 ) # 190 ? vulgar fraction three quarters
712
713# Each of the three extended accent characters is the same as its conventional
714# compose character but with dot 7 added:
715
716 #Hex Dots Dec Char Description
717char \XB4 ( 3 7 ) # 180 ? acute accent
718char \XB8 ( 67 ) # 184 ? cedilla
719char \XA8 ( 5 7 ) # 168 ? diaeresis
720
721# The two gender symbols are:
722
723 #Hex Dots Dec Char Description
724char \XBA ( 7 ) # 186 ? masculine ordinal indicator
725char \XAA ( 8) # 170 ? feminine ordinal indicator
726
727# The three remaining characters are:
728
729 #Hex Dots Dec Char Description
730char \XAF ( 2 67 ) # 175 ? macron
731char \XB5 ( 567 ) # 181 ? micro sign
732char \XA0 ( 78) # 160 ~ no-break space
733
734# The nonbreaking space is dots 7 and 8 because this presents a sequence of
735# nonbreaking spaces as a smooth low line segment.
736
737 #Hex Dots Dec Char Description
738char \X7F ( 4567 ) # 127 ^? delete
739
740*/
741
742
743//______________________________________________________________________________
744/*
745enum bsrCellKind2 {
746 // lower decimal digits
747 kCellLower1, // kDots2,
748 kCellLower2, // kDots23,
749 kCellLower3, // kDots25,
750 kCellLower4, // kDots256,
751 kCellLower5, // kDots26,
752 kCellLower6, // kDots235,
753 kCellLower7, // kDots2356,
754 kCellLower8, // kDots236,
755 kCellLower9, // kDots35,
756 kCellLower0, // kDots356;
757
758
759 // punctuation
760 kCellDot , // kDots256,
761 kCellComma , // kDots2,
762 kCellQuestionMark , // kDots26,
763 kCellSemicolon , // kDots23,
764 kCellColon , // kDots25,
765 kCellExclamationMark , // kDots235,
766 kCellLeftParenthesis , // kDots236,
767 kCellRightParenthesis, // kDots356,
768 kCellDoubleQuote , // kDots2356,
769 kCellDash , // kDots36,
770 kCellQuote , // kDots3;
771
772 // other symbols
773 kCellSlash , // kDots34,
774 kCellVerseEnd, // kDots345,
775 kCellItalics , // kDots456,
776 kCellAsterisk, // kDots35,
777 kCellExponent, // kDots4;
778
779 // intervals
780 kCellSecond , // kDots34,
781 kCellThird , // kDots346,
782 kCellFourth , // kDots3456,
783 kCellFifth , // kDots35,
784 kCellSixth , // kDots356,
785 kCellSeventh, // kDots25,
786 kCellEighth , // kDots36;
787
788 // triplets
789 kCellTriplet , // kDots23;
790
791 // keyboard hands
792 kCellRightHand, // { kDots46, kDots345 },
793 kCellLeftHand, // { kDots456, kDots345 };
794
795 // bars
796 kCellFinalDoubleBar , // { kDots126, kDots13 },
797 kCellSectionalDoubleBar, // { kDots126, kDots13, kDots3 };
798
799 // measure divisions
800 kCellMeasureDivisionSign, // { kDots46, kDots13 };
801
802 // words
803 kCellWordSign , // kDots345,
804 kCellWordApostrophe, // kDots6;
805
806 // capitals
807 kCellCapitalsSign, //, // kDots46;
808 kCellCapitalsSequenceSign, // { kCellCapitalsSign, kCellCapitalsSign };
809
810 // parentheses
811 kCellLiteraryLeftParenthesis , //{ kDots5, kDots126 },
812 kCellLiteraryRightParenthesis , //{ kDots5, kDots345 },
813 kCellMusicParentheses , //{ kDots6, kDots3 },
814 kCellSpecialParentheses , //{ kDots2356, kDots2356 };
815
816 // other symbols
817 kCellParagraph , //{ kDots5, kDots1234 },
818 kCellAmpersand , //{ kDots5, kDots123456 },
819 kCellUpsilon , //{ kDots45, kDots13456 }, // better name JMI ???
820 kCellEuro , //{ kDots45, kDots15 },
821 kCellDollar , //{ kDots45, kDots234 },
822 kCellPound , //{ kDots45, kDots123 },
823 kCellCopyright , //{ kDots5, kDots14 },
824 kCellRegisteredTradeMark , //{ kDots5, kDots1235 },
825 kCellTradeMark , //{ kDots5, kDots2345 },
826 kCellPercent , //{ kDots5, kDots346 },
827 kCellPerthousand , //{ kDots5, kDots346, kDots346 },
828 kCellPertenthousand , //{ kDots5, kDots346, kDots346, kDots346 };
829
830 // fermatas
831 kCellFermataOnANote , //{ kDots146, kDots126, kDots123 },
832 kCellFermataBetweenNotes , //{ kDots5, kDots126, kDots123 },
833 kCellFermataOverABarline , //{ kDots456, kDots126, kDots123 };
834};
835*/
836
837/*
838U+2800 ⠀ e2 a0 80 BRAILLE PATTERN BLANK
839U+2801 ⠁ e2 a0 81 BRAILLE PATTERN DOTS-1
840U+2802 ⠂ e2 a0 82 BRAILLE PATTERN DOTS-2
841U+2803 ⠃ e2 a0 83 BRAILLE PATTERN DOTS-12
842U+2804 ⠄ e2 a0 84 BRAILLE PATTERN DOTS-3
843U+2805 ⠅ e2 a0 85 BRAILLE PATTERN DOTS-13
844U+2806 ⠆ e2 a0 86 BRAILLE PATTERN DOTS-23
845U+2807 ⠇ e2 a0 87 BRAILLE PATTERN DOTS-123
846U+2808 ⠈ e2 a0 88 BRAILLE PATTERN DOTS-4
847U+2809 ⠉ e2 a0 89 BRAILLE PATTERN DOTS-14
848U+280A ⠊ e2 a0 8a BRAILLE PATTERN DOTS-24
849U+280B ⠋ e2 a0 8b BRAILLE PATTERN DOTS-124
850U+280C ⠌ e2 a0 8c BRAILLE PATTERN DOTS-34
851U+280D ⠍ e2 a0 8d BRAILLE PATTERN DOTS-134
852U+280E ⠎ e2 a0 8e BRAILLE PATTERN DOTS-234
853U+280F ⠏ e2 a0 8f BRAILLE PATTERN DOTS-1234
854U+2810 ⠐ e2 a0 90 BRAILLE PATTERN DOTS-5
855U+2811 ⠑ e2 a0 91 BRAILLE PATTERN DOTS-15
856U+2812 ⠒ e2 a0 92 BRAILLE PATTERN DOTS-25
857U+2813 ⠓ e2 a0 93 BRAILLE PATTERN DOTS-125
858U+2814 ⠔ e2 a0 94 BRAILLE PATTERN DOTS-35
859U+2815 ⠕ e2 a0 95 BRAILLE PATTERN DOTS-135
860U+2816 ⠖ e2 a0 96 BRAILLE PATTERN DOTS-235
861U+2817 ⠗ e2 a0 97 BRAILLE PATTERN DOTS-1235
862U+2818 ⠘ e2 a0 98 BRAILLE PATTERN DOTS-45
863U+2819 ⠙ e2 a0 99 BRAILLE PATTERN DOTS-145
864U+281A ⠚ e2 a0 9a BRAILLE PATTERN DOTS-245
865U+281B ⠛ e2 a0 9b BRAILLE PATTERN DOTS-1245
866U+281C ⠜ e2 a0 9c BRAILLE PATTERN DOTS-345
867U+281D ⠝ e2 a0 9d BRAILLE PATTERN DOTS-1345
868U+281E ⠞ e2 a0 9e BRAILLE PATTERN DOTS-2345
869U+281F ⠟ e2 a0 9f BRAILLE PATTERN DOTS-12345
870U+2820 ⠠ e2 a0 a0 BRAILLE PATTERN DOTS-6
871U+2821 ⠡ e2 a0 a1 BRAILLE PATTERN DOTS-16
872U+2822 ⠢ e2 a0 a2 BRAILLE PATTERN DOTS-26
873U+2823 ⠣ e2 a0 a3 BRAILLE PATTERN DOTS-126
874U+2824 ⠤ e2 a0 a4 BRAILLE PATTERN DOTS-36
875U+2825 ⠥ e2 a0 a5 BRAILLE PATTERN DOTS-136
876U+2826 ⠦ e2 a0 a6 BRAILLE PATTERN DOTS-236
877U+2827 ⠧ e2 a0 a7 BRAILLE PATTERN DOTS-1236
878U+2828 ⠨ e2 a0 a8 BRAILLE PATTERN DOTS-46
879U+2829 ⠩ e2 a0 a9 BRAILLE PATTERN DOTS-146
880U+282A ⠪ e2 a0 aa BRAILLE PATTERN DOTS-246
881U+282B ⠫ e2 a0 ab BRAILLE PATTERN DOTS-1246
882U+282C ⠬ e2 a0 ac BRAILLE PATTERN DOTS-346
883U+282D ⠭ e2 a0 ad BRAILLE PATTERN DOTS-1346
884U+282E ⠮ e2 a0 ae BRAILLE PATTERN DOTS-2346
885U+282F ⠯ e2 a0 af BRAILLE PATTERN DOTS-12346
886U+2830 ⠰ e2 a0 b0 BRAILLE PATTERN DOTS-56
887U+2831 ⠱ e2 a0 b1 BRAILLE PATTERN DOTS-156
888U+2832 ⠲ e2 a0 b2 BRAILLE PATTERN DOTS-256
889U+2833 ⠳ e2 a0 b3 BRAILLE PATTERN DOTS-1256
890U+2834 ⠴ e2 a0 b4 BRAILLE PATTERN DOTS-356
891U+2835 ⠵ e2 a0 b5 BRAILLE PATTERN DOTS-1356
892U+2836 ⠶ e2 a0 b6 BRAILLE PATTERN DOTS-2356
893U+2837 ⠷ e2 a0 b7 BRAILLE PATTERN DOTS-12356
894U+2838 ⠸ e2 a0 b8 BRAILLE PATTERN DOTS-456
895U+2839 ⠹ e2 a0 b9 BRAILLE PATTERN DOTS-1456
896U+283A ⠺ e2 a0 ba BRAILLE PATTERN DOTS-2456
897U+283B ⠻ e2 a0 bb BRAILLE PATTERN DOTS-12456
898U+283C ⠼ e2 a0 bc BRAILLE PATTERN DOTS-3456
899U+283D ⠽ e2 a0 bd BRAILLE PATTERN DOTS-13456
900U+283E ⠾ e2 a0 be BRAILLE PATTERN DOTS-23456
901U+283F ⠿ e2 a0 bf BRAILLE PATTERN DOTS-123456
902
903
904FROM http://unicode.org/faq/utf_bom.html#BOM :
905
906
907Q: How do I write a UTF converter?
908
909A: The freely available open source project International Components for Unicode (ICU) has UTF conversion built into it. The latest version may be downloaded from the ICU Project web site.
910*
911http://site.icu-project.org
912
913
914Bytes Encoding Form
91500 00 FE FF UTF-32, big-endian
916FF FE 00 00 UTF-32, little-endian
917FE FF UTF-16, big-endian
918FF FE UTF-16, little-endian
919EF BB BF UTF-8
920
921
922Q: Is there a standard method to package a Unicode character so it fits an 8-Bit ASCII stream?
923
924A: There are three or four options for making Unicode fit into an 8-bit format.
925
926a) Use UTF-8. This preserves ASCII, but not Latin-1, because the characters >127 are different from Latin-1. UTF-8 uses the bytes in the ASCII only for ASCII characters. Therefore, it works well in any environment where ASCII characters have a significance as syntax characters, e.g. file name syntaxes, markup languages, etc., but where the all other characters may use arbitrary bytes.
927Example: “Latin Small Letter s with Acute” (015B) would be encoded as two bytes: C5 9B.
928
929b) Use Java or C style escapes, of the form \uXXXXX or \xXXXXX. This format is not standard for text files, but well defined in the framework of the languages in question, primarily for source files.
930Example: The Polish word “wyjście” with character “Latin Small Letter s with Acute” (015B) in the middle (ś is one character) would look like: “wyj\u015Bcie".
931
932c) Use the &#xXXXX; or &#DDDDD; numeric character escapes as in HTML or XML. Again, these are not standard for plain text files, but well defined within the framework of these markup languages.
933Example: “wyjście” would look like “wyj&#x015B;cie"
934
935d) Use SCSU. This format compresses Unicode into 8-bit format, preserving most of ASCII, but using some of the control codes as commands for the decoder. However, while ASCII text will look like ASCII text after being encoded in SCSU, other characters may occasionally be encoded with the same byte values, making SCSU unsuitable for 8-bit channels that blindly interpret any of the bytes as ASCII characters.
936Example: “<SC2> wyjÛcie” where <SC2> indicates the byte 0x12 and “Û” corresponds to byte 0xDB. [AF]
937
938
939A: The following table summarizes some of the properties of each of the UTFs.
940
941Name UTF-8 UTF-16 UTF-16BE UTF-16LE UTF-32 UTF-32BE UTF-32LE
942Smallest code point 0000 0000 0000 0000 0000 0000 0000
943Largest code point 10FFFF 10FFFF 10FFFF 10FFFF 10FFFF 10FFFF 10FFFF
944Code unit size 8 bits 16 bits 16 bits 16 bits 32 bits 32 bits 32 bits
945Byte order N/A <BOM> big-endian little-endian <BOM> big-endian little-endian
946Fewest bytes per character 1 2 2 2 4 4 4
947Most bytes per character 4 4 4 4 4 4 4
948
949
950
951Q: What’s the algorithm to convert from UTF-16 to character codes?
952
953A: The Unicode Standard used to contain a short algorithm, now there is just a bit distribution table. Here are three short code snippets that translate the information from the bit distribution table into C code that will convert to and from UTF-16.
954
955Using the following type definitions
956
957typedef unsigned int16 UTF16;
958typedef unsigned int32 UTF32;
959the first snippet calculates the high (or leading) surrogate from a character code C.
960
961const UTF16 HI_SURROGATE_START = 0xD800;
962UTF16 X = (UTF16) C;
963UTF32 U = (C >> 16) & ((1 << 5) - 1);
964UTF16 W = (UTF16) U - 1;
965UTF16 HiSurrogate = HI_SURROGATE_START | (W << 6) | X >> 10;
966where X, U and W correspond to the labels used in Table 3-5 UTF-16 Bit Distribution. The next snippet does the same for the low surrogate.
967
968const UTF16 LO_SURROGATE_START = 0xDC00;
969UTF16 X = (UTF16) C;
970UTF16 LoSurrogate = (UTF16) (LO_SURROGATE_START | X & ((1 << 10) - 1));
971Finally, the reverse, where hi and lo are the high and low surrogate, and C the resulting character
972
973UTF32 X = (hi & ((1 << 6) -1)) << 10 | lo & ((1 << 10) -1);
974UTF32 W = (hi >> 6) & ((1 << 5) - 1);
975UTF32 U = W + 1;
976UTF32 C = U << 16 | X;
977A caller would need to ensure that C, hi, and lo are in the appropriate ranges. [AF]
978
979Q: Isn’t there a simpler way to do this?
980
981A: There is a much simpler computation that does not try to follow the bit distribution table.
982
983// constants
984const UTF32 LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
985const UTF32 SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;
986
987// computations
988UTF16 lead = LEAD_OFFSET + (codepoint >> 10);
989UTF16 trail = 0xDC00 + (codepoint & 0x3FF);
990
991UTF32 codepoint = (lead << 10) + trail + SURROGATE_OFFSET;
992
993
994*/