File indexing completed on 2024-05-17 22:39:36
dc727fa7b… Alex*0001
fb270ddca… Alex*0002
0003
0004
0005
0006
0799c1a78… Alex*0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
360a3f914… Jona*0019
0799c1a78… Alex*0020
fb270ddca… Alex*0021
dc727fa7b… Alex*0022 use strict;
8cdb593f5… Alex*0023 use XML::LibXML;
3ec7c467c… Alex*0024 use Digest::SHA;
8cdb593f5… Alex*0025 use Encode;
3ec7c467c… Alex*0026 use Time::Local ;
dc727fa7b… Alex*0027
# Versions of the upstream data sets.  Each one is interpolated into the
# download URLs below (and, for the Unicode files, into the "name" field).
my $UNIVERSION    = "15.1.0";
my $CLDRVERSION   = "45";
my $ISO639VERSION = "20230123";
my $TZVERSION     = "2024a";

# Download table, keyed by a short data-set identifier.  Every entry holds:
#   url  - location the file is fetched from
#   sha  - expected digest, 64 hex digits (presumably SHA-256, given the
#          Digest::SHA import -- confirm against the code that verifies it)
#   name - explicit file name; only the version-dependent Unicode entries
#          set it
my %data_files =
(
    ucd => {
        url  => "https://www.unicode.org/Public/$UNIVERSION/ucd/UCD.zip",
        name => "UCD-$UNIVERSION.zip",
        sha  => 'cb1c663d053926500cd501229736045752713a066bd75802098598b7a7056177',
    },
    unihan => {
        url  => "https://www.unicode.org/Public/$UNIVERSION/ucd/Unihan.zip",
        name => "Unihan-$UNIVERSION.zip",
        sha  => 'a0226610e324bcf784ac380e11f4cbf533ee1e6b3d028b0991bf8c0dc3f85853',
    },
    idna => {
        url  => "https://www.unicode.org/Public/idna/$UNIVERSION/IdnaMappingTable.txt",
        name => "IdnaMappingTable-$UNIVERSION.txt",
        sha  => '402cbd285f1f952fcd0834b63541d54f69d3d8f1b8f8599bf71a1a14935f82c4',
    },
    cldr => {
        url => "https://github.com/unicode-org/cldr/archive/refs/tags/release-$CLDRVERSION.zip",
        sha => '29f4dafa918017e74cb84edf19fb707894cbce1cf7da1dd89fff24c107a66ecb',
    },
    cldr33 => {
        url => 'https://www.unicode.org/Public/cldr/33/cldr-common-33.0.zip',
        sha => 'fa3490082c086d21257153609642f54fcf788fcfda4966fe67f3f6daca0d58b9',
    },
    sorting => {
        url => 'https://download.microsoft.com/download/C/F/7/CF713A5E-9FBC-4FD6-9246-275F65C0E498/Windows 10 Sorting Weight Table.txt',
        sha => '81fcfa1e5ed3e3a94d329959ff7d97d522ddf9d653d2c4d6ddcccc5cd4df663f',
    },
    codepages => {
        url => 'https://download.microsoft.com/download/C/F/7/CF713A5E-9FBC-4FD6-9246-275F65C0E498/Windows Supported Code Page Data Files.zip',
        sha => '5074e6dd253056ba61fc6c870c9a955467855129c6ad3a51761c386b301b125a',
    },
    iso639 => {
        url => "https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3_Code_Tables_$ISO639VERSION.zip",
        sha => '884faa6cc5ac5181ed7969eed75355c1bc665447614cf4c06c62e87b38fe6a97',
    },
    ksx1001 => {
        url => 'https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/KSX1001.TXT',
        sha => 'd8d2a35206ac0ea2865f5d801c9d6717f735bf46f263a658a64a960abe59e371',
    },
    jis0208 => {
        url => 'https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT',
        sha => '1c571870457f19c97720631fa83ee491549a96ba1436da1296786a67d8632e87',
    },
    jis0212 => {
        url => 'https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0212.TXT',
        sha => '477820bb3055bbcc90880d788cd95607d221dc94457bae249231adecf13c12e6',
    },
    tzdata => {
        url => "https://data.iana.org/time-zones/releases/tzdata$TZVERSION.tar.gz",
        sha => '0d0434459acbd2059a7a8da1f3304a84a86591f6ed69c6248fffa502b6edffe3',
    },
);
0060
3ec7c467c… Alex*0061
fb270ddca… Alex*0062
# Code point of '?' (63); judging by the name, the default replacement
# character -- confirm against the code that uses it.
my $DEF_CHAR = ord('?');

# Highest code point handled (U+10FFFF).
my $MAX_CHAR = 0x10FFFF;

# Backslash-separated key paths.  Most components carry a leading '-', which
# is reproduced exactly; its meaning is not visible in this part of the file.
my $nlskey  = join( "\\", "-SYSTEM", "-CurrentControlSet", "-Control", "-Nls" );
my $zonekey = join( "\\", "-Software", "-Microsoft", "-Windows NT", "-CurrentVersion", "Time Zones" );

630f605c2… Alex*0070
# Relative paths of the per-codepage mapping files, one per codepage number,
# in ascending numeric order.  The numbers are kept as strings so that the
# leading zero of "037" survives.  The "CodpageFiles" directory spelling is
# intentional here: it is reproduced exactly as the script has always used it.
my @allfiles = map { "CodpageFiles/$_.txt" } qw(
    037 437 500 708 720 737 775
    850 852 855 857 860 861 862 863 864 865 866 869 874 875
    932 936 949 950
    1026 1250 1251 1252 1253 1254 1255 1256 1257 1258 1361
    10000 10001 10002 10003 10004 10005 10006 10007 10008
    10010 10017 10021 10029 10079 10081 10082
    20127 20866 21866
    28591 28592 28593 28594 28595 28596 28597 28598 28599
    28603 28605
);
0140
3ec7c467c… Alex*0141 my @timezone_files = ;
c97bb4c5d… Alex*0142
# Character-type flag bits, keyed by flag name.  Each flag is a single bit,
# except "alpha", which additionally sets bit 31.
my %ctype =
(
    # flags in the low 16 bits
    upper         => 1 << 0,
    lower         => 1 << 1,
    digit         => 1 << 2,
    space         => 1 << 3,
    punct         => 1 << 4,
    cntrl         => 1 << 5,
    blank         => 1 << 6,
    xdigit        => 1 << 7,
    alpha         => ( 1 << 8 ) | 0x80000000,   # NOTE(review): purpose of bit 31 is not visible here
    defin         => 1 << 9,

    # flags in bits 16..28
    nonspacing    => 1 << 16,
    diacritic     => 1 << 17,
    vowelmark     => 1 << 18,
    symbol        => 1 << 19,
    katakana      => 1 << 20,
    hiragana      => 1 << 21,
    halfwidth     => 1 << 22,
    fullwidth     => 1 << 23,
    ideograph     => 1 << 24,
    kashida       => 1 << 25,
    lexical       => 1 << 26,
    highsurrogate => 1 << 27,
    lowsurrogate  => 1 << 28,
);
0171
# Bracket kind codes: "o" => 0, "c" => 1 (presumably open/close -- confirm).
# The code is the position of the name in the list.
my %bracket_types = do {
    my @kinds = qw(o c);
    map { ( $kinds[$_] => $_ ) } 0 .. $#kinds;
};
0177
# Indic category names mapped to consecutive integer codes, starting at 0
# for "Other".  The code of a name is its position in the list below, so new
# names must be appended at the end to keep the existing codes stable.
my %indic_types = do {
    my @ordered = qw(
        Other
        Bindu
        Visarga
        Avagraha
        Nukta
        Virama
        Vowel_Independent
        Vowel_Dependent
        Vowel
        Consonant_Placeholder
        Consonant
        Consonant_Dead
        Consonant_Succeeding_Repha
        Consonant_Subjoined
        Consonant_Medial
        Consonant_Final
        Consonant_Head_Letter
        Modifying_Letter
        Tone_Letter
        Tone_Mark
        Register_Shifter
        Consonant_Preceding_Repha
        Pure_Killer
        Invisible_Stacker
        Gemination_Mark
        Cantillation_Mark
        Non_Joiner
        Joiner
        Number_Joiner
        Number
        Brahmi_Joining_Number
        Consonant_With_Stacker
        Consonant_Prefixed
        Syllable_Modifier
        Consonant_Killer
        Consonant_Initial_Postfixed
    );
    map { ( $ordered[$_] => $_ ) } 0 .. $#ordered;
};
0217
# Positional category names mapped to consecutive integer codes.  Codes
# start at 1 (no name maps to 0): a name's code is its list position plus one.
my %matra_types = do {
    my @ordered = qw(
        Right
        Left
        Visual_Order_Left
        Left_And_Right
        Top
        Bottom
        Top_And_Bottom
        Top_And_Right
        Top_And_Left
        Top_And_Left_And_Right
        Bottom_And_Right
        Top_And_Bottom_And_Right
        Overstruck
        Invisible
        Bottom_And_Left
        Top_And_Bottom_And_Left
    );
    map { ( $ordered[$_] => $_ + 1 ) } 0 .. $#ordered;
};
0237
# Break class abbreviations mapped to consecutive integer codes starting at 1
# (presumably the UAX #14 line-break classes -- confirm).  A class's code is
# its list position plus one, so new classes must be appended at the end.
my %break_types = do {
    my @ordered = qw(
        BK CR LF CM SG GL CB SP
        ZW NL WJ JL JV JT H2 H3
        XX OP CL CP QU NS EX SY
        IS PR PO NU AL ID IN HY
        BB BA SA AI B2 HL CJ RI
        EB EM ZWJ AK AP AS VF VI
    );
    map { ( $ordered[$_] => $_ + 1 ) } 0 .. $#ordered;
};
0289
# Vertical orientation codes: R => 0, U => 1, Tr => 2, Tu => 3.
# The code is the position of the name in the list.
my %vertical_types = do {
    my @ordered = qw(R U Tr Tu);
    map { ( $ordered[$_] => $_ ) } 0 .. $#ordered;
};
0297
# Two-letter general category codes mapped to the combination of %ctype
# flag bits assigned to that category.  Every category includes "defin".
my %categories = do {
    # OR together the %ctype bits of the named flags.
    my $bits = sub {
        my $mask = 0;
        $mask |= $ctype{$_} for @_;
        return $mask;
    };

    (
        # letters
        "Lu" => $bits->(qw(defin alpha upper)),
        "Ll" => $bits->(qw(defin alpha lower)),
        "Lt" => $bits->(qw(defin alpha upper lower)),
        "Lm" => $bits->(qw(defin alpha)),
        "Lo" => $bits->(qw(defin alpha)),

        # marks
        "Mn" => $bits->(qw(defin nonspacing)),
        "Mc" => $bits->(qw(defin)),
        "Me" => $bits->(qw(defin)),

        # numbers
        "Nd" => $bits->(qw(defin digit)),
        "Nl" => $bits->(qw(defin alpha)),
        "No" => $bits->(qw(defin)),

        # separators
        "Zs" => $bits->(qw(defin space)),
        "Zl" => $bits->(qw(defin space)),
        "Zp" => $bits->(qw(defin space)),

        # control, format, surrogate, private use, unassigned
        "Cc" => $bits->(qw(defin cntrl)),
        "Cf" => $bits->(qw(defin cntrl)),
        "Cs" => $bits->(qw(defin)),
        "Co" => $bits->(qw(defin)),
        "Cn" => $bits->(qw(defin)),

        # punctuation
        "Pc" => $bits->(qw(defin punct)),
        "Pd" => $bits->(qw(defin punct)),
        "Ps" => $bits->(qw(defin punct)),
        "Pe" => $bits->(qw(defin punct)),
        "Pi" => $bits->(qw(defin punct)),
        "Pf" => $bits->(qw(defin punct)),
        "Po" => $bits->(qw(defin punct)),

        # symbols
        "Sm" => $bits->(qw(defin symbol)),
        "Sc" => $bits->(qw(defin symbol)),
        "Sk" => $bits->(qw(defin symbol)),
        "So" => $bits->(qw(defin symbol)),
    );
};
0331
0332
# Explicit code point lists per flag name.  The keys are names that also
# appear in %ctype; how the lists are combined with the per-category flags
# is decided by code outside this block.
my %special_categories =
(
    xdigit => [
        0x30 .. 0x39,                   # '0'..'9'
        0x41 .. 0x46,                   # 'A'..'F'
        0x61 .. 0x66,                   # 'a'..'f'
        0xff10 .. 0xff19, 0xff21 .. 0xff26, 0xff41 .. 0xff46,
    ],
    space => [ 0x09 .. 0x0d, 0x85 ],
    blank => [ 0x09, 0x20, 0xa0, 0x3000, 0xfeff ],
    cntrl => [
        0x070f, 0x200c, 0x200d,
        0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d, 0x202e,
        0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
        0xfff9, 0xfffa, 0xfffb,
    ],
    punct => [
        0x24, 0x2b, 0x3c .. 0x3e, 0x5e, 0x60, 0x7c, 0x7e, 0xa2 .. 0xbe,
        0xd7, 0xf7,
    ],
    digit => [ 0xb2, 0xb3, 0xb9 ],
    lower => [ 0xaa, 0xba, 0x2071, 0x207f ],
    nonspacing => [
        0xc0 .. 0xc5, 0xc7 .. 0xcf, 0xd1 .. 0xd6, 0xd8 .. 0xdd,
        0xe0 .. 0xe5, 0xe7 .. 0xef, 0xf1 .. 0xf6, 0xf8 .. 0xfd, 0xff,
        0x6de, 0x1929 .. 0x192b, 0x302e .. 0x302f,
    ],
    diacritic => [ 0x5e, 0x60, 0xb7, 0xd8, 0xf8 ],
    symbol => [
        0x09 .. 0x0d, 0x20 .. 0x23, 0x25, 0x26, 0x28 .. 0x2a, 0x2c,
        0x2e .. 0x2f, 0x3a .. 0x40,
        0x5b .. 0x60, 0x7b .. 0x7e, 0xa0 .. 0xa9, 0xab .. 0xb1,
        0xb4 .. 0xb8, 0xbb, 0xbf,
        0x02b9 .. 0x02ba, 0x02c6 .. 0x02cf,
    ],
    halfwidth => [ 0x20 .. 0x7e, 0xa2 .. 0xa3, 0xa5 .. 0xa6, 0xac, 0xaf, 0x20a9 ],
    fullwidth => [
        0x2018 .. 0x2019, 0x201c .. 0x201d, 0x3000 .. 0x3002,
        0x300c .. 0x300d, 0x309b .. 0x309c,
        # NOTE(review): 0x30ad is listed twice; kept as-is to preserve the list.
        0x30a1 .. 0x30ab, 0x30ad, 0x30ad, 0x30af, 0x30b1, 0x30b3, 0x30b5,
        0x30b7, 0x30b9,
        0x30bb, 0x30bd, 0x30bf, 0x30c1, 0x30c3, 0x30c4, 0x30c6, 0x30c8,
        0x30ca .. 0x30cf,
        0x30d2, 0x30d5, 0x30d8, 0x30db, 0x30de .. 0x30ed, 0x30ef,
        0x30f2 .. 0x30f3, 0x30fb,
        0x3131 .. 0x3164,
    ],
    ideograph => [ 0x3006 .. 0x3007 ],
    lexical => [
        0x22, 0x24, 0x27, 0x2d, 0x2f, 0x3d, 0x40, 0x5c, 0x5e .. 0x60, 0x7e,
        0xa8, 0xaa, 0xad, 0xaf, 0xb4, 0xb8, 0xba,
        0x02b0 .. 0x02b8, 0x02bc, 0x02c7, 0x02ca .. 0x02cb, 0x02cf,
        0x02d8 .. 0x02dd, 0x02e0 .. 0x02e3,
        0x037a, 0x0384 .. 0x0385, 0x0387, 0x0559 .. 0x055a, 0x0640,
        0x1fbd .. 0x1fc1,
        0x1fcd .. 0x1fcf, 0x1fdd .. 0x1fdf, 0x1fed .. 0x1fef,
        0x1ffd .. 0x1ffe, 0x2010 .. 0x2015,
        0x2032 .. 0x2034, 0x2038, 0x2043 .. 0x2044, 0x207b .. 0x207c,
        0x207f, 0x208b .. 0x208c,
        0x2212, 0x2215 .. 0x2216, 0x2500, 0x2504 .. 0x2505,
        0x2508 .. 0x2509, 0x254c .. 0x254d,
        0x3003, 0x301c, 0x3030 .. 0x3035, 0x309b .. 0x309e,
        0x30fd .. 0x30fe, 0xfe31 .. 0xfe32,
        0xfe58, 0xfe63, 0xfe66, 0xfe68 .. 0xfe69, 0xfe6b, 0xff04, 0xff07,
        0xff0d, 0xff0f,
        0xff1d, 0xff20, 0xff3c, 0xff3e, 0xff40, 0xff5e,
    ],
    kashida => [ 0x0640 ],
);
0372
# Bidirectional class abbreviations mapped to integer direction codes,
# listed here in ascending code order.  All nine explicit embedding,
# override and isolate classes share the single code 15.
my %directions =
(
    L   => 1,
    R   => 2,
    EN  => 3,
    ES  => 4,
    ET  => 5,
    AN  => 6,
    CS  => 7,
    B   => 8,
    S   => 9,
    WS  => 10,
    ON  => 11,
    AL  => 12,
    NSM => 13,
    BN  => 14,
    ( map { ( $_ => 15 ) } qw(LRE LRO RLE RLO PDF LRI RLI FSI PDI) ),
);
0399
# Bidirectional class abbreviations mapped to a second, coarser set of
# integer codes.  Compared with %directions: AL shares R's code (2), BN maps
# to 0, and NSM plus all explicit embedding/override/isolate classes share
# ON's code (11).
my %c2_types =
(
    BN  => 0,
    L   => 1,
    R   => 2,
    AL  => 2,
    EN  => 3,
    ES  => 4,
    ET  => 5,
    AN  => 6,
    CS  => 7,
    B   => 8,
    S   => 9,
    WS  => 10,
    ( map { ( $_ => 11 ) } qw(ON NSM LRE LRO RLE RLO PDF LRI RLI FSI PDI) ),
);
0426
# Bidirectional class abbreviations mapped to consecutive integer codes,
# starting at 0 for "ON".  Unlike %directions and %c2_types, every class has
# its own distinct code; a class's code is its position in the list.
my %bidi_types = do {
    my @ordered = qw(
        ON L R AN EN AL NSM CS ES ET BN S WS B
        RLO RLE LRO LRE PDF
        LRI RLI FSI PDI
    );
    map { ( $ordered[$_] => $_ ) } 0 .. $#ordered;
};
0453
# Joining type / joining group names mapped to integer codes.
# "D" and "C" deliberately share code 3; the codes are therefore written
# out explicitly rather than derived from list position.
my %joining_types =
(
    U             => 0,
    L             => 1,
    R             => 2,
    D             => 3,
    C             => 3,
    ALAPH         => 4,
    'DALATH RISH' => 5,
    T             => 6,
);
0465
8cdb593f5… Alex*0466 my @locales =
0467 (
0468 { name => "", lcid => 0x0000007f, file => "root", territory => "IV", sabbrevlangname => "IVL", sopentypelang =>"dflt" },
50c5eb31c… Alex*0469 { name => "aa", sopentypelang => "AFR" },
0470 { name => "aa-DJ" },
0471 { name => "aa-ER" },
0472 { name => "aa-ET" },
8cdb593f5… Alex*0473 { name => "af", lcid => 0x00000036, oemcp => 850, sabbrevlangname => "AFK", sopentypelang => "AFK" },
0474 { name => "af-NA" },
0475 { name => "af-ZA", lcid => 0x00000436 },
0476 { name => "agq" },
0477 { name => "agq-CM" },
0478 { name => "ak", sopentypelang => "TWI" },
0479 { name => "ak-GH" },
0480 { name => "am", lcid => 0x0000005e, sabbrevlangname => "AMH" },
0481 { name => "am-ET", lcid => 0x0000045e },
0482 { name => "ar", lcid => 0x00000001, territory => "SA", oemcp => 720, group => 13 },
0483 { name => "ar-001" },
0484 { name => "ar-AE", lcid => 0x00003801, sabbrevlangname => "ARU" },
0485 { name => "ar-BH", lcid => 0x00003c01, sabbrevlangname => "ARH" },
0486 { name => "ar-DJ" },
4172c0482… Alex*0487 { name => "ar-DZ", lcid => 0x00001401, sabbrevlangname => "ARG", nativedigits => "0123456789" },
8cdb593f5… Alex*0488 { name => "ar-EG", lcid => 0x00000c01, sabbrevlangname => "ARE" },
0489 { name => "ar-EH" },
0490 { name => "ar-ER" },
0491 { name => "ar-IL" },
0492 { name => "ar-IQ", lcid => 0x00000801, sabbrevlangname => "ARI" },
0493 { name => "ar-JO", lcid => 0x00002c01, sabbrevlangname => "ARJ" },
0494 { name => "ar-KM" },
0495 { name => "ar-KW", lcid => 0x00003401, sabbrevlangname => "ARK" },
0496 { name => "ar-LB", lcid => 0x00003001, sabbrevlangname => "ARB" },
4172c0482… Alex*0497 { name => "ar-LY", lcid => 0x00001001, sabbrevlangname => "ARL", nativedigits => "0123456789" },
0498 { name => "ar-MA", lcid => 0x00001801, sabbrevlangname => "ARM", nativedigits => "0123456789" },
8cdb593f5… Alex*0499 { name => "ar-MR" },
0500 { name => "ar-OM", lcid => 0x00002001, sabbrevlangname => "ARO" },
0501 { name => "ar-PS" },
0502 { name => "ar-QA", lcid => 0x00004001, sabbrevlangname => "ARQ" },
0503 { name => "ar-SA", lcid => 0x00000401, sabbrevlangname => "ARA" },
0504 { name => "ar-SD" },
0505 { name => "ar-SO" },
0506 { name => "ar-SS" },
0507 { name => "ar-SY", lcid => 0x00002801, sabbrevlangname => "ARS" },
0508 { name => "ar-TD" },
4172c0482… Alex*0509 { name => "ar-TN", lcid => 0x00001c01, sabbrevlangname => "ART", nativedigits => "0123456789" },
8cdb593f5… Alex*0510 { name => "ar-YE", lcid => 0x00002401, sabbrevlangname => "ARY" },
50c5eb31c… Alex*0511 { name => "arn", lcid => 0x0000007a, oemcp => 850, ebcdiccp => 20284, slist => ",", sabbrevlangname => "MPD", sopentypelang => "MAP" },
0512 { name => "arn-CL", lcid => 0x0000047a },
8cdb593f5… Alex*0513 { name => "arn-Latn", alias => "arn" },
0514 { name => "arn-Latn-CL", alias => "arn-CL" },
0515 { name => "as", lcid => 0x0000004d, slist => ",", group => 15 },
0516 { name => "as-IN", lcid => 0x0000044d },
0517 { name => "asa" },
0518 { name => "asa-TZ" },
0519 { name => "ast" },
0520 { name => "ast-ES" },
0521 { name => "az", lcid => 0x0000002c, oemcp => 857, ebcdiccp => 20905, group => 2 },
0522 { name => "az-Cyrl", lcid => 0x0000742c, oemcp => 866, ebcdiccp => 20880, group => 5, sabbrevlangname => "AZC" },
0523 { name => "az-Cyrl-AZ", lcid => 0x0000082c },
0524 { name => "az-Latn", lcid => 0x0000782c },
0525 { name => "az-Latn-AZ", lcid => 0x0000042c },
50c5eb31c… Alex*0526 { name => "ba", lcid => 0x0000006d, oemcp => 866, group => 5, sabbrevlangname => "BAS", sopentypelang => "BSH" },
8cdb593f5… Alex*0527 { name => "ba-Cyrl", alias => "ba" },
0528 { name => "ba-Cyrl-RU", alias => "ba-RU" },
50c5eb31c… Alex*0529 { name => "ba-RU", lcid => 0x0000046d },
8cdb593f5… Alex*0530 { name => "bas" },
0531 { name => "bas-CM" },
0532 { name => "be", lcid => 0x00000023, oemcp => 866, ebcdiccp => 500, group => 5 },
0533 { name => "be-BY", lcid => 0x00000423 },
0534 { name => "bem" },
0535 { name => "bem-ZM" },
6d046dd98… Alex*0536 { name => "bew" },
0537 { name => "bew-ID" },
8cdb593f5… Alex*0538 { name => "bez" },
0539 { name => "bez-TZ" },
0540 { name => "bg", lcid => 0x00000002, oemcp => 866, ebcdiccp => 21025, group => 5, sabbrevlangname => "BGR", sopentypelang => "BGR" },
0541 { name => "bg-BG", lcid => 0x00000402 },
0542 { name => "bin", lcid => 0x00000066, oemcp => 850, dir => "exemplars", sabbrevlangname => "ZZZ", sopentypelang => "EDO" },
0543 { name => "bin-NG", lcid => 0x00000466, file => "bin", dir => "exemplars" },
6d046dd98… Alex*0544 { name => "blo" },
0545 { name => "blo-BJ" },
8cdb593f5… Alex*0546 { name => "bm", sopentypelang => "BMB" },
0547 { name => "bm-Latn", file => "bm" },
0548 { name => "bm-Latn-ML", file => "bm_ML" },
0549 { name => "bm-ML", alias => "bm-Latn-ML" },
0550 { name => "bn", lcid => 0x00000045, slist => ",", group => 15, sabbrevlangname => "BNB" },
0551 { name => "bn-BD", lcid => 0x00000845 },
0552 { name => "bn-IN", lcid => 0x00000445, sabbrevlangname => "BNG" },
0553 { name => "bo", lcid => 0x00000051, slist => ",", group => 15, sabbrevlangname => "BOB", sopentypelang => "TIB" },
0554 { name => "bo-CN", lcid => 0x00000451 },
0555 { name => "bo-IN", slist => "," },
0556 { name => "bo-Tibt", alias => "bo" },
0557 { name => "bo-Tibt-CN", alias => "bo-CN" },
0558 { name => "bo-Tibt-IN", alias => "bo-IN" },
0559 { name => "br", lcid => 0x0000007e, oemcp => 850, ebcdiccp => 20297 },
0560 { name => "br-FR", lcid => 0x0000047e },
0561 { name => "br-Latn", alias => "br" },
0562 { name => "br-Latn-FR", alias => "br-FR" },
0563 { name => "brx" },
0564 { name => "brx-IN" },
0565 { name => "bs", lcid => 0x0000781a, oemcp => 852, maccp => 10082, ebcdiccp => 870, group => 2, sabbrevlangname => "BSB" },
0566 { name => "bs-Cyrl", lcid => 0x0000641a, oemcp => 855, group => 5, sabbrevlangname => "BSC" },
0567 { name => "bs-Cyrl-BA", lcid => 0x0000201a },
0568 { name => "bs-Latn", lcid => 0x0000681a },
0569 { name => "bs-Latn-BA", lcid => 0x0000141a },
50c5eb31c… Alex*0570 { name => "byn", sopentypelang => "BIL" },
0571 { name => "byn-ER" },
8cdb593f5… Alex*0572 { name => "ca", lcid => 0x00000003, oemcp => 850 },
0573 { name => "ca-AD", maccp => 65001 },
0574 { name => "ca-ES", lcid => 0x00000403 },
0575 { name => "ca-ES-valencia", lcid => 0x00000803, file => "ca_ES_VALENCIA", sabbrevlangname => "VAL" },
0576 { name => "ca-FR", maccp => 65001 },
0577 { name => "ca-IT", maccp => 65001 },
0578 { name => "ccp" },
0579 { name => "ccp-BD", alias => "ccp-Cakm-BD" },
0580 { name => "ccp-Cakm", file => "ccp" },
0581 { name => "ccp-Cakm-BD", file => "ccp_BD" },
0582 { name => "ccp-Cakm-IN", file => "ccp_IN" },
0583 { name => "ccp-IN", alias => "ccp-Cakm-IN" },
0584 { name => "ce" },
0585 { name => "ce-RU" },
0586 { name => "ceb" },
0587 { name => "ceb-Latn", file => "ceb" },
0588 { name => "ceb-Latn-PH", file => "ceb_PH" },
0589 { name => "ceb-PH", alias => "ceb-Latn-PH" },
0590 { name => "cgg" },
0591 { name => "cgg-UG" },
0592 { name => "chr", lcid => 0x0000005c, slist => ",", sabbrevlangname => "CRE" },
0593 { name => "chr-Cher", lcid => 0x00007c5c, file => "chr" },
0594 { name => "chr-Cher-US", lcid => 0x0000045c, file => "chr_US" },
0595 { name => "chr-US", alias => "chr-Cher-US" },
0596 { name => "ckb", alias => "ku" },
0597 { name => "ckb-IQ", alias => "ku-Arab-IQ" },
0598 { name => "ckb-IR", alias => "ku-Arab-IR" },
50c5eb31c… Alex*0599 { name => "co", lcid => 0x00000083, oemcp => 850, ebcdiccp => 20297 },
0600 { name => "co-FR", lcid => 0x00000483 },
8cdb593f5… Alex*0601 { name => "co-Latn", alias => "co" },
0602 { name => "co-Latn-FR", alias => "co-FR" },
0603 { name => "cs", lcid => 0x00000005, oemcp => 852, group => 2, sabbrevlangname => "CSY", sopentypelang => "CSY" },
0604 { name => "cs-CZ", lcid => 0x00000405 },
6d046dd98… Alex*0605 { name => "csw" },
0606 { name => "csw-CA" },
50c5eb31c… Alex*0607 { name => "cu", sopentypelang => "CSL" },
0608 { name => "cu-RU" },
8cdb593f5… Alex*0609 { name => "cy", lcid => 0x00000052, oemcp => 850, ebcdiccp => 20285, sabbrevlangname => "CYM", sopentypelang => "WEL" },
0610 { name => "cy-GB", lcid => 0x00000452 },
0611 { name => "da", lcid => 0x00000006, oemcp => 850, ebcdiccp => 20277 },
0612 { name => "da-DK", lcid => 0x00000406 },
0613 { name => "da-GL", maccp => 65001 },
0614 { name => "dav" },
0615 { name => "dav-KE" },
0616 { name => "de", lcid => 0x00000007, oemcp => 850, ebcdiccp => 20273 },
0617 { name => "de-AT", lcid => 0x00000c07, sabbrevlangname => "DEA" },
0618 { name => "de-BE" },
0619 { name => "de-CH", lcid => 0x00000807, sabbrevlangname => "DES" },
0620 { name => "de-DE", lcid => 0x00000407 },
0621 { name => "de-DE_phoneb", lcid => 0x00010407, alias => "de-DE" },
0622 { name => "de-DE-u-co-phonebk", alias => "de-DE_phoneb" },
0623 { name => "de-IT", oemcp => 65001 },
0624 { name => "de-LI", lcid => 0x00001407, sabbrevlangname => "DEC" },
0625 { name => "de-LU", lcid => 0x00001007, sabbrevlangname => "DEL" },
0626 { name => "dje", sopentypelang => "DJR" },
0627 { name => "dje-NE" },
b591b3167… Alex*0628 { name => "doi", sopentypelang => "DGR" },
0629 { name => "doi-IN", alias => "doi-Deva-IN" },
0630 { name => "doi-Deva", file => "doi" },
0631 { name => "doi-Deva-IN", file => "doi_IN" },
8cdb593f5… Alex*0632 { name => "dsb", lcid => 0x00007c2e, sparent => "hsb", oemcp => 850, ebcdiccp => 870, sabbrevlangname => "DSB", sopentypelang => "LSB" },
0633 { name => "dsb-DE", lcid => 0x0000082e },
0634 { name => "dua" },
0635 { name => "dua-CM" },
50c5eb31c… Alex*0636 { name => "dv", lcid => 0x00000065, slist => "\x{060c}", group => 13, nativedigits => "0123456789" },
0637 { name => "dv-MV", lcid => 0x00000465 },
8cdb593f5… Alex*0638 { name => "dyo" },
0639 { name => "dyo-SN" },
0640 { name => "dz", sopentypelang => "DZN" },
0641 { name => "dz-BT", lcid => 0x00000c51, sabbrevlangname => "ZZZ" },
0642 { name => "ebu" },
0643 { name => "ebu-KE" },
0644 { name => "ee" },
0645 { name => "ee-GH" },
0646 { name => "ee-TG" },
0647 { name => "el", lcid => 0x00000008, oemcp => 737, group => 4 },
0648 { name => "el-CY" },
0649 { name => "el-GR", lcid => 0x00000408 },
0650 { name => "en", lcid => 0x00000009, oemcp => 437, slist => ",", sabbrevlangname => "ENU" },
0651 { name => "en-001", oemcp => 850 },
ad02ef7be… Alex*0652 { name => "en-029", lcid => 0x00002409, file => "en", oemcp => 850, sabbrevlangname => "ENB" },
8cdb593f5… Alex*0653 { name => "en-150", oemcp => 65001 },
0654 { name => "en-AE", lcid => 0x00004c09, oemcp => 65001, sabbrevlangname => "ZZZ" },
0655 { name => "en-AG", oemcp => 850 },
0656 { name => "en-AI", oemcp => 850 },
0657 { name => "en-AS", oemcp => 850 },
0658 { name => "en-AT", oemcp => 65001 },
0659 { name => "en-AU", lcid => 0x00000c09, oemcp => 850, sabbrevlangname => "ENA" },
0660 { name => "en-BB", oemcp => 850 },
0661 { name => "en-BE", oemcp => 850 },
0662 { name => "en-BI", oemcp => 65001 },
0663 { name => "en-BM", oemcp => 850 },
0664 { name => "en-BS", oemcp => 850 },
0665 { name => "en-BW", oemcp => 850 },
0666 { name => "en-BZ", lcid => 0x00002809, oemcp => 850, sabbrevlangname => "ENL" },
0667 { name => "en-CA", lcid => 0x00001009, oemcp => 850, ebcdiccp => 37, sabbrevlangname => "ENC" },
0668 { name => "en-CC", oemcp => 850 },
0669 { name => "en-CH", oemcp => 65001 },
0670 { name => "en-CK", oemcp => 850 },
0671 { name => "en-CM", oemcp => 850 },
0672 { name => "en-CX", oemcp => 850 },
0673 { name => "en-CY", oemcp => 65001 },
0674 { name => "en-DE", oemcp => 65001 },
0675 { name => "en-DG", oemcp => 850 },
0676 { name => "en-DK", oemcp => 65001 },
0677 { name => "en-DM", oemcp => 850 },
0678 { name => "en-ER", oemcp => 850 },
0679 { name => "en-FI", oemcp => 65001 },
0680 { name => "en-FJ", oemcp => 850 },
0681 { name => "en-FK", oemcp => 850 },
0682 { name => "en-FM", oemcp => 850 },
0683 { name => "en-GB", lcid => 0x00000809, oemcp => 850, ebcdiccp => 20285, sabbrevlangname => "ENG" },
0684 { name => "en-GD", oemcp => 850 },
0685 { name => "en-GG", oemcp => 850 },
0686 { name => "en-GH", oemcp => 850 },
0687 { name => "en-GI", oemcp => 850 },
0688 { name => "en-GM", oemcp => 850 },
0689 { name => "en-GU", oemcp => 850 },
0690 { name => "en-GY", oemcp => 850 },
0691 { name => "en-HK", lcid => 0x00003c09, oemcp => 850, sabbrevlangname => "ENH" },
6d046dd98… Alex*0692 { name => "en-ID", lcid => 0x00003809, oemcp => 850, sabbrevlangname => "ZZZ" },
8cdb593f5… Alex*0693 { name => "en-IE", lcid => 0x00001809, oemcp => 850, sabbrevlangname => "ENI" },
0694 { name => "en-IL", oemcp => 65001 },
0695 { name => "en-IM", oemcp => 850 },
0696 { name => "en-IN", lcid => 0x00004009, sabbrevlangname => "ENN" },
0697 { name => "en-IO", oemcp => 850 },
0698 { name => "en-JE", oemcp => 850 },
0699 { name => "en-JM", lcid => 0x00002009, oemcp => 850, sabbrevlangname => "ENJ" },
0700 { name => "en-KE", oemcp => 850 },
0701 { name => "en-KI", oemcp => 850 },
0702 { name => "en-KN", oemcp => 850 },
0703 { name => "en-KY", oemcp => 850 },
0704 { name => "en-LC", oemcp => 850 },
0705 { name => "en-LR", oemcp => 850 },
0706 { name => "en-LS", oemcp => 850 },
0707 { name => "en-MG", oemcp => 850 },
0708 { name => "en-MH", oemcp => 850 },
0709 { name => "en-MO", oemcp => 850 },
0710 { name => "en-MP", oemcp => 850 },
0711 { name => "en-MS", oemcp => 850 },
0712 { name => "en-MT", oemcp => 850 },
0713 { name => "en-MU", oemcp => 850 },
0714 { name => "en-MW", oemcp => 850 },
0715 { name => "en-MY", lcid => 0x00004409, sabbrevlangname => "ENM" },
0716 { name => "en-NA", oemcp => 850 },
0717 { name => "en-NF", oemcp => 850 },
0718 { name => "en-NG", oemcp => 850 },
0719 { name => "en-NL", oemcp => 65001 },
0720 { name => "en-NR", oemcp => 850 },
0721 { name => "en-NU", oemcp => 850 },
0722 { name => "en-NZ", lcid => 0x00001409, oemcp => 850, sabbrevlangname => "ENZ" },
0723 { name => "en-PG", oemcp => 850 },
0724 { name => "en-PH", lcid => 0x00003409, ebcdiccp => 500, sabbrevlangname => "ENP" },
0725 { name => "en-PK", oemcp => 850 },
0726 { name => "en-PN", oemcp => 850 },
0727 { name => "en-PR", oemcp => 850 },
0728 { name => "en-PW", oemcp => 850 },
0729 { name => "en-RW", oemcp => 850 },
0730 { name => "en-SB", oemcp => 850 },
0731 { name => "en-SC", oemcp => 850 },
0732 { name => "en-SD", oemcp => 850 },
0733 { name => "en-SE", oemcp => 65001 },
0734 { name => "en-SG", lcid => 0x00004809, sabbrevlangname => "ENE" },
0735 { name => "en-SH", oemcp => 850 },
0736 { name => "en-SI", oemcp => 65001 },
0737 { name => "en-SL", oemcp => 850 },
0738 { name => "en-SS", oemcp => 850 },
0739 { name => "en-SX", oemcp => 850 },
0740 { name => "en-SZ", oemcp => 850 },
0741 { name => "en-TC", oemcp => 850 },
0742 { name => "en-TK", oemcp => 850 },
0743 { name => "en-TO", oemcp => 850 },
0744 { name => "en-TT", lcid => 0x00002c09, oemcp => 850, sabbrevlangname => "ENT" },
0745 { name => "en-TV", oemcp => 850 },
0746 { name => "en-TZ", oemcp => 850 },
0747 { name => "en-UG", oemcp => 850 },
0748 { name => "en-UM", oemcp => 850 },
0749 { name => "en-US", lcid => 0x00000409 },
0750 { name => "en-VC", oemcp => 850 },
0751 { name => "en-VG", oemcp => 850 },
0752 { name => "en-VI", oemcp => 850 },
0753 { name => "en-VU", oemcp => 850 },
0754 { name => "en-WS", oemcp => 850 },
0755 { name => "en-ZA", lcid => 0x00001c09, ebcdiccp => 500, sabbrevlangname => "ENS" },
0756 { name => "en-ZM", oemcp => 850 },
0757 { name => "en-ZW", lcid => 0x00003009, ebcdiccp => 500, sabbrevlangname => "ENW" },
0758 { name => "eo", sopentypelang => "NTO" },
0759 { name => "eo-001" },
0760 { name => "es", lcid => 0x0000000a, oemcp => 850, ebcdiccp => 20284, sabbrevlangname => "ESP", sopentypelang => "ESP" },
0761 { name => "es-419", lcid => 0x0000580a, sabbrevlangname => "ESJ" },
0762 { name => "es-AR", lcid => 0x00002c0a, sabbrevlangname => "ESS" },
0763 { name => "es-BO", lcid => 0x0000400a, sabbrevlangname => "ESB" },
0764 { name => "es-BR", oemcp => 65001 },
0765 { name => "es-BZ", oemcp => 65001 },
0766 { name => "es-CL", lcid => 0x0000340a, sabbrevlangname => "ESL" },
0767 { name => "es-CO", lcid => 0x0000240a, sabbrevlangname => "ESO" },
0768 { name => "es-CR", lcid => 0x0000140a, sabbrevlangname => "ESC" },
0769 { name => "es-CU", lcid => 0x00005c0a, sabbrevlangname => "ESK" },
0770 { name => "es-DO", lcid => 0x00001c0a, sabbrevlangname => "ESD" },
0771 { name => "es-EA" },
0772 { name => "es-EC", lcid => 0x0000300a, sabbrevlangname => "ESF" },
0773 { name => "es-ES", lcid => 0x00000c0a, sabbrevlangname => "ESN" },
0774 { name => "es-ES_tradnl", lcid => 0x0000040a, file => "es_ES" },
0775 { name => "es-ES-u-co-trad", alias => "es-ES_tradnl" },
0776 { name => "es-GQ" },
0777 { name => "es-GT", lcid => 0x0000100a, sabbrevlangname => "ESG" },
0778 { name => "es-HN", lcid => 0x0000480a, sabbrevlangname => "ESH" },
0779 { name => "es-IC" },
0780 { name => "es-MX", lcid => 0x0000080a, sabbrevlangname => "ESM" },
0781 { name => "es-NI", lcid => 0x00004c0a, sabbrevlangname => "ESI" },
0782 { name => "es-PA", lcid => 0x0000180a, sabbrevlangname => "ESA" },
0783 { name => "es-PE", lcid => 0x0000280a, sabbrevlangname => "ESR" },
0784 { name => "es-PH" },
0785 { name => "es-PR", lcid => 0x0000500a, sabbrevlangname => "ESU" },
0786 { name => "es-PY", lcid => 0x00003c0a, sabbrevlangname => "ESZ" },
0787 { name => "es-SV", lcid => 0x0000440a, sabbrevlangname => "ESE" },
0788 { name => "es-US", lcid => 0x0000540a, sabbrevlangname => "EST" },
0789 { name => "es-UY", lcid => 0x0000380a, sabbrevlangname => "ESY" },
0790 { name => "es-VE", lcid => 0x0000200a, sabbrevlangname => "ESV" },
0791 { name => "et", lcid => 0x00000025, oemcp => 775, group => 3, sabbrevlangname => "ETI", sopentypelang => "ETI" },
0792 { name => "et-EE", lcid => 0x00000425 },
0793 { name => "eu", lcid => 0x0000002d, oemcp => 850, maccp => 65001, sabbrevlangname => "EUQ", sopentypelang => "EUQ" },
0794 { name => "eu-ES", lcid => 0x0000042d },
0795 { name => "ewo" },
0796 { name => "ewo-CM" },
0797 { name => "fa", lcid => 0x00000029, inegnumber => 3, oemcp => 720, slist => "\x{061b}", group => 13, sabbrevlangname => "FAR", sopentypelang => "FAR" },
0798 { name => "fa-AF", alias => "prs-AF" },
0799 { name => "fa-IR", lcid => 0x00000429 },
0800 { name => "ff", lcid => 0x00000067, oemcp => 850, ebcdiccp => 20297 },
0801 { name => "ff-CM", alias => "ff-Latn-CM" },
0802 { name => "ff-GN", alias => "ff-Latn-GN" },
0803 { name => "ff-MR", alias => "ff-Latn-MR" },
0804 { name => "ff-NG", alias => "ff-Latn-NG" },
0805 { name => "ff-SN", alias => "ff-Latn-SN" },
b591b3167… Alex*0806 { name => "ff-Adlm", oemcp => 65001 },
8cdb593f5… Alex*0807 { name => "ff-Adlm-BF" },
0808 { name => "ff-Adlm-CM" },
0809 { name => "ff-Adlm-GH" },
0810 { name => "ff-Adlm-GM" },
0811 { name => "ff-Adlm-GN" },
0812 { name => "ff-Adlm-GW" },
0813 { name => "ff-Adlm-LR" },
0814 { name => "ff-Adlm-MR" },
0815 { name => "ff-Adlm-NE" },
0816 { name => "ff-Adlm-NG" },
0817 { name => "ff-Adlm-SL" },
0818 { name => "ff-Adlm-SN" },
0819 { name => "ff-Latn", lcid => 0x00007c67 },
0820 { name => "ff-Latn-BF", oemcp => 65001 },
0821 { name => "ff-Latn-CM" },
0822 { name => "ff-Latn-GH", oemcp => 65001 },
0823 { name => "ff-Latn-GM", oemcp => 65001 },
0824 { name => "ff-Latn-GN" },
0825 { name => "ff-Latn-GW", oemcp => 65001 },
0826 { name => "ff-Latn-LR", oemcp => 65001 },
0827 { name => "ff-Latn-MR" },
0828 { name => "ff-Latn-NE", oemcp => 65001 },
0829 { name => "ff-Latn-NG", lcid => 0x00000467, sabbrevlangname => "ZZZ" },
0830 { name => "ff-Latn-SL", oemcp => 65001 },
0831 { name => "ff-Latn-SN", lcid => 0x00000867 },
0832 { name => "fi", lcid => 0x0000000b, oemcp => 850, ebcdiccp => 20278 },
0833 { name => "fi-FI", lcid => 0x0000040b },
0834 { name => "fil", lcid => 0x00000064, oemcp => 437, ebcdiccp => 500, sabbrevlangname => "FPO", sopentypelang => "PIL" },
0835 { name => "fil-PH", lcid => 0x00000464 },
0836 { name => "fil-Latn", alias => "fil" },
0837 { name => "fil-Latn-PH", alias => "fil-PH" },
0838 { name => "fo", lcid => 0x00000038, oemcp => 850, maccp => 10079, ebcdiccp => 20277, sabbrevlangname => "FOS", sopentypelang => "FOS" },
0839 { name => "fo-DK", oemcp => 65001, maccp => 65001 },
0840 { name => "fo-FO", lcid => 0x00000438 },
0841 { name => "fr", lcid => 0x0000000c, oemcp => 850, ebcdiccp => 20297 },
ad02ef7be… Alex*0842 { name => "fr-029", lcid => 0x00001c0c, file => "fr", sabbrevlangname => "ZZZ" },
8cdb593f5… Alex*0843 { name => "fr-BE", lcid => 0x0000080c, sabbrevlangname => "FRB" },
0844 { name => "fr-BF" },
0845 { name => "fr-BI" },
0846 { name => "fr-BJ" },
0847 { name => "fr-BL" },
0848 { name => "fr-CA", lcid => 0x00000c0c, sabbrevlangname => "FRC" },
0849 { name => "fr-CD", lcid => 0x0000240c, sabbrevlangname => "FRD" },
0850 { name => "fr-CF" },
0851 { name => "fr-CG" },
0852 { name => "fr-CH", lcid => 0x0000100c, sabbrevlangname => "FRS" },
0853 { name => "fr-CI", lcid => 0x0000300c, sabbrevlangname => "FRI" },
0854 { name => "fr-CM", lcid => 0x00002c0c, sabbrevlangname => "FRE" },
0855 { name => "fr-DJ" },
0856 { name => "fr-DZ" },
0857 { name => "fr-FR", lcid => 0x0000040c },
0858 { name => "fr-GA" },
0859 { name => "fr-GF" },
0860 { name => "fr-GN" },
0861 { name => "fr-GP" },
0862 { name => "fr-GQ" },
0863 { name => "fr-HT", lcid => 0x00003c0c, sabbrevlangname => "FRH" },
0864 { name => "fr-KM" },
0865 { name => "fr-LU", lcid => 0x0000140c, sabbrevlangname => "FRL" },
0866 { name => "fr-MA", lcid => 0x0000380c, sabbrevlangname => "FRO" },
0867 { name => "fr-MC", lcid => 0x0000180c, sabbrevlangname => "FRM" },
0868 { name => "fr-MF" },
0869 { name => "fr-MG" },
0870 { name => "fr-ML", lcid => 0x0000340c, sabbrevlangname => "FRF" },
0871 { name => "fr-MQ" },
0872 { name => "fr-MR" },
0873 { name => "fr-MU" },
0874 { name => "fr-NC" },
0875 { name => "fr-NE" },
0876 { name => "fr-PF" },
0877 { name => "fr-PM" },
0878 { name => "fr-RE", lcid => 0x0000200c, sabbrevlangname => "FRR" },
0879 { name => "fr-RW" },
0880 { name => "fr-SC" },
0881 { name => "fr-SN", lcid => 0x0000280c, sabbrevlangname => "FRN" },
0882 { name => "fr-SY" },
0883 { name => "fr-TD" },
0884 { name => "fr-TG" },
0885 { name => "fr-TN" },
0886 { name => "fr-VU" },
0887 { name => "fr-WF" },
0888 { name => "fr-YT" },
0889 { name => "fur", sopentypelang => "FRL" },
0890 { name => "fur-IT" },
b591b3167… Alex*0891 { name => "fuv-NG", alias => "ff-Latn-NG" },
8cdb593f5… Alex*0892 { name => "fy", lcid => 0x00000062, oemcp => 850, sabbrevlangname => "FYN", sopentypelang => "FRI" },
0893 { name => "fy-NL", lcid => 0x00000462 },
0894 { name => "ga", lcid => 0x0000003c, oemcp => 850, sabbrevlangname => "IRE", sopentypelang => "IRI" },
b591b3167… Alex*0895 { name => "ga-GB", oemcp => 65001 },
8cdb593f5… Alex*0896 { name => "ga-IE", lcid => 0x0000083c },
0897 { name => "gd", lcid => 0x00000091, oemcp => 850, ebcdiccp => 20285, sopentypelang => "GAE" },
0898 { name => "gd-GB", lcid => 0x00000491 },
0899 { name => "gd-Latn", alias => "gd" },
0900 { name => "gl", lcid => 0x00000056, oemcp => 850, sabbrevlangname => "GLC", sopentypelang => "GAL" },
0901 { name => "gl-ES", lcid => 0x00000456 },
50c5eb31c… Alex*0902 { name => "gn", lcid => 0x00000074, oemcp => 850, ebcdiccp => 20284, slist => ",", sopentypelang => "GUA" },
0903 { name => "gn-PY", lcid => 0x00000474 },
8cdb593f5… Alex*0904 { name => "gsw", lcid => 0x00000084, oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "ZZZ", sopentypelang => "ALS" },
0905 { name => "gsw-CH" },
0906 { name => "gsw-FR", lcid => 0x00000484, sabbrevlangname => "GSW" },
0907 { name => "gsw-LI" },
0908 { name => "gu", lcid => 0x00000047, slist => ",", group => 15 },
0909 { name => "gu-IN", lcid => 0x00000447 },
0910 { name => "guz" },
0911 { name => "guz-KE" },
0912 { name => "gv", sopentypelang => "MNX" },
b591b3167… Alex*0913 { name => "gv-GB", file => "gv" },
8cdb593f5… Alex*0914 { name => "gv-IM" },
0915 { name => "ha", lcid => 0x00000068, oemcp => 437 },
0916 { name => "ha-GH", alias => "ha-Latn-GH" },
0917 { name => "ha-Latn", lcid => 0x00007c68, file => "ha" },
0918 { name => "ha-Latn-GH", file => "ha_GH", ebcdiccp => 500 },
0919 { name => "ha-Latn-NE", file => "ha_NE", ebcdiccp => 500 },
0920 { name => "ha-Latn-NG", lcid => 0x00000468, file => "ha_NG" },
0921 { name => "ha-NE", alias => "ha-Latn-NE" },
0922 { name => "ha-NG", alias => "ha-Latn-NG" },
0923 { name => "haw", lcid => 0x00000075, oemcp => 437 },
0924 { name => "haw-Latn", alias => "haw" },
0925 { name => "haw-Latn-US", alias => "haw-US" },
0926 { name => "haw-US", lcid => 0x00000475 },
0927 { name => "he", lcid => 0x0000000d, oemcp => 862, slist => ",", group => 12, sopentypelang => "IWR" },
0928 { name => "he-IL", lcid => 0x0000040d },
0929 { name => "hi", lcid => 0x00000039, slist => ",", group => 15 },
0930 { name => "hi-IN", lcid => 0x00000439 },
0931 { name => "hr", lcid => 0x0000001a, inegnumber => 2, oemcp => 852, maccp => 10082, group => 2 },
0932 { name => "hr-BA", lcid => 0x0000101a, ebcdiccp => 870, inegnumber => 1, sabbrevlangname => "HRB" },
0933 { name => "hr-HR", lcid => 0x0000041a },
0934 { name => "hsb", lcid => 0x0000002e, oemcp => 850, ebcdiccp => 870, sopentypelang => "USB" },
0935 { name => "hsb-DE", lcid => 0x0000042e },
0936 { name => "hu", lcid => 0x0000000e, oemcp => 852, group => 2 },
0937 { name => "hu-HU", lcid => 0x0000040e },
0938 { name => "hu-HU_technl", lcid => 0x0001040e, alias => "hu-HU" },
0939 { name => "hy", lcid => 0x0000002b, slist => ",", group => 17 },
0940 { name => "hy-AM", lcid => 0x0000042b },
0941 { name => "ia" },
0942 { name => "ia-001" },
0943
0944
0945 { name => "id", lcid => 0x00000021, oemcp => 850 },
0946 { name => "id-ID", lcid => 0x00000421 },
6d046dd98… Alex*0947 { name => "ie" },
0948 { name => "ie-EE" },
8cdb593f5… Alex*0949 { name => "ig", lcid => 0x00000070, oemcp => 437 },
0950 { name => "ig-Latn", alias => "ig" },
0951 { name => "ig-Latn-NG", alias => "ig-NG" },
0952 { name => "ig-NG", lcid => 0x00000470 },
0953 { name => "ii", lcid => 0x00000078, group => 9, sopentypelang => "YIM" },
0954 { name => "ii-CN", lcid => 0x00000478 },
0955 { name => "ii-Yiii", alias => "ii" },
0956 { name => "ii-Yiii-CN", alias => "ii-CN" },
0957 { name => "is", lcid => 0x0000000f, oemcp => 850, maccp => 10079, ebcdiccp => 20871 },
0958 { name => "is-IS", lcid => 0x0000040f },
0959 { name => "it", lcid => 0x00000010, oemcp => 850, ebcdiccp => 20280 },
0960 { name => "it-CH", lcid => 0x00000810, ebcdiccp => 500, sabbrevlangname => "ITS" },
0961 { name => "it-IT", lcid => 0x00000410 },
0962 { name => "it-SM" },
0963 { name => "it-VA", oemcp => 65001 },
50c5eb31c… Alex*0964 { name => "iu", lcid => 0x0000005d, oemcp => 437, slist => ",", sortlocale => "iu-Latn-CA", sabbrevlangname => "IUK", sopentypelang => "INU" },
0965 { name => "iu-Cans", lcid => 0x0000785d, file => "iu", oemcp => 65001, sabbrevlangname => "IUS" },
0966 { name => "iu-Cans-CA", lcid => 0x0000045d, file => "iu_CA" },
0967 { name => "iu-Latn", lcid => 0x00007c5d },
0968 { name => "iu-Latn-CA", lcid => 0x0000085d },
8cdb593f5… Alex*0969 { name => "ja", lcid => 0x00000011, ireadinglayout => 2, oemcp => 932, slist => ",", sscripts => "Hani Hira Jpan Kana", group => 7, sopentypelang => "JAN" },
0970 { name => "ja-JP", lcid => 0x00000411 },
0971 { name => "ja-JP_radstr", lcid => 0x00040411, alias => "ja-JP" },
0972 { name => "ja-JP-u-co-unihan", alias => "ja-JP_radstr" },
0973 { name => "jgo" },
0974 { name => "jgo-CM" },
0975 { name => "jmc" },
0976 { name => "jmc-TZ" },
4172c0482… Alex*0977 { name => "jv", oemcp => 850, nativedigits => "0123456789" },
8cdb593f5… Alex*0978 { name => "jv-ID", alias => "jv-Latn-ID" },
0979
0980
0981 { name => "jv-Latn", file => "jv" },
0982 { name => "jv-Latn-ID", file => "jv_ID" },
0983 { name => "ka", lcid => 0x00000037, group => 16 },
0984 { name => "ka-GE", lcid => 0x00000437 },
0985 { name => "ka-GE_modern", lcid => 0x00010437, alias => "ka-GE" },
0986 { name => "kab", sopentypelang => "KAB0" },
0987 { name => "kab-DZ" },
0988 { name => "kam", sopentypelang => "KMB" },
0989 { name => "kam-KE" },
0990 { name => "kde" },
0991 { name => "kde-TZ" },
0992 { name => "kea" },
0993 { name => "kea-CV" },
0994 { name => "kgp" },
0995 { name => "kgp-BR" },
0996 { name => "khq" },
0997 { name => "khq-ML" },
0998 { name => "ki" },
0999 { name => "ki-KE" },
1000 { name => "kk", lcid => 0x0000003f, group => 5, sabbrevlangname => "KKZ" },
1001 { name => "kk-Cyrl", alias => "kk" },
1002 { name => "kk-Cyrl-KZ", alias => "kk-KZ" },
1003 { name => "kk-KZ", lcid => 0x0000043f },
1004 { name => "kkj" },
1005 { name => "kkj-CM" },
1006 { name => "kl", lcid => 0x0000006f, oemcp => 850, ebcdiccp => 20277, sopentypelang => "GRN" },
1007 { name => "kl-GL", lcid => 0x0000046f },
1008 { name => "kln", sopentypelang => "KAL" },
1009 { name => "kln-KE" },
1010 { name => "km", lcid => 0x00000053, inegnumber => 2, slist => ",", group => 15 },
1011 { name => "km-KH", lcid => 0x00000453 },
1012 { name => "kn", lcid => 0x0000004b, slist => ",", group => 15, sabbrevlangname => "KDI" },
1013 { name => "kn-IN", lcid => 0x0000044b },
1014 { name => "ko", lcid => 0x00000012, ireadinglayout => 2, slist => ",", oemcp => 949, ebcdiccp => 20833, sscripts => "Hang Hani Kore", group => 8 },
1015 { name => "ko-KP", oemcp => 65001 },
1016 { name => "ko-KR", lcid => 0x00000412 },
1017 { name => "kok", lcid => 0x00000057, slist => ",", group => 15, sabbrevlangname => "KNK" },
1018 { name => "kok-IN", lcid => 0x00000457 },
1019 { name => "kr", lcid => 0x00000071, sortlocale => "kr-Latn-NG", oemcp => 850, dir => "exemplars", sabbrevlangname => "ZZZ", sopentypelang => "KNR" },
1020 { name => "kr-Latn", file => "kr", dir => "exemplars" },
1021 { name => "kr-Latn-NG", lcid => 0x00000471, file => "kr", dir => "exemplars" },
1022 { name => "kr-NG", alias => "kr-Latn-NG" },
1023 { name => "ks", lcid => 0x00000060, group => 15, sabbrevlangname => "ZZZ", sopentypelang => "KSH" },
1024 { name => "ks-Arab", lcid => 0x00000460 },
1025 { name => "ks-Arab-IN" },
6644b6b66… Alex*1026 { name => "ks-Deva", slist => "," },
1027 { name => "ks-Deva-IN", lcid => 0x00000860 },
8cdb593f5… Alex*1028 { name => "ks-IN", alias => "ks-Arab-IN" },
1029 { name => "ksb" },
1030 { name => "ksb-TZ" },
1031 { name => "ksf" },
1032 { name => "ksf-CM" },
1033 { name => "ksh", sopentypelang => "KSH0" },
1034 { name => "ksh-DE" },
1035 { name => "ku", lcid => 0x00000092, file => "ckb", slist => "\x{061b}", sortlocale => "ku-Arab-IQ", oemcp => 720 },
1036 { name => "ku-Arab", lcid => 0x00007c92, file => "ckb", group => 13 },
1037 { name => "ku-Arab-IQ", lcid => 0x00000492, file => "ckb_IQ" },
1038 { name => "ku-Arab-IR", file => "ckb_IR", oemcp => 65001 },
1039 { name => "kw" },
1040 { name => "kw-GB" },
1041 { name => "ky", lcid => 0x00000040, oemcp => 866, group => 5, sabbrevlangname => "KYR" },
1042 { name => "ky-Cyrl", alias => "ky" },
1043 { name => "ky-Cyrl-KG", alias => "ky-KG" },
1044 { name => "ky-KG", lcid => 0x00000440 },
50c5eb31c… Alex*1045 { name => "la", lcid => 0x00000076, oemcp => 437, slist => ",", sabbrevlangname => "ZZZ" },
1046 { name => "la-VA", lcid => 0x00000476 },
b591b3167… Alex*1047 { name => "la-001", alias => "la-VA" },
8cdb593f5… Alex*1048 { name => "lag" },
1049 { name => "lag-TZ" },
1050 { name => "lb", lcid => 0x0000006e, oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "LBX" },
1051 { name => "lb-LU", lcid => 0x0000046e },
1052 { name => "lg" },
1053 { name => "lg-UG" },
1054 { name => "lkt" },
1055 { name => "lkt-US" },
1056 { name => "ln" },
1057 { name => "ln-AO" },
1058 { name => "ln-CD" },
1059 { name => "ln-CF" },
1060 { name => "ln-CG" },
1061 { name => "lo", lcid => 0x00000054, group => 15 },
1062 { name => "lo-LA", lcid => 0x00000454 },
1063 { name => "lrc" },
1064 { name => "lrc-IQ" },
1065 { name => "lrc-IR" },
1066 { name => "lt", lcid => 0x00000027, oemcp => 775, group => 3, sabbrevlangname => "LTH", sopentypelang => "LTH" },
1067 { name => "lt-LT", lcid => 0x00000427 },
1068 { name => "lu" },
1069 { name => "lu-CD" },
1070 { name => "luo" },
1071 { name => "luo-KE" },
1072 { name => "luy", sopentypelang => "LUH" },
1073 { name => "luy-KE" },
1074 { name => "lv", lcid => 0x00000026, oemcp => 775, group => 3, sabbrevlangname => "LVI", sopentypelang => "LVI" },
1075 { name => "lv-LV", lcid => 0x00000426 },
1076 { name => "mai" },
1077 { name => "mai-IN" },
1078 { name => "mas" },
1079 { name => "mas-KE" },
1080 { name => "mas-TZ" },
1081 { name => "mer" },
1082 { name => "mer-KE" },
1083 { name => "mfe" },
1084 { name => "mfe-MU" },
1085 { name => "mg" },
1086 { name => "mg-MG" },
1087 { name => "mgh" },
1088 { name => "mgh-MZ" },
1089 { name => "mgo" },
1090 { name => "mgo-CM" },
1091 { name => "mi", lcid => 0x00000081, slist => "," },
1092 { name => "mi-Latn", alias => "mi" },
1093 { name => "mi-Latn-NZ", alias => "mi-NZ" },
1094 { name => "mi-NZ", lcid => 0x00000481 },
6d046dd98… Alex*1095 { name => "mic" },
1096 { name => "mic-CA" },
8cdb593f5… Alex*1097 { name => "mk", lcid => 0x0000002f, oemcp => 866, ebcdiccp => 500, group => 5, sabbrevlangname => "MKI" },
1098 { name => "mk-MK", lcid => 0x0000042f },
1099 { name => "ml", lcid => 0x0000004c, group => 15, sabbrevlangname => "MYM", sopentypelang => "MLR" },
1100 { name => "ml-IN", lcid => 0x0000044c },
1101 { name => "mn", lcid => 0x00000050, oemcp => 866, sopentypelang => "MNG" },
1102 { name => "mn-Cyrl", lcid => 0x00007850, file => "mn", sabbrevlangname => "MNN" },
1103 { name => "mn-Cyrl-MN", alias => "mn-MN" },
1104 { name => "mn-MN", lcid => 0x00000450, sparent => "mn-Cyrl", group => 5 },
50c5eb31c… Alex*1105 { name => "mn-Mong", lcid => 0x00007c50, oemcp => 65001, slist => ",", group => 15, sabbrevlangname => "MNG", nativedigits => "0123456789" },
1106 { name => "mn-Mong-CN", lcid => 0x00000850 },
1107 { name => "mn-Mong-MN", lcid => 0x00000c50, sabbrevlangname => "MNM" },
8cdb593f5… Alex*1108 { name => "mni", lcid => 0x00000058, slist => ",", sabbrevlangname => "ZZZ" },
1109 { name => "mni-IN", lcid => 0x00000458, file => "mni_Beng_IN" },
b591b3167… Alex*1110 { name => "mni-Beng" },
1111 { name => "mni-Beng-IN", alias => "mni-IN" },
50c5eb31c… Alex*1112 { name => "moh", lcid => 0x0000007c, oemcp => 850, ebcdiccp => 37, slist => ",", sabbrevlangname => "MWK" },
1113 { name => "moh-CA", lcid => 0x0000047c },
8cdb593f5… Alex*1114 { name => "moh-Latn", alias => "moh" },
1115 { name => "moh-Latn-CA", alias => "moh-CA" },
1116 { name => "mr", lcid => 0x0000004e, slist => ",", group => 15 },
1117 { name => "mr-IN", lcid => 0x0000044e },
1118 { name => "ms", lcid => 0x0000003e, oemcp => 850, sabbrevlangname => "MSL", sopentypelang => "MLY" },
1119 { name => "ms-BN", lcid => 0x0000083e, sabbrevlangname => "MSB" },
1120 { name => "ms-ID" },
1121 { name => "ms-Latn", alias => "ms" },
1122 { name => "ms-Latn-BN", alias => "ms-BN" },
1123 { name => "ms-Latn-MY", alias => "ms-MY" },
1124 { name => "ms-Latn-SG", alias => "ms-SG" },
1125 { name => "ms-MY", lcid => 0x0000043e },
1126 { name => "ms-SG" },
1127 { name => "mt", lcid => 0x0000003a, sopentypelang => "MTS" },
1128 { name => "mt-MT", lcid => 0x0000043a },
1129 { name => "mua" },
1130 { name => "mua-CM" },
1131 { name => "my", lcid => 0x00000055, sopentypelang => "BRM" },
1132 { name => "my-MM", lcid => 0x00000455 },
1133 { name => "mzn" },
1134 { name => "mzn-IR" },
1135 { name => "naq" },
1136 { name => "naq-NA" },
1137 { name => "nb", lcid => 0x00007c14, oemcp => 850, ebcdiccp => 20277, sabbrevlangname => "NOR", sopentypelang => "NOR" },
1138 { name => "nb-NO", lcid => 0x00000414 },
1139 { name => "nb-SJ" },
1140 { name => "nd", sopentypelang => "NDB" },
1141 { name => "nd-ZW" },
1142 { name => "nds" },
1143 { name => "nds-DE" },
1144 { name => "nds-NL" },
1145 { name => "ne", lcid => 0x00000061, slist => "," },
1146 { name => "ne-IN", lcid => 0x00000861, sabbrevlangname => "NEI" },
1147 { name => "ne-NP", lcid => 0x00000461, group => 15 },
1148 { name => "nl", lcid => 0x00000013, oemcp => 850 },
1149 { name => "nl-AW" },
1150 { name => "nl-BE", lcid => 0x00000813, sabbrevlangname => "NLB" },
1151 { name => "nl-BQ" },
1152 { name => "nl-CW" },
1153 { name => "nl-NL", lcid => 0x00000413 },
1154 { name => "nl-SR" },
1155 { name => "nl-SX" },
1156 { name => "nmg" },
1157 { name => "nmg-CM" },
1158 { name => "nn", lcid => 0x00007814, oemcp => 850, ebcdiccp => 20277, sabbrevlangname => "NON", sopentypelang => "NYN" },
1159 { name => "nn-NO", lcid => 0x00000814 },
1160 { name => "nnh" },
1161 { name => "nnh-CM" },
1162 { name => "no", lcid => 0x00000014, oemcp => 850, ebcdiccp => 20277, sortlocale => "nb-NO" },
50c5eb31c… Alex*1163 { name => "nqo", idigits => 3, inegnumber => 3, slist => "\x{060c}", sopentypelang => "NKO" },
1164 { name => "nqo-GN" },
1165 { name => "nr", sopentypelang => "NDB" },
1166 { name => "nr-ZA" },
1167 { name => "nso", lcid => 0x0000006c, oemcp => 850, sopentypelang => "SOT" },
1168 { name => "nso-ZA", lcid => 0x0000046c },
8cdb593f5… Alex*1169 { name => "nus" },
1170 { name => "nus-SD", alias => "nus-SS" },
1171 { name => "nus-SS" },
1172 { name => "nyn", sopentypelang => "NKL" },
1173 { name => "nyn-UG" },
198de0dcb… Alex*1174 { name => "oc", lcid => 0x00000082, oemcp => 850, ebcdiccp => 20297 },
1175 { name => "oc-FR", lcid => 0x00000482 },
8cdb593f5… Alex*1176 { name => "oc-Latn", alias => "oc" },
1177 { name => "oc-Latn-FR", alias => "oc-FR" },
1178 { name => "om", lcid => 0x00000072, sopentypelang => "ORO" },
1179 { name => "om-ET", lcid => 0x00000472 },
1180 { name => "om-KE" },
1181 { name => "or", lcid => 0x00000048, slist => ",", group => 15 },
1182 { name => "or-IN", lcid => 0x00000448 },
1183 { name => "os" },
1184 { name => "os-GE" },
1185 { name => "os-RU" },
1186 { name => "pa", lcid => 0x00000046, slist => "," },
1187 { name => "pa-Arab", lcid => 0x00007c46, slist => ";", inegnumber => 2, oemcp => 720, group => 13, sabbrevlangname => "PAP" },
1188 { name => "pa-Arab-PK", lcid => 0x00000846 },
1189 { name => "pa-Guru" },
1190 { name => "pa-Guru-IN", alias => "pa-IN" },
1191 { name => "pa-IN", lcid => 0x00000446, sparent => "pa-Guru", file => "pa_Guru_IN", group => 15 },
50c5eb31c… Alex*1192 { name => "pap", lcid => 0x00000079, oemcp => 850, sopentypelang => "PAP0" },
8cdb593f5… Alex*1193
1194 { name => "pcm" },
b591b3167… Alex*1195 { name => "pcm-NG", alias => "pcm-Latn-NG" },
1196 { name => "pcm-Latn", file => "pcm" },
1197 { name => "pcm-Latn-NG", file => "pcm_NG" },
8cdb593f5… Alex*1198 { name => "pl", lcid => 0x00000015, oemcp => 852, ebcdiccp => 20880, group => 2, sabbrevlangname => "PLK", sopentypelang => "PLK" },
1199 { name => "pl-PL", lcid => 0x00000415 },
50c5eb31c… Alex*1200 { name => "prg" },
6d046dd98… Alex*1201 { name => "prg-001", file => "prg" },
1202 { name => "prg-PL" },
8cdb593f5… Alex*1203 { name => "prs", lcid => 0x0000008c, file => "fa", inegnumber => 3, oemcp => 720, group => 13, sopentypelang => "DRI" },
1204 { name => "prs-AF", lcid => 0x0000048c, file => "fa_AF" },
1205 { name => "prs-Arab", alias => "prs" },
1206 { name => "prs-Arab-AF", alias => "prs-AF" },
1207 { name => "ps", lcid => 0x00000063, group => 13, sabbrevlangname => "PAS", sopentypelang => "PAS" },
1208 { name => "ps-AF", lcid => 0x00000463 },
1209 { name => "ps-PK" },
1210 { name => "pt", lcid => 0x00000016, oemcp => 850, sabbrevlangname => "PTB", sopentypelang => "PTG" },
1211 { name => "pt-AO" },
1212 { name => "pt-BR", lcid => 0x00000416 },
1213 { name => "pt-CH", oemcp => 65001 },
1214 { name => "pt-CV" },
1215 { name => "pt-GQ", oemcp => 65001 },
1216 { name => "pt-GW" },
1217 { name => "pt-LU", oemcp => 65001 },
1218 { name => "pt-MO" },
1219 { name => "pt-MZ" },
1220 { name => "pt-PT", lcid => 0x00000816, sabbrevlangname => "PTG" },
1221 { name => "pt-ST" },
1222 { name => "pt-TL" },
1223
1224
1225
1226
1227 { name => "qu", alias => "quz" },
1228 { name => "qu-BO", alias => "quz-BO" },
1229 { name => "qu-EC", alias => "quz-EC" },
1230 { name => "qu-PE", alias => "quz-PE" },
50c5eb31c… Alex*1231 { name => "quc", lcid => 0x00000086, oemcp => 850, ebcdiccp => 20284, slist => "," },
1232 { name => "quc-Latn", lcid => 0x00007c86, file => "quc" },
1233 { name => "quc-Latn-GT", lcid => 0x00000486, file => "quc_GT" },
8cdb593f5… Alex*1234 { name => "qut", alias => "quc" },
1235 { name => "qut-GT", alias => "quc-Latn-GT" },
1236 { name => "quz", lcid => 0x0000006b, file => "qu", territory => "BO", oemcp => 850, ebcdiccp => 20284, slist => "," },
1237 { name => "quz-BO", lcid => 0x0000046b, file => "qu_BO" },
1238 { name => "quz-EC", lcid => 0x0000086b, file => "qu_EC" },
1239 { name => "quz-Latn", alias => "quz" },
1240 { name => "quz-Latn-BO", alias => "quz-BO" },
1241 { name => "quz-Latn-EC", alias => "quz-EC" },
1242 { name => "quz-Latn-PE", alias => "quz-PE" },
1243 { name => "quz-PE", lcid => 0x00000c6b, file => "qu_PE" },
1244 { name => "rm", lcid => 0x00000017, oemcp => 850, ebcdiccp => 20273, sabbrevlangname => "RMC", sopentypelang => "RMS" },
1245 { name => "rm-CH", lcid => 0x00000417 },
1246 { name => "rn" },
1247 { name => "rn-BI" },
1248 { name => "ro", lcid => 0x00000018, oemcp => 852, ebcdiccp => 20880, sabbrevlangname => "ROM", sopentypelang => "ROM" },
1249 { name => "ro-MD", lcid => 0x00000818, maccp => 65001, sabbrevlangname => "ROD" },
1250 { name => "ro-RO", lcid => 0x00000418, group => 2 },
1251 { name => "rof" },
1252 { name => "rof-TZ" },
1253 { name => "ru", lcid => 0x00000019, oemcp => 866 },
1254 { name => "ru-BY", maccp => 65001 },
1255 { name => "ru-KG", maccp => 65001 },
1256 { name => "ru-KZ", maccp => 65001 },
1257 { name => "ru-MD", lcid => 0x00000819, maccp => 65001, sabbrevlangname => "RUM" },
1258 { name => "ru-RU", lcid => 0x00000419, group => 5 },
1259 { name => "ru-UA", maccp => 65001 },
1260 { name => "rw", lcid => 0x00000087, oemcp => 437, sopentypelang => "RUA" },
1261 { name => "rw-RW", lcid => 0x00000487 },
1262 { name => "rwk" },
1263 { name => "rwk-TZ" },
1264 { name => "sa", lcid => 0x0000004f, slist => ",", group => 15 },
1265 { name => "sa-Deva", alias => "sa" },
1266 { name => "sa-Deva-IN", alias => "sa-IN" },
1267 { name => "sa-IN", lcid => 0x0000044f },
1268 { name => "sah", lcid => 0x00000085, oemcp => 866, group => 5, sopentypelang => "YAK" },
1269 { name => "sah-Cyrl", alias => "sah" },
1270 { name => "sah-Cyrl-RU", alias => "sah-RU" },
1271 { name => "sah-RU", lcid => 0x00000485 },
1272 { name => "saq" },
1273 { name => "saq-KE" },
1274 { name => "sat" },
1275 { name => "sat-Olck" },
1276 { name => "sat-Olck-IN" },
1277 { name => "sbp" },
1278 { name => "sbp-TZ" },
1279 { name => "sc" },
1280 { name => "sc-IT" },
1281 { name => "sd", lcid => 0x00000059, inegnumber => 3, oemcp => 720, sabbrevlangname => "SIP" },
1282 { name => "sd-Arab", lcid => 0x00007c59, group => 13 },
1283 { name => "sd-Arab-PK", lcid => 0x00000859 },
1284 { name => "sd-Deva", inegnumber => 1, slist => ",", oemcp => 65001, group => 15 },
1285 { name => "sd-Deva-IN", lcid => 0x00000459, sabbrevlangname => "ZZZ" },
1286 { name => "sd-PK", alias => "sd-Arab-PK" },
1287 { name => "se", lcid => 0x0000003b, oemcp => 850, ebcdiccp => 20277, sopentypelang => "NSM" },
1288 { name => "se-FI", lcid => 0x00000c3b, ebcdiccp => 20278, sabbrevlangname => "SMG" },
1289 { name => "se-NO", lcid => 0x0000043b },
1290 { name => "se-SE", lcid => 0x0000083b, ebcdiccp => 20278, sabbrevlangname => "SMF" },
1291 { name => "se-Latn", alias => "se" },
1292 { name => "se-Latn-FI", alias => "se-FI" },
1293 { name => "se-Latn-NO", alias => "se-NO" },
1294 { name => "se-Latn-SE", alias => "se-SE" },
1295 { name => "seh" },
1296 { name => "seh-MZ" },
1297 { name => "ses" },
1298 { name => "ses-ML" },
1299 { name => "sg", sopentypelang => "SGO" },
1300 { name => "sg-CF" },
1301 { name => "shi" },
1302 { name => "shi-Latn" },
1303 { name => "shi-Latn-MA" },
1304 { name => "shi-Tfng" },
1305 { name => "shi-Tfng-MA" },
1306 { name => "si", lcid => 0x0000005b, group => 15, sopentypelang => "SNH" },
1307 { name => "si-LK", lcid => 0x0000045b },
1308 { name => "sk", lcid => 0x0000001b, oemcp => 852, ebcdiccp => 20880, group => 2, sabbrevlangname => "SKY", sopentypelang => "SKY" },
1309 { name => "sk-SK", lcid => 0x0000041b },
6d046dd98… Alex*1310 { name => "skr" },
1311 { name => "skr-PK" },
8cdb593f5… Alex*1312 { name => "sl", lcid => 0x00000024, oemcp => 852, ebcdiccp => 20880, group => 2 },
1313 { name => "sl-SI", lcid => 0x00000424 },
50c5eb31c… Alex*1314 { name => "sma", lcid => 0x0000783b, sparent => "se", ebcdiccp => 20278, sabbrevlangname => "SMB", sopentypelang => "SSM" },
8cdb593f5… Alex*1315 { name => "sma-Latn", alias => "sma" },
1316 { name => "sma-Latn-NO", alias => "sma-NO" },
1317 { name => "sma-Latn-SE", alias => "sma-SE" },
50c5eb31c… Alex*1318 { name => "sma-NO", lcid => 0x0000183b, ebcdiccp => 20277, sabbrevlangname => "SMA" },
1319 { name => "sma-SE", lcid => 0x00001c3b },
1320 { name => "smj", lcid => 0x00007c3b, sparent => "se", ebcdiccp => 20278, sabbrevlangname => "SMK", sopentypelang => "LSM" },
8cdb593f5… Alex*1321 { name => "smj-Latn", alias => "smj" },
1322 { name => "smj-Latn-NO", alias => "smj-NO" },
1323 { name => "smj-Latn-SE", alias => "smj-SE" },
50c5eb31c… Alex*1324 { name => "smj-NO", lcid => 0x0000103b, ebcdiccp => 20277, sabbrevlangname => "SMJ" },
1325 { name => "smj-SE", lcid => 0x0000143b },
8cdb593f5… Alex*1326 { name => "smn", lcid => 0x0000703b, sparent => "se", ebcdiccp => 20278, sopentypelang => "ISM" },
1327 { name => "smn-FI", lcid => 0x0000243b },
1328 { name => "smn-Latn", alias => "smn" },
1329 { name => "smn-Latn-FI", alias => "smn-FI" },
198de0dcb… Alex*1330 { name => "sms", lcid => 0x0000743b, sparent => "se", ebcdiccp => 20278, sopentypelang => "SKS" },
1331 { name => "sms-FI", lcid => 0x0000203b },
8cdb593f5… Alex*1332 { name => "sms-Latn", alias => "sms" },
1333 { name => "sms-Latn-FI", alias => "sms-FI" },
1334 { name => "sn", sopentypelang => "SNA0" },
1335 { name => "sn-Latn", file => "sn" },
1336 { name => "sn-Latn-ZW", file => "sn_ZW" },
1337 { name => "sn-ZW", alias => "sn-Latn-ZW" },
1338 { name => "so", lcid => 0x00000077, sopentypelang => "SML" },
1339 { name => "so-DJ" },
1340 { name => "so-ET" },
1341 { name => "so-KE" },
1342 { name => "so-SO", lcid => 0x00000477 },
1343 { name => "sq", lcid => 0x0000001c, oemcp => 852, ebcdiccp => 20880, group => 2 },
1344 { name => "sq-AL", lcid => 0x0000041c },
1345 { name => "sq-MK" },
1346 { name => "sq-XK" },
1347 { name => "sr", lcid => 0x00007c1a, sortlocale => "sr-Latn-RS", oemcp => 852, group => 2, sabbrevlangname => "SRB", sopentypelang => "SRB" },
1348 { name => "sr-Cyrl", lcid => 0x00006c1a, oemcp => 855, ebcdiccp => 21025, group => 5, sabbrevlangname => "SRO" },
1349 { name => "sr-Cyrl-BA", lcid => 0x00001c1a, sabbrevlangname => "SRN" },
1350 { name => "sr-Cyrl-ME", lcid => 0x0000301a, sabbrevlangname => "SRQ" },
1351 { name => "sr-Cyrl-RS", lcid => 0x0000281a },
1352 { name => "sr-Cyrl-XK" },
1353 { name => "sr-Latn", lcid => 0x0000701a, sabbrevlangname => "SRM" },
1354 { name => "sr-Latn-BA", lcid => 0x0000181a, maccp => 10082, ebcdiccp => 870, sabbrevlangname => "SRS" },
1355 { name => "sr-Latn-ME", lcid => 0x00002c1a, sabbrevlangname => "SRP" },
1356 { name => "sr-Latn-RS", lcid => 0x0000241a, sabbrevlangname => "SRM" },
1357 { name => "sr-Latn-XK" },
1358
1359
50c5eb31c… Alex*1360 { name => "ss", sopentypelang => "SWZ" },
1361 { name => "ss-SZ" },
1362 { name => "ss-ZA" },
1363 { name => "ssy" },
1364 { name => "ssy-ER" },
1365 { name => "st", lcid => 0x00000030 },
1366 { name => "st-LS" },
1367 { name => "st-ZA", lcid => 0x00000430 },
8cdb593f5… Alex*1368 { name => "su" },
1369 { name => "su-Latn" },
1370 { name => "su-Latn-ID" },
1371 { name => "sv", lcid => 0x0000001d, oemcp => 850, ebcdiccp => 20278, sabbrevlangname => "SVE", sopentypelang => "SVE" },
1372 { name => "sv-AX" },
1373 { name => "sv-FI", lcid => 0x0000081d, sabbrevlangname => "SVF" },
1374 { name => "sv-SE", lcid => 0x0000041d, sabbrevlangname => "SVE" },
1375 { name => "sw", lcid => 0x00000041, territory => "KE", oemcp => 437, ebcdiccp => 500, sabbrevlangname => "SWK", sopentypelang => "SWK" },
1376 { name => "sw-CD" },
1377 { name => "sw-KE", lcid => 0x00000441 },
1378 { name => "sw-TZ" },
1379 { name => "sw-UG" },
1380 { name => "swc-CD", alias => "sw-CD" },
50c5eb31c… Alex*1381 { name => "syr", lcid => 0x0000005a, slist => ",", group => 13 },
1382 { name => "syr-SY", lcid => 0x0000045a },
8cdb593f5… Alex*1383 { name => "syr-Syrc", alias => "syr" },
1384 { name => "syr-Syrc-SY", alias => "syr-SY" },
1385 { name => "ta", lcid => 0x00000049, slist => ",", group => 15, sabbrevlangname => "TAI" },
1386 { name => "ta-IN", lcid => 0x00000449 },
1387 { name => "ta-LK", lcid => 0x00000849, sabbrevlangname => "TAM" },
1388 { name => "ta-MY" },
1389 { name => "ta-SG" },
1390 { name => "te", lcid => 0x0000004a, group => 15 },
1391 { name => "te-IN", lcid => 0x0000044a },
1392 { name => "teo" },
1393 { name => "teo-KE" },
1394 { name => "teo-UG" },
1395 { name => "tg", lcid => 0x00000028, oemcp => 866, group => 5, sabbrevlangname => "TAJ", sopentypelang => "TAJ" },
1396 { name => "tg-Cyrl", lcid => 0x00007c28, file => "tg" },
1397 { name => "tg-Cyrl-TJ", lcid => 0x00000428, file => "tg_TJ" },
1398 { name => "tg-TJ", alias => "tg-Cyrl-TJ" },
1399 { name => "th", lcid => 0x0000001e, oemcp => 874, ebcdiccp => 20838, slist => ",", group => 11 },
1400 { name => "th-TH", lcid => 0x0000041e },
1401 { name => "ti", lcid => 0x00000073, territory => "ER", sopentypelang => "TGY" },
1402 { name => "ti-ER", lcid => 0x00000873 },
1403 { name => "ti-ET", lcid => 0x00000473, sabbrevlangname => "TIE" },
50c5eb31c… Alex*1404 { name => "tig", sopentypelang => "TGR" },
1405 { name => "tig-ER" },
b591b3167… Alex*1406 { name => "tig-Ethi-ER", alias => "tig-ER" },
8cdb593f5… Alex*1407 { name => "tk", lcid => 0x00000042, oemcp => 852, ebcdiccp => 20880, group => 2, sopentypelang => "TKM" },
1408 { name => "tk-Latn", alias => "tk" },
1409 { name => "tk-Latn-TM", alias => "tk-TM" },
1410 { name => "tk-TM", lcid => 0x00000442 },
50c5eb31c… Alex*1411 { name => "tn", lcid => 0x00000032, oemcp => 850, sopentypelang => "TNA" },
1412 { name => "tn-BW", lcid => 0x00000832, sabbrevlangname => "TSB" },
1413 { name => "tn-ZA", lcid => 0x00000432 },
8cdb593f5… Alex*1414 { name => "to", sopentypelang => "TGN" },
1415 { name => "to-TO" },
1416 { name => "tr", lcid => 0x0000001f, oemcp => 857, ebcdiccp => 20905, group => 6, sabbrevlangname => "TRK", sopentypelang => "TRK" },
1417 { name => "tr-CY" },
1418 { name => "tr-TR", lcid => 0x0000041f },
50c5eb31c… Alex*1419 { name => "ts", lcid => 0x00000031, sopentypelang => "TSG" },
1420 { name => "ts-ZA", lcid => 0x00000431 },
8cdb593f5… Alex*1421 { name => "tt", lcid => 0x00000044, oemcp => 866, group => 5, sabbrevlangname => "TTT" },
1422 { name => "tt-Cyrl", alias => "tt" },
1423 { name => "tt-Cyrl-RU", alias => "tt-RU" },
1424 { name => "tt-RU", lcid => 0x00000444 },
1425 { name => "twq" },
1426 { name => "twq-NE" },
6d046dd98… Alex*1427 { name => "tyv" },
1428 { name => "tyv-RU" },
8cdb593f5… Alex*1429 { name => "tzm", lcid => 0x0000005f, sortlocale => "tzm-Latn-DZ", oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "TZA" },
1430 { name => "tzm-Latn", lcid => 0x00007c5f, territory => "DZ", file => "tzm" },
1431 { name => "tzm-Latn-MA", file => "tzm_MA", oemcp => 65001 },
1432 { name => "tzm-Latn-DZ", lcid => 0x0000085f, file => "tzm" },
1433 { name => "tzm-MA", alias => "tzm-Latn-MA" },
b591b3167… Alex*1434 { name => "tzm-DZ", alias => "tzm-Latn-DZ" },
8cdb593f5… Alex*1435
1436
1437
1438
4172c0482… Alex*1439 { name => "ug", lcid => 0x00000080, oemcp => 720, slist => ",", group => 13, sopentypelang => "UYG", nativedigits => "0123456789" },
8cdb593f5… Alex*1440 { name => "ug-Arab", alias => "ug" },
1441 { name => "ug-Arab-CN", alias => "ug-CN" },
1442 { name => "ug-CN", lcid => 0x00000480 },
1443 { name => "uk", lcid => 0x00000022, oemcp => 866, maccp => 10017, ebcdiccp => 500, group => 5 },
1444 { name => "uk-UA", lcid => 0x00000422 },
1445 { name => "ur", lcid => 0x00000020, oemcp => 720 },
1446 { name => "ur-IN", lcid => 0x00000820, maccp => 65001, sabbrevlangname => "URI" },
1447 { name => "ur-PK", lcid => 0x00000420, group => 13 },
1448 { name => "uz", lcid => 0x00000043, oemcp => 857, maccp => 10029, group => 2 },
1449 { name => "uz-Arab", oemcp => 65001, maccp => 65001 },
1450 { name => "uz-Arab-AF" },
1451 { name => "uz-Cyrl", lcid => 0x00007843, oemcp => 866, maccp => 10007, group => 5, sabbrevlangname => "UZC" },
1452 { name => "uz-Cyrl-UZ", lcid => 0x00000843 },
1453 { name => "uz-Latn", lcid => 0x00007c43 },
1454 { name => "uz-Latn-UZ", lcid => 0x00000443 },
1455 { name => "vai" },
1456 { name => "vai-Latn" },
1457 { name => "vai-Latn-LR" },
1458 { name => "vai-Vaii" },
1459 { name => "vai-Vaii-LR" },
50c5eb31c… Alex*1460 { name => "ve", lcid => 0x00000033, sabbrevlangname => "ZZZ" },
1461 { name => "ve-ZA", lcid => 0x00000433 },
8cdb593f5… Alex*1462 { name => "vi", lcid => 0x0000002a, oemcp => 1258, slist => ",", group => 14, sabbrevlangname => "VIT", sopentypelang => "VIT" },
1463 { name => "vi-VN", lcid => 0x0000042a },
6d046dd98… Alex*1464 { name => "vmw" },
1465 { name => "vmw-MZ" },
50c5eb31c… Alex*1466 { name => "vo" },
1467 { name => "vo-001" },
8cdb593f5… Alex*1468 { name => "vun" },
1469 { name => "vun-TZ" },
50c5eb31c… Alex*1470 { name => "wa", oemcp => 850 },
1471 { name => "wa-BE" },
8cdb593f5… Alex*1472 { name => "wae" },
1473 { name => "wae-CH" },
50c5eb31c… Alex*1474 { name => "wal" },
1475 { name => "wal-ET" },
8cdb593f5… Alex*1476 { name => "wo", lcid => 0x00000088, oemcp => 850, ebcdiccp => 20297, sopentypelang => "WLF" },
1477 { name => "wo-Latn", alias => "wo" },
1478 { name => "wo-Latn-SN", alias => "wo-SN" },
1479 { name => "wo-SN", lcid => 0x00000488 },
1480 { name => "x-IV_mathan", lcid => 0x0001007f, alias => "" },
1481 { name => "xh", lcid => 0x00000034, oemcp => 850, sopentypelang => "XHS" },
1482 { name => "xh-ZA", lcid => 0x00000434 },
6d046dd98… Alex*1483 { name => "xnr" },
1484 { name => "xnr-IN" },
8cdb593f5… Alex*1485 { name => "xog" },
1486 { name => "xog-UG" },
1487 { name => "yav" },
1488 { name => "yav-CM" },
1489 { name => "yi", lcid => 0x0000003d, sabbrevlangname => "ZZZ", sopentypelang => "JII" },
6d046dd98… Alex*1490 { name => "yi-001", lcid => 0x0000043d, file => "yi" },
1491 { name => "yi-UA" },
8cdb593f5… Alex*1492 { name => "yo", lcid => 0x0000006a, oemcp => 437, sopentypelang => "YBA" },
1493 { name => "yo-BJ", ebcdiccp => 500 },
1494 { name => "yo-Latn", alias => "yo" },
1495 { name => "yo-Latn-NG", alias => "yo-NG" },
1496 { name => "yo-NG", lcid => 0x0000046a },
1497 { name => "yrl" },
1498 { name => "yrl-BR" },
1499 { name => "yrl-CO" },
1500 { name => "yrl-VE" },
1501 { name => "yue" },
1502 { name => "yue-Hans" },
1503 { name => "yue-Hans-CN" },
1504 { name => "yue-Hant" },
1505 { name => "yue-Hant-HK" },
1506 { name => "zgh" },
1507 { name => "zgh-MA", alias => "zgh-Tfng-MA" },
1508 { name => "zgh-Tfng", file => "zgh" },
1509 { name => "zgh-Tfng-MA", file => "zgh_MA" },
6d046dd98… Alex*1510 { name => "za" },
1511 { name => "za-CN" },
4172c0482… Alex*1512 { name => "zh", lcid => 0x00007804, ireadinglayout => 2, oemcp => 936, slist => ",", sscripts => "Hani Hans", sabbrevlangname => "CHS", sopentypelang => "ZHS", nativedigits => "0123456789" },
8cdb593f5… Alex*1513 { name => "zh-CN", lcid => 0x00000804, file => "zh_Hans_CN", sparent => "zh-Hans" },
1514 { name => "zh-CN_phoneb", lcid => 0x00050804, alias => "zh-CN" },
1515 { name => "zh-CN_stroke", lcid => 0x00020804, alias => "zh-CN" },
1516 { name => "zh-Hans", lcid => 0x00000004, group => 10 },
1517 { name => "zh-Hans-CN", alias => "zh-CN" },
1518 { name => "zh-Hans-CN-u-co-phonebk", alias => "zh-CN_phoneb" },
1519 { name => "zh-Hans-CN-u-co-stroke", alias => "zh-CN_stroke" },
4172c0482… Alex*1520 { name => "zh-Hans-HK", slist => ";", nativedigits => "" },
1521 { name => "zh-Hans-MO", slist => ";", nativedigits => "" },
8cdb593f5… Alex*1522 { name => "zh-Hans-SG", alias => "zh-SG" },
1523 { name => "zh-Hans-SG-u-co-phonebk", alias => "zh-SG_phoneb" },
1524 { name => "zh-Hans-SG-u-co-stroke", alias => "zh-SG_stroke" },
1525 { name => "zh-Hant", lcid => 0x00007c04, sortlocale => "zh-HK", ireadinglayout => 2, oemcp => 950, slist => ",", sscripts => "Hani Hant", group => 9, sabbrevlangname => "CHT", sopentypelang => "ZHH" },
1526 { name => "zh-Hant-HK", alias => "zh-HK" },
1527 { name => "zh-Hant-HK-u-co-unihan", alias => "zh-HK_radstr" },
1528 { name => "zh-Hant-MO", alias => "zh-MO" },
1529 { name => "zh-Hant-MO-u-co-stroke", alias => "zh-MO_stroke" },
1530 { name => "zh-Hant-MO-u-co-unihan", alias => "zh-MO_radstr" },
1531 { name => "zh-Hant-TW", alias => "zh-TW" },
1532 { name => "zh-Hant-TW-u-co-phonetic", alias => "zh-TW_pronun" },
1533 { name => "zh-Hant-TW-u-co-unihan", alias => "zh-TW_radstr" },
1534 { name => "zh-HK", lcid => 0x00000c04, file => "zh_Hant_HK", sparent => "zh-Hant", sabbrevlangname => "ZHH" },
1535 { name => "zh-HK_radstr", lcid => 0x00040c04, alias => "zh-HK" },
1536 { name => "zh-MO", lcid => 0x00001404, file => "zh_Hant_MO", sparent => "zh-Hant", sabbrevlangname => "ZHM", sopentypelang => "ZHT" },
1537 { name => "zh-MO_radstr", lcid => 0x00041404, alias => "zh-MO" },
1538 { name => "zh-MO_stroke", lcid => 0x00021404, alias => "zh-MO" },
1539 { name => "zh-SG", lcid => 0x00001004, file => "zh_Hans_SG", sparent => "zh-Hans", sabbrevlangname => "ZHI" },
1540 { name => "zh-SG_phoneb", lcid => 0x00051004, alias => "zh-SG" },
1541 { name => "zh-SG_stroke", lcid => 0x00021004, alias => "zh-SG" },
1542 { name => "zh-TW", lcid => 0x00000404, file => "zh_Hant_TW", sparent => "zh-Hant", sopentypelang => "ZHT" },
1543 { name => "zh-TW_pronun", lcid => 0x00030404, alias => "zh-TW" },
1544 { name => "zh-TW_radstr", lcid => 0x00040404, alias => "zh-TW" },
1545 { name => "zu", lcid => 0x00000035, oemcp => 850 },
1546 { name => "zu-ZA", lcid => 0x00000435 },
1547 );
1548
# Table of calendars, one hash per numeric calendar id (1..23).
# Keys used by the entries:
#   id               - numeric calendar identifier
#   type / locale    - presumably the CLDR calendar type and the locale whose
#                      data is used for it; confirm against the code that
#                      consumes this table
#   name             - plain display name, used by entries that have no type
#                      (and by id 13 alongside a locale)
#   eras             - list of era indices (e.g. 232..236 for "japanese")
#   itwodigityearmax - NOTE(review): looks like the last year a two-digit
#                      year expands to; confirm
my @calendars =
(
    { id =>  1, name => "Gregorian", itwodigityearmax => 2049 },
    { id =>  2, type => "gregorian", locale => "en-US", itwodigityearmax => 2049 },
    { id =>  3, type => "japanese", locale => "ja-JP", eras => [ 232..236 ] },
    { id =>  4, type => "roc", locale => "zh-TW", eras => [ 1 ] },
    { id =>  5, type => "dangi", locale => "ko-KR", eras => [ 0 ] },
    { id =>  6, type => "islamic", locale => "ar-SA", itwodigityearmax => 1451 },
    { id =>  7, type => "buddhist", locale => "th-TH", eras => [ 0 ] },
    { id =>  8, type => "hebrew", locale => "he-IL", itwodigityearmax => 5810 },
    { id =>  9, type => "gregorian", locale => "fr-FR", itwodigityearmax => 2049 },
    { id => 10, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
    { id => 11, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
    { id => 12, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
    { id => 13, name => "Julian", locale => "en-US", itwodigityearmax => 2049 },
    { id => 14, name => "Japanese Lunisolar" },
    { id => 15, name => "Chinese Lunisolar" },
    { id => 16, name => "Saka" },
    { id => 17, name => "Lunar ETO Chinese" },
    { id => 18, name => "Lunar ETO Korean" },
    { id => 19, name => "Lunar ETO Rokuyou" },
    { id => 20, name => "Korean Lunisolar" },
    { id => 21, name => "Taiwan Lunisolar" },
    { id => 22, type => "persian", locale => "prs-AF", itwodigityearmax => 1429 },
    { id => 23, type => "islamic-umalqura", locale => "ar-SA", itwodigityearmax => 1451 },
);
1575
# Table of geographic ids, sorted by ascending numeric id.
# Keys used by the entries:
#   id          - numeric geo id
#   name        - two-letter territory code or three-digit region code;
#                 some ids have no name at all
#   parent      - explicit parent territory/region code; on nameless entries
#                 it is the only other information given
#   region      - flag set on a single entry ("AN"); NOTE(review): meaning is
#                 defined by the consumer of this table, confirm there
#   sintlsymbol - currency symbol override ("XCD" for region "029")
# Note that two ids (125 and 220) both carry the name "SJ".
my @geoids =
(
    { id => 2, name => "AG" },
    { id => 3, name => "AF" },
    { id => 4, name => "DZ" },
    { id => 5, name => "AZ" },
    { id => 6, name => "AL" },
    { id => 7, name => "AM" },
    { id => 8, name => "AD" },
    { id => 9, name => "AO" },
    { id => 10, name => "AS" },
    { id => 11, name => "AR" },
    { id => 12, name => "AU" },
    { id => 14, name => "AT" },
    { id => 17, name => "BH" },
    { id => 18, name => "BB" },
    { id => 19, name => "BW" },
    { id => 20, name => "BM" },
    { id => 21, name => "BE" },
    { id => 22, name => "BS" },
    { id => 23, name => "BD" },
    { id => 24, name => "BZ" },
    { id => 25, name => "BA" },
    { id => 26, name => "BO" },
    { id => 27, name => "MM" },
    { id => 28, name => "BJ" },
    { id => 29, name => "BY" },
    { id => 30, name => "SB" },
    { id => 32, name => "BR" },
    { id => 34, name => "BT" },
    { id => 35, name => "BG" },
    { id => 37, name => "BN" },
    { id => 38, name => "BI" },
    { id => 39, name => "CA" },
    { id => 40, name => "KH" },
    { id => 41, name => "TD" },
    { id => 42, name => "LK" },
    { id => 43, name => "CG" },
    { id => 44, name => "CD" },
    { id => 45, name => "CN" },
    { id => 46, name => "CL" },
    { id => 49, name => "CM" },
    { id => 50, name => "KM" },
    { id => 51, name => "CO" },
    { id => 54, name => "CR" },
    { id => 55, name => "CF" },
    { id => 56, name => "CU" },
    { id => 57, name => "CV" },
    { id => 59, name => "CY" },
    { id => 61, name => "DK" },
    { id => 62, name => "DJ" },
    { id => 63, name => "DM" },
    { id => 65, name => "DO" },
    { id => 66, name => "EC" },
    { id => 67, name => "EG" },
    { id => 68, name => "IE" },
    { id => 69, name => "GQ" },
    { id => 70, name => "EE" },
    { id => 71, name => "ER" },
    { id => 72, name => "SV" },
    { id => 73, name => "ET" },
    { id => 75, name => "CZ" },
    { id => 77, name => "FI" },
    { id => 78, name => "FJ" },
    { id => 80, name => "FM" },
    { id => 81, name => "FO" },
    { id => 84, name => "FR" },
    { id => 86, name => "GM" },
    { id => 87, name => "GA" },
    { id => 88, name => "GE" },
    { id => 89, name => "GH" },
    { id => 90, name => "GI" },
    { id => 91, name => "GD" },
    { id => 93, name => "GL" },
    { id => 94, name => "DE" },
    { id => 98, name => "GR" },
    { id => 99, name => "GT" },
    { id => 100, name => "GN" },
    { id => 101, name => "GY" },
    { id => 103, name => "HT" },
    { id => 104, name => "HK" },
    { id => 106, name => "HN" },
    { id => 108, name => "HR" },
    { id => 109, name => "HU" },
    { id => 110, name => "IS" },
    { id => 111, name => "ID" },
    { id => 113, name => "IN" },
    { id => 114, name => "IO" },
    { id => 116, name => "IR" },
    { id => 117, name => "IL" },
    { id => 118, name => "IT" },
    { id => 119, name => "CI" },
    { id => 121, name => "IQ" },
    { id => 122, name => "JP" },
    { id => 124, name => "JM" },
    { id => 125, name => "SJ" },
    { id => 126, name => "JO" },
    { id => 127, parent => "UM" },
    { id => 129, name => "KE" },
    { id => 130, name => "KG" },
    { id => 131, name => "KP" },
    { id => 133, name => "KI" },
    { id => 134, name => "KR" },
    { id => 136, name => "KW" },
    { id => 137, name => "KZ" },
    { id => 138, name => "LA" },
    { id => 139, name => "LB" },
    { id => 140, name => "LV" },
    { id => 141, name => "LT" },
    { id => 142, name => "LR" },
    { id => 143, name => "SK" },
    { id => 145, name => "LI" },
    { id => 146, name => "LS" },
    { id => 147, name => "LU" },
    { id => 148, name => "LY" },
    { id => 149, name => "MG" },
    { id => 151, name => "MO" },
    { id => 152, name => "MD" },
    { id => 154, name => "MN" },
    { id => 156, name => "MW" },
    { id => 157, name => "ML" },
    { id => 158, name => "MC" },
    { id => 159, name => "MA" },
    { id => 160, name => "MU" },
    { id => 162, name => "MR" },
    { id => 163, name => "MT" },
    { id => 164, name => "OM" },
    { id => 165, name => "MV" },
    { id => 166, name => "MX" },
    { id => 167, name => "MY" },
    { id => 168, name => "MZ" },
    { id => 173, name => "NE" },
    { id => 174, name => "VU" },
    { id => 175, name => "NG" },
    { id => 176, name => "NL" },
    { id => 177, name => "NO" },
    { id => 178, name => "NP" },
    { id => 180, name => "NR" },
    { id => 181, name => "SR" },
    { id => 182, name => "NI" },
    { id => 183, name => "NZ" },
    { id => 184, name => "PS" },
    { id => 185, name => "PY" },
    { id => 187, name => "PE" },
    { id => 190, name => "PK" },
    { id => 191, name => "PL" },
    { id => 192, name => "PA" },
    { id => 193, name => "PT" },
    { id => 194, name => "PG" },
    { id => 195, name => "PW" },
    { id => 196, name => "GW" },
    { id => 197, name => "QA" },
    { id => 198, name => "RE" },
    { id => 199, name => "MH" },
    { id => 200, name => "RO" },
    { id => 201, name => "PH" },
    { id => 202, name => "PR" },
    { id => 203, name => "RU" },
    { id => 204, name => "RW" },
    { id => 205, name => "SA" },
    { id => 206, name => "PM" },
    { id => 207, name => "KN" },
    { id => 208, name => "SC" },
    { id => 209, name => "ZA" },
    { id => 210, name => "SN" },
    { id => 212, name => "SI" },
    { id => 213, name => "SL" },
    { id => 214, name => "SM" },
    { id => 215, name => "SG" },
    { id => 216, name => "SO" },
    { id => 217, name => "ES" },
    { id => 218, name => "LC" },
    { id => 219, name => "SD" },
    { id => 220, name => "SJ" },
    { id => 221, name => "SE" },
    { id => 222, name => "SY" },
    { id => 223, name => "CH" },
    { id => 224, name => "AE" },
    { id => 225, name => "TT" },
    { id => 227, name => "TH" },
    { id => 228, name => "TJ" },
    { id => 231, name => "TO" },
    { id => 232, name => "TG" },
    { id => 233, name => "ST" },
    { id => 234, name => "TN" },
    { id => 235, name => "TR" },
    { id => 236, name => "TV" },
    { id => 237, name => "TW" },
    { id => 238, name => "TM" },
    { id => 239, name => "TZ" },
    { id => 240, name => "UG" },
    { id => 241, name => "UA" },
    { id => 242, name => "GB" },
    { id => 244, name => "US" },
    { id => 245, name => "BF" },
    { id => 246, name => "UY" },
    { id => 247, name => "UZ" },
    { id => 248, name => "VC" },
    { id => 249, name => "VE" },
    { id => 251, name => "VN" },
    { id => 252, name => "VI" },
    { id => 253, name => "VA" },
    { id => 254, name => "NA" },
    { id => 257, name => "EH" },
    { id => 258, parent => "UM" },
    { id => 259, name => "WS" },
    { id => 260, name => "SZ" },
    { id => 261, name => "YE" },
    { id => 263, name => "ZM" },
    { id => 264, name => "ZW" },
    { id => 269, name => "CS" },
    { id => 270, name => "ME" },
    { id => 271, name => "RS" },
    { id => 273, name => "CW" },
    { id => 276, name => "SS" },
    { id => 300, name => "AI" },
    { id => 301, name => "AQ" },
    { id => 302, name => "AW" },
    { id => 303, parent => "SH" },
    { id => 304, parent => "053" },
    { id => 305, parent => "UM" },
    { id => 306, name => "BV" },
    { id => 307, name => "KY" },
    { id => 308, name => "830", parent => "155" },
    { id => 309, name => "CX" },
    { id => 310, parent => "009" },
    { id => 311, name => "CC" },
    { id => 312, name => "CK" },
    { id => 313, parent => "053" },
    { id => 314, parent => "IO" },
    { id => 315, name => "FK" },
    { id => 317, name => "GF" },
    { id => 318, name => "PF" },
    { id => 319, name => "TF" },
    { id => 321, name => "GP" },
    { id => 322, name => "GU" },
    { id => 323 },
    { id => 324, name => "GG" },
    { id => 325, name => "HM" },
    { id => 326, parent => "UM" },
    { id => 327, parent => "UM" },
    { id => 328, name => "JE" },
    { id => 329, parent => "UM" },
    { id => 330, name => "MQ" },
    { id => 331, name => "YT" },
    { id => 332, name => "MS" },
    { id => 333, name => "AN", region => 1 },
    { id => 334, name => "NC" },
    { id => 335, name => "NU" },
    { id => 336, name => "NF" },
    { id => 337, name => "MP" },
    { id => 338, parent => "UM" },
    { id => 339, name => "PN" },
    { id => 340, parent => "MP" },
    { id => 341, parent => "MP" },
    { id => 342, name => "GS" },
    { id => 343, name => "SH" },
    { id => 346, parent => "MP" },
    { id => 347, name => "TK" },
    { id => 348, parent => "SH" },
    { id => 349, name => "TC" },
    { id => 351, name => "VG" },
    { id => 352, name => "WF" },
    { id => 742, name => "002" },
    { id => 2129, name => "142" },
    { id => 10541, name => "150" },
    { id => 15126, name => "IM" },
    { id => 19618, name => "MK" },
    { id => 20900, name => "054" },
    { id => 21206, name => "057" },
    { id => 21242, parent => "UM" },
    { id => 23581, name => "021" },
    { id => 26286, name => "061" },
    { id => 27082, name => "013" },
    { id => 27114, name => "009" },
    { id => 30967, name => "SX" },
    { id => 31396, name => "005" },
    { id => 31706, name => "MF" },
    { id => 39070, name => "001" },
    { id => 42483, name => "011" },
    { id => 42484, name => "017" },
    { id => 42487, name => "015" },
    { id => 47590, name => "143" },
    { id => 47599, name => "035" },
    { id => 47600, name => "030" },
    { id => 47603, name => "014" },
    { id => 47609, name => "151" },
    { id => 47610, name => "039" },
    { id => 47611, name => "145" },
    { id => 47614, name => "034" },
    { id => 7299303, name => "TL" },
    { id => 9914689, name => "XK" },
    { id => 10026358, name => "019" },
    { id => 10028789, name => "AX" },
    { id => 10039880, name => "029", sintlsymbol => "XCD" },
    { id => 10039882, name => "154" },
    { id => 10039883, name => "018" },
    { id => 10210824, name => "155" },
    { id => 10210825, name => "053" },
    { id => 161832015, name => "BL" },
    { id => 161832256, name => "UM" },
    { id => 161832257, name => "419", parent => "019" },
    { id => 161832258, name => "BQ" },
);
1880
# File-level working tables.  Unless noted otherwise they are sparse arrays
# indexed by Unicode code point and start out empty.

# Code page conversion state; not filled in by the code visible in this part
# of the file (presumably by the code page generation code - verify there).
my @cp2uni = ();
my @glyph2uni = ();
my @lead_bytes = ();
my @uni2cp = ();

# Simple case mappings and digit values, filled by load_data().
my @tolower_table = ();
my @toupper_table = ();
my @digitmap_table = ();

# Width and CJK compatibility mappings, filled by load_data() from the
# <narrow>/<wide>/<font>/<square>/<circle> and single-character
# decompositions.
my @halfwidth_table = ();
my @fullwidth_table = ();
my @cjk_compat_table = ();

# Not filled in by the code visible in this part of the file.
my @chinese_traditional_table = ();
my @chinese_simplified_table = ();

# Per-character properties, filled by load_data().
my @category_table = ();          # ctype flag bits
my @initial_joining_table = ();   # default joining type ("T" or "U")
my @direction_table = ();         # bidi class name
my @decomp_table = ();            # canonical decomposition: ref to 1 or 2 code points
my @combining_class_table = ();   # combining class (0x100 for category "Co")
my @decomp_compat_table = ();     # canonical or compatibility decomposition (array ref)
my @comp_exclusions = ();         # 1 for code points listed in CompositionExclusions.txt

# IDNA tables; read by get_composition(), filled elsewhere in the file.
my @idna_decomp_table = ();
my @idna_disallowed = ();

# Not used by the code visible in this part of the file.
my %registry_keys;
my $default_char;
my $default_wchar;

# Arabic joining forms: for each form, an array indexed by the base character
# giving the corresponding presentation form (filled by load_data()).
my %joining_forms =
(
    "isolated" => [],
    "final"    => [],
    "initial"  => [],
    "medial"   => []
);

# Description ("url" or "url:member") of the data file most recently opened
# by open_data_file().
my $current_data_file;
1915
# Convert a list of Unicode code points to UTF-16 code units.
# Values below 0x10000 are passed through unchanged; anything else is
# replaced by its high/low surrogate pair.  Returns the flattened list.
sub to_utf16(@)
{
    return map {
        my $cp = $_;
        my $offset = $cp - 0x10000;
        $cp < 0x10000 ? ($cp)
                      : (0xd800 | ($offset >> 10), 0xdc00 | ($offset & 0x3ff));
    } @_;
}
1933
65a82cb18… Alex*1934
1935
# Open one of the reference data files described in %data_files, downloading
# it into the local cache directory first if it is not there yet.
#
#   $id   - key into %data_files (supplies the url, the expected sha256 and
#           optionally the cache file name)
#   $name - for .zip and .tar.gz downloads, the archive member to extract;
#           not used for plain files
#
# Returns a read handle on the file, or on the output of the extraction
# command for archives.  Dies when the id is unknown, the download fails,
# the SHA-256 of the cached file differs from the expected one, or the file
# cannot be opened.  As a side effect $current_data_file is set to
# "url:name" (or just the url when no member name was given).
sub open_data_file($@)
{
    my ($id, $name) = @_;
    my $data = $data_files{$id} or die "unknown data file $id";
    my $cache = ($ENV{XDG_CACHE_HOME} || "$ENV{HOME}/.cache") . "/wine";

    my $url = $data->{url};
    # cache under the explicit name if there is one, else the url's last path component
    my $filename = "$cache/" . ($data->{name} || ($url =~ s/.*\/([^\/]+)$/$1/r));
    unless (-f $filename)
    {
        print "Fetching $url...\n";
        system( "mkdir", "-p", $cache ) == 0 or die "cannot create $cache";
        if (system( "wget", "-q", "-O", $filename, $url ) != 0)
        {
            # wget -O creates the output file before the transfer starts, so
            # a failed download leaves an empty or truncated file behind.
            # Remove it, otherwise the next run would skip the download and
            # fail on the checksum forever.
            unlink $filename;
            die "cannot fetch $url";
        }
    }

    my $sha = Digest::SHA->new( "sha256" )->addfile( $filename )->hexdigest;
    die "invalid checksum $sha for $filename" unless $sha eq $data->{sha};

    # Lexical handle plus three-argument / list-form open: the file name is
    # never interpreted as an open mode or passed through a shell.
    my $fh;
    if ($filename =~ /\.zip$/)
    {
        open $fh, "-|", "unzip", "-p", $filename, $name or die "cannot extract $name from $filename";
    }
    elsif ($filename =~ /\.tar\.gz$/)
    {
        open $fh, "-|", "tar", "-x", "-f", $filename, "-O", $name or die "cannot extract $name from $filename";
    }
    else
    {
        open $fh, "<", $filename or die "cannot open $filename";
    }
    $current_data_file = $name ? "$url:$name" : $url;
    return $fh;
}
1970
8cdb593f5… Alex*1971
1972
# Load an XML document from one of the reference data files.
#
#   $id   - key into %data_files, passed on to open_data_file()
#   $name - archive member to read, passed on to open_data_file()
#
# Returns the document object produced by XML::LibXML->load_xml().
sub load_xml_data_file($@)
{
    my ($id, $name) = @_;
    my $FILE = open_data_file( $id, $name );
    my $xml = XML::LibXML->load_xml( IO => $FILE );
    # Close the handle we were actually given; the bareword FILE would refer
    # to the global glob, which is no longer the one opened by
    # open_data_file() once that function has returned.
    close $FILE;
    return $xml;
}
1981
fb270ddca… Alex*1982
a646e4e6f… Alex*1983
# Recursively expand a character through a decomposition table.
#
#   $char  - code point to expand
#   $table - reference to an array indexed by code point whose defined
#            entries are array refs holding the decomposition
#
# Returns the fully expanded list of code points; a character without an
# entry in the table expands to itself.
sub get_decomposition($$);
sub get_decomposition($$)
{
    my ($char, $table) = @_;

    my $mapping = $table->[$char];
    return $char unless defined $mapping;
    return map { get_decomposition( $_, $table ) } @{$mapping};
}
1997
ce41b3211… Alex*1998
1999
# Return the canonical decomposition of $ch if it can be used as a
# composition pair, or an empty list otherwise.
#
#   $ch     - code point to look up in @decomp_table
#   $compat - 1 to also apply the compatibility-table restriction,
#             2 to apply the IDNA-table restrictions; other values apply
#             only the common checks
#
# A decomposition is rejected when it has fewer than two elements, when $ch
# is a composition exclusion, or when $ch or its first element has a
# non-zero combining class.
sub get_composition($$)
{
    my ($ch, $compat) = @_;

    my $entry = $decomp_table[$ch];
    return () unless defined $entry;

    my @parts = @{$entry};
    return () unless @parts >= 2;
    my ($first, $second) = @parts[0, 1];

    return () if $comp_exclusions[$ch]
              || $combining_class_table[$ch]
              || $combining_class_table[$first];

    if ($compat == 1)
    {
        # first element has a compatibility mapping but no canonical one
        return () if !defined $decomp_table[$first] && defined $decomp_compat_table[$first];
    }
    elsif ($compat == 2)
    {
        my $idna_first = $idna_decomp_table[$first];

        # first element has an IDNA mapping but no canonical one
        return () if !defined $decomp_table[$first] && defined $idna_first;
        # the IDNA mapping of the first element is itself mapped further
        return () if defined $idna_first && defined $idna_decomp_table[$idna_first->[0]];
        # second element has an IDNA mapping
        return () if defined $idna_decomp_table[$second];
    }
    return @parts;
}
2018
a646e4e6f… Alex*2019
2020
# Build a table of full decompositions expressed in UTF-16.
#
# Takes a decomposition table as a flat list (index = code point, defined
# entries = array refs) and returns a list with the same indices where each
# defined entry is replaced by a reference to its recursively expanded
# decomposition, converted to UTF-16 code units.
sub build_decompositions(@)
{
    my @src = @_;
    my @dst;

    foreach my $char (grep { defined $src[$_] } 0 .. $#src)
    {
        $dst[$char] = [ to_utf16( get_decomposition( $char, \@src )) ];
    }
    return @dst;
}
2034
f9f3e57cf… Alex*2035
2036
# Compose Hangul jamo sequences into precomposed syllables.
#
# Walks the given list of code points and replaces a leading consonant
# followed by a vowel with the corresponding LV syllable, then merges an LV
# syllable with a following trailing consonant into an LVT syllable.
# Everything else is copied through unchanged.  Returns the new list.
sub compose_hangul(@)
{
    my ($s_base, $l_base, $v_base, $t_base) = (0xac00, 0x1100, 0x1161, 0x11a7);
    my ($l_count, $v_count, $t_count) = (19, 21, 28);
    my $s_count = $l_count * $v_count * $t_count;

    my @pending = @_;
    my @composed;

    while (@pending)
    {
        my $cur = shift @pending;

        # leading consonant + vowel -> LV syllable
        if (@pending && $cur >= $l_base && $cur < $l_base + $l_count
            && $pending[0] >= $v_base && $pending[0] < $v_base + $v_count)
        {
            my $vowel = shift @pending;
            $cur = $s_base + (($cur - $l_base) * $v_count + ($vowel - $v_base)) * $t_count;
        }

        # LV syllable + trailing consonant -> LVT syllable; $t_base itself
        # is excluded because it stands for "no trailing consonant"
        if (@pending && $cur >= $s_base && $cur < $s_base + $s_count
            && ($cur - $s_base) % $t_count == 0
            && $pending[0] > $t_base && $pending[0] < $t_base + $t_count)
        {
            my $trail = shift @pending;
            $cur += $trail - $t_base;
        }

        push @composed, $cur;
    }
    return @composed;
}
2072
57a6033c0… Alex*2073
2074
# Drop case mappings that do not round-trip.
#
#   $upper - reference to the uppercase table (index = code point)
#   $lower - reference to the lowercase table (index = code point)
#
# An entry of one table is kept only if the other table maps its target
# back to the original character; all other entries are set to undef.  The
# uppercase table is pruned first, and the lowercase table is then checked
# against the already pruned uppercase table.  Both tables are modified in
# place.
sub remove_linguistic_mappings($$)
{
    my ($upper, $lower) = @_;

    foreach my $pass ([ $upper, $lower ], [ $lower, $upper ])
    {
        my ($table, $inverse) = @{$pass};

        foreach my $char (0 .. $#{$table})
        {
            my $target = $table->[$char];
            next unless defined $target;
            next if defined $inverse->[$target] && $inverse->[$target] == $char;
            $table->[$char] = undef;
        }
    }
}
2094
a646e4e6f… Alex*2095
2096
cb524e09a… Alex*2097 sub load_data()
fb270ddca… Alex*2098 {
dc727fa7b… Alex*2099 my $start;
fb270ddca… Alex*2100
2101
2102
cfaa28933… Alex*2103 my $UNICODE_DATA = open_data_file( "ucd", "UnicodeData.txt" );
65a82cb18… Alex*2104 while (<$UNICODE_DATA>)
fb270ddca… Alex*2105 {
2106
dc727fa7b… Alex*2107 my ($code, $name, $cat, $comb, $bidi,
2108 $decomp, $dec, $dig, $num, $mirror,
2109 $oldname, $comment, $upper, $lower, $title) = split /;/;
c97bb4c5d… Alex*2110 my $src = hex $code;
261abcd5b… Alex*2111
c97bb4c5d… Alex*2112 die "unknown category $cat" unless defined $categories{$cat};
2113 die "unknown directionality $bidi" unless defined $directions{$bidi};
2114
2115 $category_table[$src] = $categories{$cat};
f09dade9c… Alex*2116 $direction_table[$src] = $bidi;
9c11fc642… Niko*2117 if ($cat eq "Mn" || $cat eq "Me" || $cat eq "Cf")
2118 {
bea9c706e… Niko*2119 $initial_joining_table[$src] = $joining_types{"T"};
9c11fc642… Niko*2120 }
2121 else
2122 {
bea9c706e… Niko*2123 $initial_joining_table[$src] = $joining_types{"U"};
9c11fc642… Niko*2124 }
c97bb4c5d… Alex*2125
2126 if ($lower ne "")
2127 {
2128 $tolower_table[$src] = hex $lower;
2129 }
2130 if ($upper ne "")
2131 {
2132 $toupper_table[$src] = hex $upper;
2133 }
2134 if ($dec ne "")
2135 {
2136 $category_table[$src] |= $ctype{"digit"};
2137 }
a02ce8108… Jon *2138 if ($dig ne "")
2139 {
2140 $digitmap_table[$src] = ord $dig;
2141 }
f9f3e57cf… Alex*2142 $combining_class_table[$src] = ($cat ne "Co") ? $comb : 0x100;
fb270ddca… Alex*2143
d87d4a4a0… Alex*2144 $category_table[$src] |= $ctype{"nonspacing"} if $bidi eq "NSM";
2145 $category_table[$src] |= $ctype{"diacritic"} if $name =~ /^(COMBINING)|(MODIFIER LETTER)\W/;
2146 $category_table[$src] |= $ctype{"vowelmark"} if $name =~ /\sVOWEL/ || $oldname =~ /\sVOWEL/;
2147 $category_table[$src] |= $ctype{"halfwidth"} if $name =~ /^HALFWIDTH\s/;
2148 $category_table[$src] |= $ctype{"fullwidth"} if $name =~ /^FULLWIDTH\s/;
2149 $category_table[$src] |= $ctype{"hiragana"} if $name =~ /(HIRAGANA)|(\WKANA\W)/;
2150 $category_table[$src] |= $ctype{"katakana"} if $name =~ /(KATAKANA)|(\WKANA\W)/;
2151 $category_table[$src] |= $ctype{"ideograph"} if $name =~ /^<CJK Ideograph/;
2152 $category_table[$src] |= $ctype{"ideograph"} if $name =~ /^CJK COMPATIBILITY IDEOGRAPH/;
2153 $category_table[$src] |= $ctype{"ideograph"} if $name =~ /^HANGZHOU/;
2154 $category_table[$src] |= $ctype{"highsurrogate"} if $name =~ /High Surrogate/;
2155 $category_table[$src] |= $ctype{"lowsurrogate"} if $name =~ /Low Surrogate/;
2156
c97bb4c5d… Alex*2157
2158 if ($name =~ /, First>/) { $start = $src; }
2159 if ($name =~ /, Last>/)
2160 {
2161 while ($start < $src)
2162 {
2163 $category_table[$start] = $category_table[$src];
2164 $direction_table[$start] = $direction_table[$src];
f9f3e57cf… Alex*2165 $combining_class_table[$start] = $combining_class_table[$src];
c97bb4c5d… Alex*2166 $start++;
2167 }
2168 }
7cae558bd… Alex*2169
c97bb4c5d… Alex*2170 next if $decomp eq "";
fb270ddca… Alex*2171
eceb69e17… Huw *2172 if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)/)
2173 {
a646e4e6f… Alex*2174 my @seq = map { hex $_; } (split /\s+/, (split /\s+/, $decomp, 2)[1]);
2175 $decomp_compat_table[$src] = \@seq;
eceb69e17… Huw *2176 }
2177
b956620d8… Alex*2178 if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)$/)
fb270ddca… Alex*2179 {
2180
b956620d8… Alex*2181 my $dst = hex $2;
2182 if ($1 eq "narrow")
2183 {
2184 $halfwidth_table[$dst] = $src;
2185 $fullwidth_table[$src] = $dst;
2186 }
2187 elsif ($1 eq "wide")
a5fe68870… Alex*2188 {
b956620d8… Alex*2189 next if $dst == 0x5c;
2190 $fullwidth_table[$dst] = $src;
2191 $halfwidth_table[$src] = $dst;
2192 }
2193 elsif ($1 eq "font" || $1 eq "square" || $1 eq "circle")
2194 {
2195 $fullwidth_table[$src] = $dst if $src >= 0x10000;
2196 }
2197 elsif ($1 eq "isolated" || $1 eq "final" || $1 eq "initial" || $1 eq "medial")
2198 {
2199 ${joining_forms{$1}}[$dst] = $src;
a5fe68870… Alex*2200 }
fb270ddca… Alex*2201 }
2202 elsif ($decomp =~ /^<compat>\s+0020\s+([0-9a-fA-F]+)/)
2203 {
2204
2205 }
2206 elsif ($decomp =~ /^([0-9a-fA-F]+)/)
2207 {
d87d4a4a0… Alex*2208
e709cdbae… Alex*2209 if ($decomp =~ /^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)$/)
2210 {
a646e4e6f… Alex*2211 $decomp_table[$src] = $decomp_compat_table[$src] = [ hex $1, hex $2 ];
e709cdbae… Alex*2212 }
db3ae2ca9… Huw *2213 elsif ($decomp =~ /^([0-9a-fA-F]+)$/)
a02ce8108… Jon *2214 {
9e6d0e459… Alex*2215 my $dst = hex $1;
db3ae2ca9… Huw *2216
9e6d0e459… Alex*2217 $decomp_table[$src] = $decomp_compat_table[$src] = [ $dst ];
b956620d8… Alex*2218 if ($name =~ /^CJK COMPATIBILITY IDEOGRAPH/)
2219 {
2220 $cjk_compat_table[$src] = $dst;
2221 $fullwidth_table[$src] = $dst if $src >= 0x10000;
2222 }
a02ce8108… Jon *2223 }
fb270ddca… Alex*2224 }
2225 }
65a82cb18… Alex*2226 close $UNICODE_DATA;
c97bb4c5d… Alex*2227
2228
2229
d87d4a4a0… Alex*2230 for (my $i = 0; $i < @decomp_table; $i++)
2231 {
2232 next unless defined $decomp_table[$i];
2233 $category_table[$i] |= $category_table[$decomp_table[$i]->[0]];
2234 }
dc727fa7b… Alex*2235 foreach my $cat (keys %special_categories)
c97bb4c5d… Alex*2236 {
2237 my $flag = $ctype{$cat};
dc727fa7b… Alex*2238 foreach my $i (@{$special_categories{$cat}}) { $category_table[$i] |= $flag; }
c97bb4c5d… Alex*2239 }
d87d4a4a0… Alex*2240 for (my $i = 0; $i < @decomp_compat_table; $i++)
2241 {
2242 next unless defined $decomp_compat_table[$i];
2243 next unless @{$decomp_compat_table[$i]} == 2;
2244 $category_table[$i] |= $category_table[$decomp_compat_table[$i]->[1]] & $ctype{"diacritic"};
2245 }
4b5c67ee0… Alex*2246
2b9d720ea… Alex*2247
2248
cfaa28933… Alex*2249 my $EXCL = open_data_file( "ucd", "CompositionExclusions.txt" );
2b9d720ea… Alex*2250 while (<$EXCL>)
2251 {
2252 s/\#.*//;
2253 if (/^([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)\s*$/)
2254 {
2255 foreach my $i (hex $1 .. hex $2) { $comp_exclusions[$i] = 1; }
2256 }
2257 elsif (/^([0-9a-fA-F]+)\s*$/)
2258 {
2259 $comp_exclusions[hex $1] = 1;
2260 }
2261 }
2262 close $EXCL;
f9f3e57cf… Alex*2263
2264
2265
2266 @idna_decomp_table = @decomp_compat_table;
cfaa28933… Alex*2267 my $IDNA = open_data_file( "idna", "IdnaMappingTable.txt" );
f9f3e57cf… Alex*2268 while (<$IDNA>)
2269 {
2270 s/\#.*//;
2271 next if /^\s*$/;
2272 my ($char, $type, $mapping) = split /;/;
2273 my ($ch1, $ch2);
2274 if ($char =~ /([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)/)
2275 {
2276 $ch1 = hex $1;
2277 $ch2 = hex $2;
2278 }
2279 elsif ($char =~ /([0-9a-fA-F]+)/)
2280 {
2281 $ch1 = $ch2 = hex $1;
2282 }
2283
2284 if ($type =~ /mapped/ || $type =~ /deviation/)
2285 {
2286 $mapping =~ s/^\s*(([0-9a-fA-F]+\s+)+)\s*$/$1/;
2287 my @seq = map { hex $_; } split /\s+/, $mapping;
2288 foreach my $i ($ch1 .. $ch2) { $idna_decomp_table[$i] = @seq ? \@seq : [ 0 ]; }
2289 }
2290 elsif ($type =~ /valid/)
2291 {
2292 }
2293 elsif ($type =~ /ignored/)
2294 {
2295 foreach my $i ($ch1 .. $ch2) { $idna_decomp_table[$i] = [ 0 ]; }
2296 }
2297 elsif ($type =~ /disallowed/)
2298 {
2299 foreach my $i ($ch1 .. $ch2)
2300 {
2301 $idna_decomp_table[$i] = undef;
2302 $idna_disallowed[$i] = 1;
2303 }
2304 }
2305 }
2306 close $IDNA;
9e6d0e459… Alex*2307
2308
2309
cfaa28933… Alex*2310 my $UNIHAN = open_data_file( "unihan", "Unihan_Variants.txt" );
9e6d0e459… Alex*2311 while (<$UNIHAN>)
2312 {
2313 s/\#.*//;
2314 next if /^\s*$/;
b956620d8… Alex*2315 if (/^U\+([0-9a-fA-F]{4})\s+kTraditionalVariant\s+U\+([0-9a-fA-F]{4})$/)
9e6d0e459… Alex*2316 {
b956620d8… Alex*2317 next if hex $1 < 0x4dc0;
9e6d0e459… Alex*2318 $chinese_traditional_table[hex $1] = hex $2;
2319 }
b956620d8… Alex*2320 elsif (/^U\+([0-9a-fA-F]{4})\s+kSimplifiedVariant\s+U\+([0-9a-fA-F]{4})$/)
9e6d0e459… Alex*2321 {
b956620d8… Alex*2322 next if hex $1 < 0x4dc0;
9e6d0e459… Alex*2323 $chinese_simplified_table[hex $1] = hex $2;
2324 }
2325 }
2326 close $UNIHAN;
b956620d8… Alex*2327 foreach my $i (0xf900..0xfaff)
2328 {
2329 next unless defined $cjk_compat_table[$i];
2330 next if defined $chinese_simplified_table[$cjk_compat_table[$i]];
2331 $chinese_simplified_table[$i] = $cjk_compat_table[$i];
2332 }
fb270ddca… Alex*2333 }
2334
2335
712839d58… Alex*2336
2337
# Register the key "$base\$key" in %registry_keys with $defval as the first
# element of its entry. A key that already has an entry is left untouched.
sub add_registry_key($$$)
{
    my ($base, $key, $defval) = @_;
    my $path = "$base\\$key";
    $registry_keys{$path} //= [ $defval ];
}
2343
2344
2345
# Append the line "'$name' = $value" to the entry of key "$base\$key",
# creating the key (with an undefined default) if it does not exist yet.
sub add_registry_value($$$$)
{
    my ($base, $key, $name, $value) = @_;
    my $line = "'$name' = $value";

    add_registry_key( $base, $key, undef );
    push @{ $registry_keys{"$base\\$key"} }, $line;
}
2352
2353
630f605c2… Alex*2354
# Add a string value (prefix "s") to key "$base\$key".
# Single quotes inside $value are doubled before the value is quoted.
sub add_registry_string_value($$$$)
{
    my ($base, $key, $name, $value) = @_;
    (my $escaped = $value) =~ s/\'/\'\'/g;
    add_registry_value( $base, $key, $name, "s '$escaped'" );
}
2361
2362
2363
# Add a dword value (prefix "d") to key "$base\$key".
sub add_registry_dword_value($$$$)
{
    my ($base, $key, $name, $value) = @_;
    my $data = "d $value";
    add_registry_value( $base, $key, $name, $data );
}
2369
2370
2371
# Add a binary value (prefix "b") to key "$base\$key".
# $value is emitted as two lowercase hex digits per byte, with no separators.
sub add_registry_binary_value($$$$)
{
    my ($base, $key, $name, $value) = @_;
    my @bytes = unpack( "C*", $value );
    my $hex = join "", map { sprintf "%02x", $_; } @bytes;
    add_registry_value( $base, $key, $name, "b " . $hex );
}
2377
fb270ddca… Alex*2378
f54c2f65b… Alex*2379
# Record $ch as a lead byte: append it to @lead_bytes and set its @cp2uni
# slot to 0. Does nothing when @cp2uni already has a defined entry for $ch.
sub add_lead_byte($)
{
    my ($ch) = @_;
    unless (defined $cp2uni[$ch])
    {
        push @lead_bytes, $ch;
        $cp2uni[$ch] = 0;
    }
}
2387
2388
2389
# Record the mapping between codepage value $cp and Unicode value $uni.
# Neither direction overwrites an entry that is already defined. A $cp above
# 0xff also registers its high byte as a lead byte.
sub add_mapping($$)
{
    my ($cp, $uni) = @_;

    $cp2uni[$cp] //= $uni;
    $uni2cp[$uni] //= $cp;
    add_lead_byte( $cp >> 8 ) if $cp > 0xff;
}
2397
e6df00aa4… Alex*2398
2399
# Return a copy of the argument list in which every index that has a defined
# entry in @glyph2uni is replaced by that entry.
sub get_glyphs_mapping(@)
{
    my @table = @_;

    foreach my $idx (grep { defined $glyph2uni[$_] } 0 .. $#glyph2uni)
    {
        $table[$idx] = $glyph2uni[$idx];
    }
    return @table;
}
494c42d01… Alex*2410
141966b05… Alex*2411
d94b605a8… Alex*2412
# Build the conversion tables for codepage 20932 and write them out.
#
# Resets the global codepage state (@cp2uni, @glyph2uni, @lead_bytes, @uni2cp
# and the default characters), fills it from fixed ranges plus the "jis0208"
# and "jis0212" data files, then calls output_codepage_file( 20932 ).
# Dies on any data line that is not a comment, an empty line, a line
# containing ^Z, or a recognized mapping line.
sub dump_eucjp_codepage()
{
    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = $DEF_CHAR;
    $default_wchar = 0x30fb;

    # 0x00..0x7f map to themselves
    foreach my $i (0x00 .. 0x7f) { add_mapping( $i, $i ); }

    # bytes that start a two-byte sequence
    foreach my $i (0x8e, 0xa1 .. 0xfe) { add_lead_byte($i); }

    # 0x8e followed by 0xa1..0xdf maps to U+FF61..U+FF9F (0xfec0 + byte)
    foreach my $i (0xa1 .. 0xdf) { add_mapping( 0x8e00 + $i, 0xfec0 + $i ); }

    # remaining single bytes: only the codepage -> Unicode direction is set
    foreach my $i (0x80 .. 0x8d, 0x8f .. 0x9f) { $cp2uni[$i] = $i; }
    $cp2uni[0xa0] = 0xf8f0;
    $cp2uni[0xff] = 0xf8f3;

    # Registered before the data files are read: add_mapping() never
    # overwrites, so this pair wins over any later entry for 0xa1c0 or U+FF3C.
    add_mapping( 0xa1c0, 0xff3c );

    # Lead bytes 0xf5..0xfe get consecutive code points starting at U+E000:
    # first with trail bytes 0xa1..0xfe, then with trail bytes 0x21..0x7e.
    my $private = 0xe000;
    foreach my $hi (0xf5 .. 0xfe)
    {
        foreach my $lo (0xa1 .. 0xfe)
        {
            add_mapping( ($hi << 8) + $lo, $private++ );
        }
    }
    foreach my $hi (0xf5 .. 0xfe)
    {
        foreach my $lo (0x21 .. 0x7e)
        {
            add_mapping( ($hi << 8) + $lo, $private++ );
        }
    }

    # Three hex columns per line; the second one (offset by 0x8080) is the
    # codepage value and the third one the Unicode value.
    my $INPUT = open_data_file( "jis0208" );
    while (<$INPUT>)
    {
        next if /^\#/;    # skip comments
        next if /^$/;     # skip empty lines
        next if /\x1a/;   # skip ^Z
        if (/^0x[0-9a-fA-F]+\s+0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/)
        {
            add_mapping( 0x8080 + hex $1, hex $2 );
            next;
        }
        die "Unrecognized line $_\n";
    }
    close $INPUT;

    # Two hex columns per line; the first one is offset by 0x8000.
    $INPUT = open_data_file( "jis0212" );
    while (<$INPUT>)
    {
        next if /^\#/;    # skip comments
        next if /^$/;     # skip empty lines
        next if /\x1a/;   # skip ^Z
        if (/^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/)
        {
            add_mapping( 0x8000 + hex $1, hex $2 );
            next;
        }
        die "Unrecognized line $_\n";
    }
    close $INPUT;

    output_codepage_file( 20932 );
}
2488
fd3a64020… Sang*2489
2490
# Build the conversion tables for codepage 20949 and write them out.
#
# The argument list is a table indexed by Unicode value whose defined entries
# are codepage 949 values. The global codepage state is reset, filled from
# fixed mappings and the "ksx1001" data file, completed from the argument
# table, and then passed on via output_codepage_file( 20949 ).
# Dies on any unrecognized line of the data file.
sub dump_krwansung_codepage(@)
{
    my @cp949 = @_;
    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = 0x3f;
    $default_wchar = 0x003f;

    # single-byte range, plus a few fixed single-byte mappings
    foreach my $i (0x00 .. 0x9f) { add_mapping( $i, $i ); }
    add_mapping( 0xa0, 0xf8e6 );
    add_mapping( 0xad, 0xf8e7 );
    add_mapping( 0xae, 0xf8e8 );
    add_mapping( 0xaf, 0xf8e9 );
    add_mapping( 0xfe, 0xf8ea );
    add_mapping( 0xff, 0xf8eb );

    my $INPUT = open_data_file( "ksx1001" );
    while (<$INPUT>)
    {
        next if /^\#/;    # skip comments
        next if /^$/;     # skip empty lines
        next if /\x1a/;   # skip ^Z
        if (/^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/)
        {
            add_mapping( 0x8080 + hex $1, hex $2 );
            next;
        }
        die "Unrecognized line $_\n";
    }
    close $INPUT;

    # Snapshot of the lead bytes known so far; add_mapping() below may add
    # more, but only the ones registered up to here qualify.
    my @defined_lb;
    foreach my $lb (@lead_bytes) { $defined_lb[$lb] = 1; }

    # Take over the remaining entries of the cp949 table.
    foreach my $i (0x0000 .. 0xffff)
    {
        next if ($i >= 0x1100 && $i <= 0x11ff);
        next unless defined $cp949[$i];
        if ($cp949[$i] >= 0xff)
        {
            # keep only values whose lead byte is already registered and
            # whose trail byte lies in 0xa1..0xfe
            my $hi = $cp949[$i] >> 8;
            my $lo = $cp949[$i] & 0xff;
            next unless $defined_lb[$hi];
            next unless $lo >= 0xa1 && $lo <= 0xfe;
        }
        add_mapping( $cp949[$i], $i );
    }

    output_codepage_file( 20949 );
}
141966b05… Alex*2545
8c4b96fc4… Alex*2546
261abcd5b… Alex*2547
2548
# Format a list of integers as C initializer text.
#
#   $bit_width  width of one element in bits; each value is printed as "0x"
#               followed by $bit_width / 4 zero-padded hex digits
#   $default    value printed for undefined elements
#   @array      the values
#
# Values are comma separated with a line break after every 8th one; the last
# value has no trailing separator. An empty @array yields a single $default.
sub dump_array($$@)
{
    my ($bit_width, $default, @array) = @_;
    my $format = sprintf "0x%%0%ux", $bit_width / 4;
    my $last = @array ? $#array : 0;
    my $text = " ";

    foreach my $idx (0 .. $last)
    {
        $text .= sprintf($format, defined $array[$idx] ? $array[$idx] : $default);
        next if $idx == $last;
        $text .= ($idx % 8 == 7) ? ",\n " : ", ";
    }
    return $text;
}
fb270ddca… Alex*2563
194c43ac3… Alex*2564
fb316c337… Alex*2565
2566
# Write the binary table of a single-byte codepage to the OUTPUT handle.
#
# Sections, in the order written:
#   - 7 little-endian 16-bit header words
#   - 12 zero bytes
#   - the offset word followed by the 256 entries of @cp2uni (undefined -> 0)
#   - the glyph table (word 256 plus 256 entries) when @glyph2uni is
#     non-empty, otherwise a single zero word
#   - two zero words
#   - 65536 bytes taken from @uni2cp (undefined -> $default_char)
sub dump_binary_sbcs_table($)
{
    my ($codepage) = @_;

    my $glyph_words = @glyph2uni ? 256 : 0;
    my $wc_offset = 256 + 3 + $glyph_words;
    my @header = ( 13, $codepage, 1, $default_char, $default_wchar, $cp2uni[$default_char], $uni2cp[$default_wchar] );

    print OUTPUT pack "S<*", @header;
    print OUTPUT pack "C12", (0) x 12;
    print OUTPUT pack "S<*", $wc_offset, map { $_ || 0; } @cp2uni[0 .. 255];

    my $glyph_section = @glyph2uni
        ? pack( "S<*", 256, get_glyphs_mapping(@cp2uni[0 .. 255]) )
        : pack( "S<*", 0 );
    print OUTPUT $glyph_section;

    print OUTPUT pack "S<*", 0, 0;

    print OUTPUT pack "C*", map { defined $_ ? $_ : $default_char; } @uni2cp[0 .. 65535];
}
2591
2592
2593
2594
# Write the binary table of a double-byte codepage to the OUTPUT handle.
#
# Sections, in the order written:
#   - 7 little-endian 16-bit header words
#   - 12 bytes holding the lead byte ranges, zero padded
#   - the offset word followed by the 256 single-byte entries of @cp2uni
#   - a zero word, the number of lead byte ranges, and 256 per-byte offsets
#     (0 for bytes that are not lead bytes)
#   - one 256-entry block of @cp2uni per lead byte (undefined ->
#     $default_wchar)
#   - the word 4 followed by 65536 words taken from @uni2cp (undefined ->
#     $default_char)
#
# Side effect: the @cp2uni slot of every lead byte is set to 0.
sub dump_binary_dbcs_table($)
{
    my $codepage = shift;
    my @lb_ranges = get_lb_ranges();
    my @header = ( 13, $codepage, 2, $default_char, $default_wchar, $cp2uni[$default_char], $uni2cp[$default_wchar] );

    # Each lead byte gets the offset of its 256-entry block, assigned in
    # @lead_bytes order starting at 256.
    my @offsets = (0) x 256;
    my $pos = 0;
    foreach my $i (@lead_bytes)
    {
        $offsets[$i] = ($pos += 256);
        $cp2uni[$i] = 0;
    }

    my $wc_offset = 256 + 3 + 256 * (1 + scalar @lead_bytes);

    print OUTPUT pack "S<*", @header;
    # "(0) x 12" is a list of twelve zeros used as padding; pack "C12" takes
    # the first 12 values. The previous "0 x 12" built the single string
    # "000000000000" and only worked because pack zero-fills missing values.
    print OUTPUT pack "C12", @lb_ranges, (0) x 12;
    print OUTPUT pack "S<*", $wc_offset, map { $_ || 0; } @cp2uni[0 .. 255];
    print OUTPUT pack "S<*", 0, scalar @lb_ranges / 2, @offsets;

    foreach my $i (@lead_bytes)
    {
        my $base = $i << 8;
        print OUTPUT pack "S<*", map { defined $_ ? $_ : $default_wchar; } @cp2uni[$base .. $base + 255];
    }

    print OUTPUT pack "S<", 4;
    print OUTPUT pack "S<*", map { defined $_ ? $_ : $default_char; } @uni2cp[0 .. 65535];
}
2625
2626
fb270ddca… Alex*2627
97d31ec78… Alex*2628
# Collapse @lead_bytes into a flat list of inclusive (first, last) pairs,
# one pair per run of consecutive lead bytes within 0x00..0xff, in ascending
# order.
sub get_lb_ranges()
{
    my @is_lead;
    my @ranges;

    $is_lead[$_] = 1 foreach @lead_bytes;

    my $in_range = 0;
    foreach my $byte (0 .. 255)
    {
        my $lead = $is_lead[$byte] ? 1 : 0;
        next if $lead == $in_range;
        # a state change either opens a range at this byte or closes the
        # current one at the previous byte
        push @ranges, $lead ? $byte : $byte - 1;
        $in_range = $lead;
    }
    push @ranges, 0xff if $in_range;
    return @ranges;
}
2650
7339c04b7… Aric*2651
2652
# Generate the C source file $filename containing "indic_syllabic_table".
#
# Each table entry combines the syllabic category of a character (from
# IndicSyllabicCategory.txt, looked up in %indic_types) with its positional
# category (from IndicPositionalCategory.txt, looked up in %matra_types and
# shifted left by 8 bits). The file is written as "$filename.new" and then
# handed to save_file().
# Dies on unknown category names and on lines matching neither the single
# code point nor the range format.
sub dump_indic($)
{
    my $filename = shift;
    my @indic_table;

    my $INPUT = open_data_file( "ucd", "IndicSyllabicCategory.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;      # skip comments
        next if /^\s*$/;    # skip empty lines
        next if /\x1a/;     # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*#/)
        {
            my $type = $2;
            die "unknown indic $type" unless defined $indic_types{$type};
            # code points of 65536 and above are not stored
            if (hex $1 < 65536)
            {
                $indic_table[hex $1] = $indic_types{$type};
            }
            next;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*#/)
        {
            my $type = $3;
            die "unknown indic $type" unless defined $indic_types{$type};
            if (hex $1 < 65536 and hex $2 < 65536)
            {
                foreach my $i (hex $1 .. hex $2)
                {
                    $indic_table[$i] = $indic_types{$type};
                }
            }
            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    # Save $current_data_file before the second open_data_file() call so that
    # both values can be named in the generated header comment.
    my $prev_data_file = $current_data_file;
    $INPUT = open_data_file( "ucd", "IndicPositionalCategory.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;      # skip comments
        next if /^\s*$/;    # skip empty lines
        next if /\x1a/;     # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*#/)
        {
            my $type = $2;
            die "unknown matra $type" unless defined $matra_types{$type};
            $indic_table[hex $1] |= $matra_types{$type} << 8;
            next;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*#/)
        {
            my $type = $3;
            die "unknown matra $type" unless defined $matra_types{$type};
            foreach my $i (hex $1 .. hex $2)
            {
                $indic_table[$i] |= $matra_types{$type} << 8;
            }
            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Indic Syllabic Category */\n";
    print OUTPUT "/* generated from $prev_data_file */\n";
    print OUTPUT "/* and from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "indic_syllabic_table", $indic_types{'Other'}, 16, @indic_table );

    close OUTPUT;
    save_file($filename);
}
2732
95166855b… Aric*2733
2734
# Generate the C source file $filename containing "wine_linebreak_table".
#
# Reads LineBreak.txt and stores, for every listed code point or range, the
# %break_types value of its class. Three-character class names are tried
# before two-character ones. The file is written as "$filename.new" and then
# handed to save_file().
# Dies on unknown class names and on lines matching none of the formats.
sub dump_linebreak($)
{
    my $filename = shift;
    my @break_table;

    my $INPUT = open_data_file( "ucd", "LineBreak.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;      # skip comments
        next if /^\s*$/;    # skip empty lines
        next if /\x1a/;     # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z][0-9A-Z])+\s*/)
        {
            my $type = $2;
            die "unknown breaktype $type" unless defined $break_types{$type};
            $break_table[hex $1] = $break_types{$type};
            next;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z][0-9A-Z])+\s*/)
        {
            my $type = $3;
            die "unknown breaktype $type" unless defined $break_types{$type};
            foreach my $i (hex $1 .. hex $2)
            {
                $break_table[$i] = $break_types{$type};
            }
            next;
        }
        elsif (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z])+\s*/)
        {
            my $type = $2;
            die "unknown breaktype $type" unless defined $break_types{$type};
            $break_table[hex $1] = $break_types{$type};
            next;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z])+\s*/)
        {
            my $type = $3;
            die "unknown breaktype $type" unless defined $break_types{$type};
            foreach my $i (hex $1 .. hex $2)
            {
                $break_table[$i] = $break_types{$type};
            }
            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Line Break Properties */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_three_level_mapping( "wine_linebreak_table", $break_types{'XX'}, 16, @break_table );

    close OUTPUT;
    save_file($filename);
}
2796
# Script name -> numeric script ID.
# IDs are assigned consecutively in list order, starting with Unknown = 0.
# The comments give the ID of the first name on each line.
my %scripts = do
{
    my $id = 0;
    map { ($_, $id++) } qw(
        Unknown Common Inherited Arabic Armenian
        Avestan Balinese Bamum Batak Bengali
        Bopomofo Brahmi Braille Buginese Buhid
        Canadian_Aboriginal Carian Cham Cherokee Coptic
        Cuneiform Cypriot Cyrillic Deseret Devanagari
        Egyptian_Hieroglyphs Ethiopic Georgian Glagolitic Gothic
        Greek Gujarati Gurmukhi Han Hangul
        Hanunoo Hebrew Hiragana Imperial_Aramaic Inscriptional_Pahlavi
        Inscriptional_Parthian Javanese Kaithi Kannada Katakana
        Kayah_Li Kharoshthi Khmer Lao Latin
        Lepcha Limbu Linear_B Lisu Lycian
        Lydian Malayalam Mandaic Meetei_Mayek Mongolian
        Myanmar New_Tai_Lue Nko Ogham Ol_Chiki
        Old_Italic Old_Persian Old_South_Arabian Old_Turkic Oriya
        Osmanya Phags_Pa Phoenician Rejang Runic
        Samaritan Saurashtra Shavian Sinhala Sundanese
        Syloti_Nagri Syriac Tagalog Tagbanwa Tai_Le
        Tai_Tham Tai_Viet Tamil Telugu Thaana
        Thai Tibetan Tifinagh Ugaritic Vai
        Yi

        Chakma Meroitic_Cursive Meroitic_Hieroglyphs Miao
        Sharada Sora_Sompeng Takri

        Bassa_Vah Caucasian_Albanian Duployan Elbasan Grantha
        Khojki Khudawadi Linear_A Mahajani Manichaean
        Mende_Kikakui Modi Mro Nabataean Old_North_Arabian
        Old_Permic Pahawh_Hmong Palmyrene Pau_Cin_Hau Psalter_Pahlavi
        Siddham Tirhuta Warang_Citi

        Adlam Ahom Anatolian_Hieroglyphs Bhaiksuki
        Hatran Marchen Multani Newa Old_Hungarian
        Osage SignWriting Tangut

        Masaram_Gondi Nushu Soyombo Zanabazar_Square

        Dogra Gunjala_Gondi Hanifi_Rohingya Makasar
        Medefaidrin Old_Sogdian Sogdian

        Elymaic Nyiakeng_Puachue_Hmong Nandinagari Wancho

        Chorasmian Dives_Akuru Khitan_Small_Script Yezidi
    );
};
2964
e36025a2a… Niko*2965
2966
# Generate "$filename.h" and "$filename.c" from Scripts.txt.
#
# The header declares enum unicode_script_id with one Script_<name> member
# per entry of %scripts, plus Script_LastId. The C file holds
# "wine_scripts_table", mapping each code point to its script ID (0 when the
# code point is not listed). Both files are written with a ".new" suffix and
# then handed to save_file().
#
# Data lines naming a script that is missing from %scripts, and lines that
# match neither format, are skipped silently.
sub dump_scripts($)
{
    my $filename = shift;
    my @scripts_table;

    my $INPUT = open_data_file( "ucd", "Scripts.txt" );

    while (<$INPUT>)
    {
        next if /^\#/;      # skip comments
        next if /^\s*$/;    # skip empty lines
        next if /\x1a/;     # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            my $type = $2;
            if (defined $scripts{$type})
            {
                $scripts_table[hex $1] = $scripts{$type};
            }
            next;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            my $type = $3;
            if (defined $scripts{$type})
            {
                foreach my $i (hex $1 .. hex $2)
                {
                    $scripts_table[$i] = $scripts{$type};
                }
            }
            next;
        }
    }

    close $INPUT;

    # header file: the enum, members sorted by numeric ID
    my $header = "$filename.h";
    open OUTPUT,">$header.new" or die "Cannot create $header";
    print "Building $header\n";
    print OUTPUT "/* Unicode Script IDs */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";

    print OUTPUT "enum unicode_script_id {\n";
    foreach my $script (sort { $scripts{$a} <=> $scripts{$b} } keys %scripts)
    {
        print OUTPUT " Script_$script = $scripts{$script},\n";
    }
    print OUTPUT " Script_LastId = ", (scalar keys %scripts) - 1, "\n";
    print OUTPUT "};\n";

    close OUTPUT;
    save_file($header);

    # C file: the lookup table
    $filename = "$filename.c";
    # report the file actually being created (the message used to name the
    # header here)
    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Script IDs */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_three_level_mapping( "wine_scripts_table", 0, 16, @scripts_table );
    close OUTPUT;
    save_file($filename);
}
fb270ddca… Alex*3040
6f3f505f0… Alex*3041
3042
# Generate the C source file $filename containing "wine_mirror_map".
#
# Reads BidiMirroring.txt and stores, for each listed code point, the code
# point given in the second column (0 for unlisted code points). The file is
# written as "$filename.new" and then handed to save_file().
# Dies on lines that do not match the expected format.
sub dump_mirroring($)
{
    my $filename = shift;
    my @mirror_table = ();

    my $INPUT = open_data_file( "ucd", "BidiMirroring.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;    # skip comments
        next if /^$/;     # skip empty lines
        next if /\x1a/;   # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9a-fA-F]+)/)
        {
            $mirror_table[hex $1] = hex $2;
            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode BiDi mirroring */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";
    dump_two_level_mapping( "wine_mirror_map", 0, 16, @mirror_table );
    close OUTPUT;
    save_file($filename);
}
3073
2a12c6a7d… Aric*3074
3075
# Generate the C source file $filename containing "bidi_bracket_table".
#
# Reads BidiBrackets.txt. For each listed character the table entry holds
#   - in the low 8 bits: the distance to the character in the second column,
#     as an 8-bit two's-complement value (so -1 is stored as 0xff)
#   - from bit 8 upwards: the %bracket_types value of the third column
# The file is written as "$filename.new" and then handed to save_file().
# Dies on unknown bracket types, on pairs that are 128 or more code points
# apart, and on lines that do not match the expected format.
sub dump_bracket($)
{
    my $filename = shift;
    my @bracket_table;

    my $INPUT = open_data_file( "ucd", "BidiBrackets.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;      # skip comments
        next if /^\s*$/;    # skip empty lines
        next if /\x1a/;     # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9a-fA-F]+);\s*([con])/)
        {
            my $type = $3;
            my $delta = hex($2) - hex($1);
            die "unknown bracket $type" unless defined $bracket_types{$type};
            die "characters too distant $1 and $2" if abs($delta) >= 128;
            # Modulo 256 wraps a negative distance into 0x81..0xff. The
            # previous modulo 255 was off by one for every negative distance
            # (-1 came out as 0xfe, which reads back as -2).
            $bracket_table[hex $1] = $delta % 256;
            $bracket_table[hex $1] += $bracket_types{$type} << 8;
            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Bidirectional Bracket table */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "bidi_bracket_table", 0, 16, @bracket_table );

    close OUTPUT;
    save_file($filename);
}
6f3f505f0… Alex*3112
a5fe68870… Alex*3113
3114
# Generate the C source file $filename containing "wine_shaping_table" and
# "wine_shaping_forms".
#
# wine_shaping_table starts from @initial_joining_table and is overridden
# with the %joining_types value of the joining type column of
# ArabicShaping.txt. wine_shaping_forms has one row for each of U+0600 to
# U+06FF giving its isolated, final, initial and medial forms from
# %joining_forms; a missing form falls back to the character itself.
# The file is written as "$filename.new" and then handed to save_file().
# Dies on lines that do not match the expected format.
sub dump_shaping($)
{
    my $filename = shift;
    my @joining_table = @initial_joining_table;

    my $INPUT = open_data_file( "ucd", "ArabicShaping.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;      # skip comments
        next if /^\s*$/;    # skip empty lines
        next if /\x1a/;     # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;.*;\s*([RLDCUT])\s*;\s*(\w+)/)
        {
            my $type = $2;
            $joining_table[hex $1] = $joining_types{$type};
            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Arabic shaping */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "wine_shaping_table", 0, 16, @joining_table );

    print OUTPUT "\nconst unsigned short wine_shaping_forms[256][4] =\n{\n";
    for (my $i = 0x600; $i <= 0x6ff; $i++)
    {
        printf OUTPUT " { 0x%04x, 0x%04x, 0x%04x, 0x%04x },\n",
            ${joining_forms{"isolated"}}[$i] || $i,
            ${joining_forms{"final"}}[$i] || $i,
            ${joining_forms{"initial"}}[$i] || $i,
            ${joining_forms{"medial"}}[$i] || $i;
    }
    print OUTPUT "};\n";

    close OUTPUT;
    save_file($filename);
}
3159
bea9c706e… Niko*3160
3161
# Generate the C source file $filename containing "arabic_shaping_table".
#
# The table starts from @initial_joining_table and is overridden from
# ArabicShaping.txt. Characters whose joining group is "ALAPH" or
# "DALATH RISH" get the %joining_types value of that group; every other
# character gets the %joining_types value of its joining type.
# The file is written as "$filename.new" and then handed to save_file().
# Dies on lines that do not match the expected format.
sub dump_arabic_shaping($)
{
    my $filename = shift;
    my @joining_table = @initial_joining_table;

    my $INPUT = open_data_file( "ucd", "ArabicShaping.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;      # skip comments
        next if /^\s*$/;    # skip empty lines
        next if /\x1a/;     # skip ^Z
        # The joining group may consist of several words, so the last
        # capture accepts whitespace-separated words; a plain \w+ stops at
        # the first space and could never yield "DALATH RISH".
        if (/^\s*([0-9a-fA-F]+)\s*;.*;\s*([RLDCUT])\s*;\s*(\w+(?:\s+\w+)*)/)
        {
            my $type = $2;
            my $group = $3;

            # NOTE(review): relies on %joining_types having "ALAPH" and
            # "DALATH RISH" keys - confirm against its definition.
            if ($group eq "ALAPH" || $group eq "DALATH RISH")
            {
                $joining_table[hex $1] = $joining_types{$group};
            }
            else
            {
                $joining_table[hex $1] = $joining_types{$type};
            }

            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Arabic shaping */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_three_level_mapping( "arabic_shaping_table", 0, 16, @joining_table );

    close OUTPUT;
    save_file($filename);
}
3205
742cde4fa… Aric*3206
3207
# Generate the C source file $filename containing
# "vertical_orientation_table".
#
#   $filename  file to generate
#   $unix      when true, a "#pragma makedep unix" line wrapped in "#if 0" is
#              written before the include
#
# Reads VerticalOrientation.txt and stores the %vertical_types value for
# every listed code point below 65536; unlisted code points get
# $vertical_types{'R'}. The file is written as "$filename.new" and then
# handed to save_file().
# Dies on unknown orientation names and on lines matching neither format.
sub dump_vertical($$)
{
    my ($filename, $unix) = @_;
    my @vertical_table;

    my $INPUT = open_data_file( "ucd", "VerticalOrientation.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;      # skip comments
        next if /^\s*$/;    # skip empty lines
        next if /\x1a/;     # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            my $type = $2;
            die "unknown vertical $type" unless defined $vertical_types{$type};
            if (hex $1 < 65536)
            {
                $vertical_table[hex $1] = $vertical_types{$type};
            }
            next;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*/)
        {
            my $type = $3;
            my ($first, $last) = (hex $1, hex $2);
            die "unknown vertical $type" unless defined $vertical_types{$type};
            # Only entries 0..65535 end up in the generated table, so clamp
            # the range rather than fill entries that are never written out.
            $last = 65535 if $last > 65535;
            foreach my $i ($first .. $last)
            {
                $vertical_table[$i] = $vertical_types{$type};
            }
            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Vertical Orientation */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    if ($unix)
    {
        print OUTPUT "#if 0\n";
        print OUTPUT "#pragma makedep unix\n";
        print OUTPUT "#endif\n\n";
    }
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "vertical_orientation_table", $vertical_types{'R'}, 16, @vertical_table );

    close OUTPUT;
    save_file($filename);
}
a5fe68870… Alex*3261
3262
b99b9565e… Alex*3263
# Split a table into $rows equal rows and store the rows in a shared data
# area, reusing data wherever possible.
#
#   $rows   number of rows the table is divided into
#   $def    value substituted for undefined table entries
#   @_      the table itself
#
# Returns a list made of $rows offsets followed by the shared data. Each
# offset is $rows plus the position of that row inside the data, i.e. an
# index into the returned list. A row that already occurs anywhere in the
# data is not stored again; otherwise the longest tail of the data that
# equals the beginning of the row is shared before the row is appended.
sub compress_array($$@)
{
    my ($rows, $def, @table) = @_;
    my $len = @table / $rows;
    my @offsets;
    my $data = "";

    # Rows are handled as character strings so that index() and substr()
    # can do the searching.
    foreach my $row (0 .. $rows - 1)
    {
        my $from = $row * $len;
        my $rowtxt = pack "U*", map { defined($_) ? $_ : $def; } @table[$from .. $from + $len - 1];
        my $pos = index $data, $rowtxt;

        if ($pos == -1)
        {
            # Look for a tail of $data that is a prefix of the new row,
            # trying the longest candidate first.
            my $first = substr( $rowtxt, 0, 1 );
            for (my $overlap = length($data) - 1; $overlap > 0; $overlap--)
            {
                my $skip = index( substr( $data, -$overlap ), $first );
                last if $skip == -1;
                # jump to the next tail that starts with the row's first value
                $overlap -= $skip;
                if (substr( $data, -$overlap ) eq substr( $rowtxt, 0, $overlap ))
                {
                    # drop the shared tail; appending the row restores it
                    substr( $data, -$overlap ) = "";
                    last;
                }
            }
            $pos = length $data;
            $data .= $rowtxt;
        }
        $offsets[$row] = $rows + $pos;
    }
    return @offsets, unpack "U*", $data;
}
3298
930a13dd8… Alex*3299
c848f42aa… Alex*3300
# Print a two-level lookup table for the first 65536 entries of a mapping
# as a C array on the OUTPUT handle.
#
#   $name   name of the generated C array
#   $def    value used for undefined entries
#   $size   element width; 16 selects "unsigned short", anything else
#           "unsigned int"
#   rest    the values to map
sub dump_two_level_mapping($$$@)
{
    my ($name, $def, $size, @values) = @_;
    my $type = $size == 16 ? "unsigned short" : "unsigned int";

    # Split each compress_array() result into its offsets and its payload.
    my @row_array = compress_array( 4096, $def, @values[0..65535] );
    my @data = splice @row_array, 4096;
    my @array = compress_array( 256, 0, @row_array );
    my @row_data = splice @array, 256;

    # Rebase the second-level offsets for their position in the final array.
    my $rebase = @row_data + 256 - 4096;
    $_ += $rebase foreach @row_data;

    my $total = @array + @row_data + @data;
    printf OUTPUT "const %s %s[%d] =\n{\n", $type, $name, $total;
    printf OUTPUT " /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @array );
    printf OUTPUT " /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @row_data );
    printf OUTPUT " /* values */\n%s\n};\n", dump_array( $size, 0, @data );
}
3318
148f564d1… Alex*3319
3320
# Print a three-level lookup table covering 0..$MAX_CHAR as a C array on
# the OUTPUT handle.
#
#   $name   name of the generated C array
#   $def    value used for undefined entries
#   $size   element width; 16 selects "unsigned short", anything else
#           "unsigned int"
#   rest    the values to map
#
# NOTE(review): the prototype names two scalars although three are taken
# from the argument list; it is left as is so call sites parse unchanged.
sub dump_three_level_mapping($$@)
{
    my ($name, $def, $size, @values) = @_;
    my $type = $size == 16 ? "unsigned short" : "unsigned int";

    # Row counts of the three levels; each level is 16 times smaller.
    my $level3 = ($MAX_CHAR + 1) / 16;
    my $level2 = $level3 / 16;
    my $level1 = $level2 / 16;

    # Each array starts with its row offsets, followed by the merged data.
    my @array3 = compress_array( $level3, $def, @values[0..$MAX_CHAR] );
    my @array2 = compress_array( $level2, 0, @array3[0..$level3-1] );
    my @array1 = compress_array( $level1, 0, @array2[0..$level2-1] );

    # Rebase the data parts for their position in the final array.
    my $rebase2 = @array1 + @array2 - $level2 - $level3;
    $_ += $rebase2 foreach @array2[$level2..$#array2];
    my $rebase1 = @array1 - $level2;
    $_ += $rebase1 foreach @array1[$level1..$#array1];

    my $total = @array1 + (@array2 - $level2) + (@array3 - $level3);
    printf OUTPUT "const %s %s[%u] =\n{\n", $type, $name, $total;
    printf OUTPUT " /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @array1[0..$level1-1] );
    printf OUTPUT " /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @array1[$level1..$#array1] );
    printf OUTPUT " /* level 3 offsets */\n%s,\n", dump_array( $size, 0, @array2[$level2..$#array2] );
    printf OUTPUT " /* values */\n%s\n};\n", dump_array( $size, 0, @array3[$level3..$#array3] );
}
3343
b99b9565e… Alex*3344
930a13dd8… Alex*3345
# Build the packed binary form of a case mapping table.
#
# Each defined entry is stored as the 32-bit difference between the mapped
# value and its index.  The first 65536 entries are always emitted as a
# two-level table of 16-bit words; when the input has more than 0x10000
# entries, a second table for 65536..$MAX_CHAR is added, with its values
# appended as 32-bit words.  The first word of the result is a size field.
sub dump_binary_case_table(@)
{
    my (@table) = @_;
    my @delta;

    foreach my $ch (0 .. $#table)
    {
        next unless defined $table[$ch];
        $delta[$ch] = ($table[$ch] - $ch) & 0xffffffff;
    }

    # Split each compress_array() result into its offsets and its payload.
    my @low_level2 = compress_array( 4096, 0, @delta[0..65535] );
    my @low_data = splice @low_level2, 4096;
    my @low_level1 = compress_array( 256, 0, @low_level2 );
    my @low_rows = splice @low_level1, 256;

    my @res;
    if (@table <= 0x10000)
    {
        @res = @low_level1;
        my $rows_base = @res + @low_rows - 4096;
        push @res, map { $_ + $rows_base } @low_rows;
        push @res, @low_data;
        return pack "S<*", 1 + @res, @res;
    }

    my @high_level2 = compress_array( 32768, 0, @delta[65536..$MAX_CHAR] );
    my @high_data = splice @high_level2, 32768;
    my @high_level1 = compress_array( 1024, 0, @high_level2 );
    my @high_rows = splice @high_level1, 1024;

    # Every base below is taken from the size of @res before the
    # corresponding push, so the order of these statements matters.
    @res = map { $_ + 1024 } @low_level1;
    my $high_base = @res + @low_rows + @low_data;
    push @res, map { $_ + $high_base } @high_level1;
    my $low_rows_base = @res + @low_rows - 4096;
    push @res, map { $_ + $low_rows_base } @low_rows;
    push @res, @low_data;
    my $high_rows_base = @res + @high_rows;
    push @res, map { 2 * ($_ - 32768) + $high_rows_base } @high_rows;
    return pack( "S<*", 1 + @res + 2 * @high_data, @res ) . pack( "L<*", @high_data );
}
3383
3384
3385
# Write the binary case mapping file: a 16-bit word holding 1, then the
# upper-case table, then the lower-case table, both limited to the first
# 65536 entries and built after remove_linguistic_mappings() has been
# applied to copies of the global tables.
#
#   $filename  target file; the data is written to "$filename.new" and
#              then handed to save_file()
sub dump_intl_nls($)
{
    my $filename = shift;

    my @upper_table = @toupper_table;
    my @lower_table = @tolower_table;
    remove_linguistic_mappings( \@upper_table, \@lower_table );

    my $upper = dump_binary_case_table( @upper_table[0..65535] );
    my $lower = dump_binary_case_table( @lower_table[0..65535] );

    # Three-argument open keeps the mode separate from the file name.
    open OUTPUT, '>', "$filename.new" or die "Cannot create $filename.new: $!";
    # print, not printf: the file name must not be used as a format string.
    print "Building $filename\n";

    binmode OUTPUT;
    print OUTPUT pack "S<", 1;
    print OUTPUT $upper;
    print OUTPUT $lower;
    close OUTPUT;
    save_file($filename);
}
3406
3407
2f6918cbe… Alex*3408
3409
# Write the BiDi direction table as a C source file.
#
# Every defined entry of @direction_table is translated through
# %bidi_types and emitted with dump_three_level_mapping(); undefined
# entries take the value of $bidi_types{"L"}.
#
#   $filename  target file; the data is written to "$filename.new" and
#              then handed to save_file()
sub dump_bidi_dir_table($)
{
    my $filename = shift;

    # Three-argument open keeps the mode separate from the file name.
    open OUTPUT, '>', "$filename.new" or die "Cannot create $filename.new: $!";
    # print, not printf: none of these strings is a format, and the file
    # name must not be interpreted as one.
    print "Building $filename\n";
    print OUTPUT "/* Unicode BiDi direction table */\n";
    print OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    my @table;

    for (my $i = 0; $i < @direction_table; $i++)
    {
        $table[$i] = $bidi_types{$direction_table[$i]} if defined $direction_table[$i];
    }

    dump_three_level_mapping( "bidi_direction_table", $bidi_types{"L"}, 16, @table );

    close OUTPUT;
    save_file($filename);
}
3431
3432
# Rotate an 8-bit value left by $count bits and return the resulting byte.
sub rol($$)
{
    my ($byte, $count) = @_;
    my $shifted_out = $byte >> (8 - $count);
    my $shifted_up = $byte << $count;
    return ($shifted_up | $shifted_out) & 0xff;
}
3438
3439
3440
# Compress the character properties table by storing each distinct row once.
#
#   $rows   number of rows to split the table into
#   @table  property values; undefined entries are stored as 0x7f
#
# Returns one index per row followed by the contents of every newly created
# row.  Rows made entirely of rol($i,5) for $i in (0, 0xfb..0xff) are
# predefined: they get index $i and no row data is emitted for them.
sub compress_char_props_table($@)
{
    my ($rows, @table) = @_;
    my $len = @table / $rows;
    my $next_id = 0;
    my @result = (0) x $rows;
    my %row_ids;

    # Register the predefined rows.
    foreach my $fill (0, 0xfb .. 0xff)
    {
        my $key = pack "L*", (rol($fill,5)) x $len;
        $row_ids{$key} = $fill;
    }

    foreach my $row (0 .. $rows - 1)
    {
        my @cells = map { defined $_ ? $_ : 0x7f } @table[($row * $len) .. (($row + 1) * $len - 1)];
        my $key = pack "L*", @cells;
        if (!defined $row_ids{$key})
        {
            # First occurrence: allocate the next index and append the row.
            $row_ids{$key} = $result[$row] = ++$next_id;
            push @result, @cells;
        }
        else
        {
            # Seen before (or predefined): reuse its index.
            $result[$row] = $row_ids{$key};
        }
    }
    return @result;
}
3472
3473
3474
# Build the binary normalization table file for one normalization form.
# The form (nfc, nfd, nfkc, nfkd or idna) is derived from the file name,
# which is expected to end in ".../norm<form>.nls".  The data is written to
# "$filename.new" through the OUTPUT handle, handed to save_file(), and the
# file is then registered under the "Normalization" registry key.
3475 sub dump_norm_table($)
3476 {
3477 my $filename = shift;
3478
# Numeric id of each form; bit 0 is tested below as "compose" and bit 2 as
# "compat".  %decomp selects the decomposition table used for each form.
3479 my %forms = ( "nfc" => 1, "nfd" => 2, "nfkc" => 5, "nfkd" => 6, "idna" => 13 );
3480 my %decomp = ( "nfc" => \@decomp_table,
3481 "nfd" => \@decomp_table,
3482 "nfkc" => \@decomp_compat_table,
3483 "nfkd" => \@decomp_compat_table ,
3484 "idna" => \@idna_decomp_table );
3485
3486 open OUTPUT,">$filename.new" or die "Cannot create $filename";
3487 print "Building $filename\n";
3488
3489 my $type = $filename;
3490 $type =~ s!.*/norm(\w+)\.nls!$1!;
3491
# $compat ends up 0, 1 or 2 (2 only for idna); it is passed on to
# get_composition() and selects a header field below.
3492 my $compose = $forms{$type} & 1;
3493 my $compat = !!($forms{$type} & 4) + ($type eq "idna");
3494
3495 my @version = split /\./, $UNIVERSION;
3496
3497
3498
# Combining classes: collect the distinct class values below 0x100 and give
# each one a compact index ($classes[value] = index, $class_values[index] =
# value).  The list is padded to an even length.
3499 my @classes;
3500 my @class_values;
3501
3502 foreach my $c (grep defined, @combining_class_table)
3503 {
3504 $classes[$c] = 1 if $c < 0x100;
3505 }
3506 for (my $i = 0; $i < @classes; $i++)
3507 {
3508 next unless defined $classes[$i];
3509 $classes[$i] = @class_values;
3510 push @class_values, $i;
3511 }
3512 push @class_values, 0 if (@class_values % 2);
3513 die "too many classes" if @class_values >= 0x40;
3514
3515
3516
# Per-character properties, decompositions and (when composing) the
# composition hash buckets.
3517 my @char_props;
3518 my @decomposed;
3519 my @comp_hash_table;
3520 my $comp_hash_size = $compose ? 254 : 0;
3521
3522 for (my $i = 0; $i <= $MAX_CHAR; $i++)
3523 {
3524 next unless defined $combining_class_table[$i];
3525 if (defined $decomp{$type}->[$i])
3526 {
3527 my @dec = get_decomposition( $i, $decomp{$type} );
3528 if ($compose && (my @comp = get_composition( $i, $compat )))
3529 {
3530 my $hash = ($comp[0] + 95 * $comp[1]) % $comp_hash_size;
3531 push @{$comp_hash_table[$hash]}, to_utf16( @comp, $i );
3532
# Use the class index of the first decomposed char with a non-zero class.
3533 my $val = 0;
3534 foreach my $d (@dec)
3535 {
3536 $val = $combining_class_table[$d];
3537 last if $val;
3538 }
3539 $char_props[$i] = $classes[$val];
3540 }
3541 else
3542 {
3543 $char_props[$i] = 0xbf;
3544 }
3545 @dec = compose_hangul( @dec ) if $compose;
3546 @dec = to_utf16( @dec );
# Sequences of 7 or more units get a 0 terminator; their stored length is
# capped at 7 further down.
3547 push @dec, 0 if @dec >= 7;
3548 $decomposed[$i] = \@dec;
3549 }
3550 else
3551 {
3552 if ($combining_class_table[$i] == 0x100)
3553 {
3554 $char_props[$i] = 0x7f;
3555 }
3556 elsif ($combining_class_table[$i])
3557 {
3558 $char_props[$i] = $classes[$combining_class_table[$i]] | 0x80;
3559 }
3560 elsif ($type eq "idna" && defined $idna_disallowed[$i])
3561 {
3562 $char_props[$i] = 0xff;
3563 }
3564 else
3565 {
3566 $char_props[$i] = 0;
3567 }
3568 }
3569 }
3570
# Second pass when composing: flag both characters of every composition
# pair, depending on whether the second one has a combining class.
3571 if ($compose)
3572 {
3573 for (my $i = 0; $i <= $MAX_CHAR; $i++)
3574 {
3575 my @comp = get_composition( $i, $compat );
3576 next unless @comp;
3577 if ($combining_class_table[$comp[1]])
3578 {
3579 $char_props[$comp[0]] |= 0x40 unless $char_props[$comp[0]] & 0x80;
3580 $char_props[$comp[1]] |= 0x40;
3581 }
3582 else
3583 {
3584 $char_props[$comp[0]] = ($char_props[$comp[0]] & ~0x40) | 0x80;
3585 $char_props[$comp[1]] |= 0xc0;
3586 }
3587 }
3588 }
3589
3590
# Surrogates: fixed property values for the high and low halves.
3591 foreach my $i (0xd800..0xdbff) { $char_props[$i] = 0xdf; }
3592 foreach my $i (0xdc00..0xdfff) { $char_props[$i] = 0x9f; }
3593
3594
# Hangul ranges are marked 0xff; which Jamo range is covered depends on the form.
3595 if ($type eq "nfc") { foreach my $i (0x1100..0x117f) { $char_props[$i] = 0xff; } }
3596 elsif ($compose) { foreach my $i (0x1100..0x11ff) { $char_props[$i] = 0xff; } }
3597 foreach my $i (0xac00..0xd7ff) { $char_props[$i] = 0xff; }
3598
3599
# Also marked 0xff: control characters (idna only), U+FDD0..U+FDEF, and the
# last two code points of every plane.
3600 if ($type eq "idna") { foreach my $i (0x00..0x1f, 0x7f) { $char_props[$i] = 0xff; } }
3601 foreach my $i (0xfdd0..0xfdef) { $char_props[$i] = 0xff; }
3602 foreach my $i (0x00..0x10)
3603 {
3604 $char_props[($i << 16) | 0xfffe] = 0xff;
3605 $char_props[($i << 16) | 0xffff] = 0xff;
3606 }
3607
3608
3609
# Decomposition hash table, keyed by code point modulo $decomp_hash_size.
3610 my @decomp_hash_table;
3611 my @decomp_hash_index;
3612 my @decomp_hash_data;
3613 my $decomp_hash_size = 944;
3614
3615
# Pack all decomposition sequences into one string, longest first, so that
# a sequence already contained in the string is not stored again.
3616 my $decomp_char_data = "";
3617 foreach my $i (sort { @{$b} <=> @{$a} } grep defined, @decomposed)
3618 {
3619 my $str = pack "U*", @{$i};
3620 $decomp_char_data .= $str if index( $decomp_char_data, $str) == -1;
3621 }
# Each hash entry is [ code point, (length << 13) | position in the string ].
3622 for (my $i = 0; $i < @decomposed; $i++)
3623 {
3624 next unless defined $decomposed[$i];
3625 my $pos = index( $decomp_char_data, pack( "U*", @{$decomposed[$i]} ));
3626 die "sequence not found" if $pos == -1;
3627 my $len = @{$decomposed[$i]};
3628 $len = 7 if $len > 7;
3629 my $hash = $i % $decomp_hash_size;
3630 push @{$decomp_hash_table[$hash]}, [ $i, ($len << 13) | $pos ];
3631 }
3632 for (my $i = 0; $i < $decomp_hash_size; $i++)
3633 {
3634 $decomp_hash_index[$i] = @decomp_hash_data / 2;
3635 next unless defined $decomp_hash_table[$i];
# A bucket holding a single entry whose character has property 0xbf stores
# the packed length/position directly in the index instead of a data offset.
3636 if (@{$decomp_hash_table[$i]} == 1)
3637 {
3638 my $entry = $decomp_hash_table[$i]->[0];
3639 if ($char_props[$entry->[0]] == 0xbf)
3640 {
3641 $decomp_hash_index[$i] = $entry->[1];
3642 next;
3643 }
3644 }
3645 foreach my $entry (@{$decomp_hash_table[$i]})
3646 {
3647 push @decomp_hash_data, $entry->[0] & 0xffff, $entry->[1];
3648 }
3649 }
3650 push @decomp_hash_data, 0, 0;
3651
3652
3653
# Composition hash table: the index gives the start of each bucket in the
# data, with one extra index entry marking the end of the last bucket.
3654 my @comp_hash_index;
3655 my @comp_hash_data;
3656 if (@comp_hash_table)
3657 {
3658 for (my $i = 0; $i < $comp_hash_size; $i++)
3659 {
3660 $comp_hash_index[$i] = @comp_hash_data;
3661 push @comp_hash_data, @{$comp_hash_table[$i]} if defined $comp_hash_table[$i];
3662 }
3663 $comp_hash_index[$comp_hash_size] = @comp_hash_data;
3664 push @comp_hash_data, 0, 0, 0;
3665 }
3666
# Compress the character properties into rows of 128 characters.
3667 my $level1 = ($MAX_CHAR + 1) / 128;
3668 my @rows = compress_char_props_table( $level1, @char_props[0..$MAX_CHAR] );
3669
3670 my @header = ( $version[0], $version[1], $version[2], 0, $forms{$type}, $compat ? 18 : 3,
3671 0, $decomp_hash_size, $comp_hash_size, 0 );
3672 my @tables = (0) x 8;
3673
# Offsets of the individual tables, counted in 16-bit words from the start
# of the file (16 words of name, then the header and this offset table).
# Byte-sized tables therefore count as half their length.
3674 $tables[0] = 16 + @header + @tables;
3675 $tables[1] = $tables[0] + @class_values / 2;
3676 $tables[2] = $tables[1] + $level1 / 2;
3677 $tables[3] = $tables[2] + (@rows - $level1) / 2;
3678 $tables[4] = $tables[3] + @decomp_hash_index;
3679 $tables[5] = $tables[4] + @decomp_hash_data;
3680 $tables[6] = $tables[5] + length $decomp_char_data;
3681 $tables[7] = $tables[6] + @comp_hash_index;
3682
# Emit everything in the same order the offsets were computed in.
3683 print OUTPUT pack "S<16", unpack "U*", "norm$type.nlp";
3684 print OUTPUT pack "S<*", @header;
3685 print OUTPUT pack "S<*", @tables;
3686 print OUTPUT pack "C*", @class_values;
3687
3688 print OUTPUT pack "C*", @rows[0..$level1-1];
3689 print OUTPUT pack "C*", @rows[$level1..$#rows];
3690 print OUTPUT pack "S<*", @decomp_hash_index;
3691 print OUTPUT pack "S<*", @decomp_hash_data;
3692 print OUTPUT pack "S<*", unpack "U*", $decomp_char_data;
3693 print OUTPUT pack "S<*", @comp_hash_index;
3694 print OUTPUT pack "S<*", @comp_hash_data;
3695
3696 close OUTPUT;
3697 save_file($filename);
712839d58… Alex*3698
630f605c2… Alex*3699 add_registry_string_value( $nlskey, "Normalization", sprintf( "%x", $forms{$type} ), "norm$type.nls" );
f9f3e57cf… Alex*3700 }
3701
3702
97d31ec78… Alex*3703
f54c2f65b… Alex*3704
# Write the binary table for a code page to nls/c_NNN.nls and register it
# under the "Codepage" registry key.  A double-byte table is written when
# @lead_bytes is non-empty, a single-byte table otherwise.
sub output_codepage_file($)
{
    my $codepage = shift;
    my $basename = sprintf( "c_%03d.nls", $codepage );
    my $output = sprintf "nls/c_%03d.nls", $codepage;

    open OUTPUT,">$output.new" or die "Cannot create $output";
    printf "Building %s\n", $output;

    if (@lead_bytes)
    {
        dump_binary_dbcs_table( $codepage );
    }
    else
    {
        dump_binary_sbcs_table( $codepage );
    }

    close OUTPUT;
    save_file($output);

    add_registry_string_value( $nlskey, "Codepage", sprintf( "%d", $codepage ), $basename );
}
3721
3722
09d97e968… Alex*3723
# Parse one code page description in the "CODEPAGE / CPINFO / MBTABLE ..."
# text format and write the matching binary file.
#
# The global tables (@cp2uni, @glyph2uni, @lead_bytes, @uni2cp) and the
# default characters are reset first and then filled from the file, which
# is opened through open_data_file( "codepages", $filename ).  Parsing
# stops at ENDCODEPAGE; any line that is not recognized is fatal.  For code
# page 949 the Wansung table is dumped as well.
sub dump_msdata_codepage($)
{
    my $filename = shift;

    my $state = "";
    my ($codepage, $width, $count);
    my ($lb_cur, $lb_end);

    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = $DEF_CHAR;
    $default_wchar = $DEF_CHAR;

    my $INPUT = open_data_file( "codepages", $filename );

    while (<$INPUT>)
    {
        next if /^;/;        # comment line
        next if /^\s*$/;     # blank line
        next if /\x1a/;      # line containing a Ctrl-Z character
        last if /^ENDCODEPAGE/;

        if (/^CODEPAGE\s+(\d+)/)
        {
            $codepage = $1;
            next;
        }
        # Both hex fields use [0-9a-fA-F]; the first one used to read
        # "A-f", which also accepted G-Z and some punctuation.
        if (/^CPINFO\s+(\d+)\s+0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)/)
        {
            $width = $1;
            $default_char = hex $2;
            $default_wchar = hex $3;
            next;
        }
        if (/^(MBTABLE|GLYPHTABLE|WCTABLE|DBCSRANGE|DBCSTABLE)\s+(\d+)/)
        {
            $state = $1;
            $count = $2;
            next;
        }
        if (/^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)/)
        {
            # A pair of hex values; its meaning depends on the current
            # section.  In every table the first definition of an entry wins.
            if ($state eq "MBTABLE")
            {
                my $cp = hex $1;
                my $uni = hex $2;
                $cp2uni[$cp] = $uni unless defined($cp2uni[$cp]);
                next;
            }
            if ($state eq "GLYPHTABLE")
            {
                my $cp = hex $1;
                my $uni = hex $2;
                $glyph2uni[$cp] = $uni unless defined($glyph2uni[$cp]);
                next;
            }
            if ($state eq "WCTABLE")
            {
                my $uni = hex $1;
                my $cp = hex $2;
                $uni2cp[$uni] = $cp unless defined($uni2cp[$uni]);
                next;
            }
            if ($state eq "DBCSRANGE")
            {
                my $start = hex $1;
                my $end = hex $2;
                for (my $i = $start; $i <= $end; $i++) { add_lead_byte( $i ); }
                $lb_cur = $start;
                $lb_end = $end;
                next;
            }
            if ($state eq "DBCSTABLE")
            {
                my $mb = hex $1;
                my $uni = hex $2;
                my $cp = ($lb_cur << 8) | $mb;
                $cp2uni[$cp] = $uni unless defined($cp2uni[$cp]);
                # Once the announced number of entries has been read, move
                # to the next lead byte; after the last one of the range,
                # expect a new DBCSRANGE line.
                if (!--$count)
                {
                    if (++$lb_cur > $lb_end) { $state = "DBCSRANGE"; }
                }
                next;
            }
        }
        die "$filename: Unrecognized line $_\n";
    }
    close $INPUT;

    output_codepage_file( $codepage );

    if ($codepage == 949) { dump_krwansung_codepage( @uni2cp ); }
}
3819
95aeb41c8… Alex*3820
3821
# Pad a string with NUL bytes so that its length becomes a multiple of
# $align; a string that is already aligned is returned unchanged.
sub align_string($$)
{
    my ($align, $str) = @_;
    my $excess = length($str) % $align;
    return $str unless $excess;
    return $str . ("\0" x ($align - $excess));
}
3828
ad02ef7be… Alex*3829
3830
# Pad a string with NUL bytes up to a length of $pad; a string that is
# already at least that long is returned unchanged.
sub pad_string($$)
{
    my ($pad, $str) = @_;
    my $missing = $pad - length($str);
    return $missing > 0 ? $str . ("\0" x $missing) : $str;
}
3837
95aeb41c8… Alex*3838
3839
# Convert a GUID in its textual form (8-4-4-4-12 hex digits) to its
# 16-byte binary form: a little-endian 32-bit value, two little-endian
# 16-bit values and eight single bytes.
#
# Dies if the argument does not contain a GUID.  The argument is matched
# through a lexical, so the caller's $_ is left untouched.
sub pack_guid($)
{
    my $guid = shift;
    my @fields = $guid =~ /([0-9A-Fa-f]{8})-([0-9A-Fa-f]{4})-([0-9A-Fa-f]{4})-([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})-([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})/
        or die "malformed GUID '$guid'\n";
    return pack "L<S<2C8", map { hex $_ } @fields;
}
3846
3847
3848
# Sort comparator for compression rows (array references in $a and $b):
# shorter rows sort first, rows of equal length are ordered by their
# elements 4 to 12, compared numerically in turn.
sub cmp_compression
{
    my $order = @{$a} <=> @{$b};
    foreach my $idx (4 .. 12)
    {
        last if $order;
        $order = $a->[$idx] <=> $b->[$idx];
    }
    return $order;
}
3862
3863
3864
# Build the sorting data file from the "sorting" data file.
#
# The input is parsed section by section (SORTKEY / SORTTABLES parts, each
# with named sections), then the output is assembled from four blocks:
# the sort key table with its per-GUID exception tables, three case mapping
# tables, the character type table, and the sort tables (GUID list,
# expansions, compressions, multiple weights, jamo sort).  Locale names are
# also registered under the "Sorting\Ids" registry key.
#
#   $filename  target file; the data is written to "$filename.new" and
#              then handed to save_file()
#
# Returns the packed character type block.
sub dump_sortkey_table($)
{
    my $filename = shift;
    my @keys;
    my ($part, $section, $subsection, $guid, $version, $ling_flag);
    my @multiple_weights;
    my @expansions;
    my @compressions;
    my %exceptions;
    my %guids;
    my %compr_flags;
    my %locales;
    my $default_guid = "00000001-57ee-1e5c-00b4-d0000bb1e11e";
    my $jamostr = "";

    my $re_hex = '0x[0-9A-Fa-f]+';
    my $re_key = '(\d+\s+\d+\s+\d+\s+\d+)';
    $guids{$default_guid} = { };

    my %flags = ( "HAS_3_BYTE_WEIGHTS" => 0x01, "REVERSEDIACRITICS" => 0x10, "DOUBLECOMPRESSION" => 0x20, "INVERSECASING" => 0x40 );

    my $KEYS = open_data_file( "sorting" );

    # print, not printf: the file name must not be used as a format string.
    print "Building $filename\n";

    while (<$KEYS>)
    {
        s/\s*;.*$//;    # strip everything from the first ';'
        next if /^\s*$/;
        if (/^\s*(SORTKEY|SORTTABLES)/)
        {
            $part = $1;
            next;
        }
        if (/^\s*(ENDSORTKEY|ENDSORTTABLES)/)
        {
            $part = $section = "";
            next;
        }
        if (/^\s*(DEFAULT|RELEASE|REVERSEDIACRITICS|DOUBLECOMPRESSION|INVERSECASING|MULTIPLEWEIGHTS|EXPANSION|COMPATIBILITY|COMPRESSION|EXCEPTION|JAMOSORT)\s+/)
        {
            $section = $1;
            $guid = undef;
            next;
        }
        next unless $part;
        if ("$part.$section" eq "SORTKEY.DEFAULT")
        {
            if (/^\s*($re_hex)\s+$re_key/)
            {
                $keys[hex $1] = [ split(/\s+/,$2) ];
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.RELEASE")
        {
            if (/^\s*NLSVERSION\s+0x([0-9A-Fa-f]+)/)
            {
                $version = hex $1;
                next;
            }
            if (/^\s*DEFINEDVERSION\s+0x([0-9A-Fa-f]+)/)
            {
                # recognized but not used
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.REVERSEDIACRITICS" ||
               "$part.$section" eq "SORTTABLES.DOUBLECOMPRESSION" ||
               "$part.$section" eq "SORTTABLES.INVERSECASING")
        {
            if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)/)
            {
                $guid = lc $1;
                $guids{$guid} = { } unless defined $guids{$guid};
                $guids{$guid}->{flags} |= $flags{$section};
                next;
            }
            if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
            {
                $locales{$1} = $guid;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.MULTIPLEWEIGHTS")
        {
            if (/^\s*(\d+)\s+(\d+)/)
            {
                push @multiple_weights, $1, $2;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.EXPANSION")
        {
            if (/^\s*0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)/)
            {
                # The key of an expanded character refers to its position
                # in the expansion list, unless it already has a key.
                my $pos = scalar @expansions / 2;
                $keys[hex $1] = [ 2, 0, $pos & 0xff, $pos >> 8 ] unless defined $keys[hex $1];
                push @expansions, hex $2, hex $3;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.COMPATIBILITY")
        {
            if (/^\s*0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)/)
            {
                $keys[hex $1] = $keys[hex $2];
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.COMPRESSION")
        {
            if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)\s+\d*\s*([A-Z0-9_]+)?/)
            {
                if ($subsection || !$guid)
                {
                    # First GUID of a group: start a new compression set.
                    $guid = lc $1;
                    $subsection = "";
                    $guids{$guid} = { } unless defined $guids{$guid};
                    $guids{$guid}->{flags} |= $flags{$2} if $2;
                    $guids{$guid}->{compr} = @compressions;
                    $exceptions{"$guid-"} = [ ] unless defined $exceptions{"$guid-"};
                    $compr_flags{$guid} = [ ] unless defined $compr_flags{$guid};
                    push @compressions, [ ];
                }
                else
                {
                    # Further GUID of the same group: share the set and the
                    # flags array of the group's first GUID.
                    $guids{lc $1} = { } unless defined $guids{lc $1};
                    $guids{lc $1}->{flags} |= $flags{$2} if $2;
                    $guids{lc $1}->{compr} = $guids{$guid}->{compr};
                    $compr_flags{lc $1} = $compr_flags{$guid};
                }
                next;
            }
            if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
            {
                $locales{$1} = $guid;
                next;
            }
            if (/^\s*(TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT)/)
            {
                $subsection = $1;
                next;
            }
            if ($subsection && /^\s*(($re_hex\s+){2,8})$re_key/)
            {
                my @comp = map { hex $_; } split(/\s+/,$1);
                push @{$compressions[$#compressions]}, [ split(/\s+/,$3), @comp ];
                # Flag the first character of the sequence according to the
                # sequence length.
                $compr_flags{$guid}->[$comp[0]] |= @comp >= 6 ? 0xc0 : @comp >= 4 ? 0x80 : 0x40;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.EXCEPTION")
        {
            if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)\s+\d*\s*(LINGUISTIC_CASING)?/)
            {
                $guid = lc $1;
                $guids{$guid} = { } unless defined $guids{lc $1};
                # Exception lists are keyed by GUID plus "+" (linguistic
                # casing) or "-".
                $ling_flag = ($2 ? "+" : "-");
                $exceptions{"$guid$ling_flag"} = [ ] unless defined $exceptions{"$guid$ling_flag"};
                next;
            }
            if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
            {
                $locales{$1} = $guid;
                next;
            }
            if (/^\s*($re_hex)\s+$re_key/)
            {
                $exceptions{"$guid$ling_flag"}->[hex $1] = [ split(/\s+/,$2) ];
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.JAMOSORT")
        {
            if (/^\s*$re_hex\s+(($re_hex\s*){5})/)
            {
                $jamostr .= pack "C8", map { hex $_; } split /\s+/, $1;
                next;
            }
        }
        die "$current_data_file: $part.$section: unrecognized line $_\n";
    }
    close $KEYS;

    # Sort key table: four bytes per character for the first 0x10000
    # characters, zero for characters without a key.

    my $table;
    for (my $i = 0; $i < 0x10000; $i++)
    {
        my @k = defined $keys[$i] ? @{$keys[$i]} : (0) x 4;
        $table .= pack "C4", $k[1], $k[0], $k[2], $k[3];
    }

    # Exception tables: for each list, a 256-entry index followed by one
    # 256-character block for every high byte that has an exception or a
    # compression flag; other index entries point into the default table.

    foreach my $id (sort keys %exceptions)
    {
        my $pos = length($table) / 4;
        my @exc = @{$exceptions{$id}};
        my @filled;
        my $key = (substr( $id, -1 ) eq "+" ? "ling_except" : "except");
        my $guid = substr( $id, 0, -1 );
        $guids{$guid}->{$key} = $pos;
        $pos += 0x100;
        # Declared unconditionally: "my ... if" has undefined behaviour.
        my @flags = defined $compr_flags{$guid} ? @{$compr_flags{$guid}} : ();
        for (my $j = 0; $j < 0x10000; $j++)
        {
            next unless defined $exc[$j] || defined $flags[$j];
            $filled[$j >> 8] = 1;
            $j |= 0xff;    # skip to the end of this 256-character block
        }
        for (my $j = 0; $j < 0x100; $j++)
        {
            $table .= pack "L<", $filled[$j] ? $pos : $j * 0x100;
            $pos += 0x100 if $filled[$j];
        }
        for (my $j = 0; $j < 0x10000; $j++)
        {
            next unless $filled[$j >> 8];
            my @k = defined $exc[$j] ? @{$exc[$j]} : defined $keys[$j] ? @{$keys[$j]} : (0) x 4;
            $k[3] |= $flags[$j] || 0;
            $table .= pack "C4", $k[1], $k[0], $k[2], $k[3];
        }
    }

    # Case mapping tables.

    # [0]: tables with the linguistic mappings removed
    my @casemaps;
    my @upper = @toupper_table;
    my @lower = @tolower_table;
    remove_linguistic_mappings( \@upper, \@lower );
    $casemaps[0] = pack( "S<*", 1) . dump_binary_case_table( @upper ) . dump_binary_case_table( @lower );

    # [1]: the unmodified tables
    $casemaps[1] = pack( "S<*", 1) . dump_binary_case_table( @toupper_table ) . dump_binary_case_table( @tolower_table );

    # [2]: the unmodified tables with i <-> U+0130 and I <-> U+0131
    @upper = @toupper_table;
    @lower = @tolower_table;
    $upper[ord 'i'] = 0x130;
    $lower[ord 'I'] = 0x131;
    $casemaps[2] = pack( "S<*", 1) . dump_binary_case_table( @upper ) . dump_binary_case_table( @lower );
    my $casemaps = align_string( 8, $casemaps[0] . $casemaps[1] . $casemaps[2] );

    # Character type table: every distinct triple of 16-bit type words is
    # stored once in $types and referenced by index through a two-level
    # table.

    my @table;
    my $types = "";
    my %typestr;
    for (my $i = 0; $i < 0x10000; $i++)
    {
        my $str = pack "S<3",
            ($category_table[$i] || 0) & 0xffff,
            defined($direction_table[$i]) ? $c2_types{$direction_table[$i]} : 0,
            ($category_table[$i] || 0) >> 16;

        if (!defined($typestr{$str}))
        {
            $typestr{$str} = length($types) / 6;
            $types .= $str;
        }
        $table[$i] = $typestr{$str};
    }

    my (@rows, @array, @data, @row_data);
    (@rows[0..4095], @data) = compress_array( 4096, 0, @table[0..65535] );
    (@array[0..255], @row_data) = compress_array( 256, 0, @rows );
    for (my $i = 0; $i < 256; $i++) { $array[$i] *= 2; }
    for (my $i = 0; $i < @row_data; $i++) { $row_data[$i] += 2 * @row_data + 512 - 4096; }

    my $arraystr = pack("S<*", @array, @row_data) . pack("C*", @data);
    my $chartypes = pack "S<2", 4 + length($types) + length($arraystr), 2 + length($types);
    $chartypes = align_string( 8, $chartypes . $types . $arraystr );

    # Sort tables.

    # GUID list.  "scalar keys" gives the number of GUIDs on every perl
    # version; a bare hash in scalar context only does so from 5.26 on.
    my $sorttables = pack "L<2", $version, scalar keys %guids;
    foreach my $id (sort keys %guids)
    {
        my %guid = %{$guids{$id}};
        my $flags = $guid{flags} || 0;
        my $map = length($casemaps[0]) + (defined $guid{ling_except} ? length($casemaps[1]) : 0);
        $sorttables .= pack_guid($id) . pack "L<5",
            $flags,
            defined($guid{compr}) ? $guid{compr} : 0xffffffff,
            $guid{except} || 0,
            $guid{ling_except} || 0,
            $map / 2;
    }

    # Expansions.
    $sorttables .= pack "L<S<*", scalar @expansions / 2, @expansions;

    # Compressions: one descriptor per set (position, min and max character,
    # number of rows of each length), followed by all the rows.
    $sorttables .= pack "L<", scalar @compressions;
    my $rowstr = "";
    foreach my $c (@compressions)
    {
        my $pos = length($rowstr) / 2;
        my $min = 0xffff;
        my $max = 0;
        my @lengths = (0) x 8;
        foreach my $r (sort cmp_compression @{$c})
        {
            my @row = @{$r};
            $lengths[scalar @row - 6]++;
            foreach my $val (@row[4..$#row])
            {
                $min = $val if $min > $val;
                $max = $val if $max < $val;
            }
            $rowstr .= align_string( 4, pack "S<*", @row[4..$#row] );
            $rowstr .= pack "C4", $row[1], $row[0], $row[2], $row[3];
        }
        $sorttables .= pack "L<S<10", $pos, $min, $max, @lengths;
    }
    $sorttables .= $rowstr;

    # Multiple weights.
    $sorttables .= align_string( 4, pack "L<C*", scalar @multiple_weights / 2, @multiple_weights );

    # Jamo sort.
    $sorttables .= pack("L<", length($jamostr) / 8) . $jamostr;

    # Registry: one value per locale, skipping a locale whose parent
    # ("xx" or "xx-yy") is known and maps to the same GUID.

    add_registry_key( $nlskey, "Sorting\\Ids", "{$default_guid}" );
    foreach my $loc (sort keys %locales)
    {
        my @parts = split /[-_]/, $loc;
        next if @parts > 1 && defined($locales{$parts[0]}) && $locales{$parts[0]} eq $locales{$loc};
        next if @parts > 2 && defined($locales{"$parts[0]-$parts[1]"}) && $locales{"$parts[0]-$parts[1]"} eq $locales{$loc};
        add_registry_string_value( $nlskey, "Sorting\\Ids", $loc, "\{$locales{$loc}\}" );
    }

    # File header: byte offsets of the blocks that follow it.

    my @header;
    $header[0] = 16;
    $header[1] = $header[0] + length $table;
    $header[2] = $header[1] + length $casemaps;
    $header[3] = $header[2] + length $chartypes;

    # Three-argument open keeps the mode separate from the file name.
    open OUTPUT, '>', "$filename.new" or die "Cannot create $filename.new: $!";
    print OUTPUT pack "L<*", @header;
    print OUTPUT $table, $casemaps, $chartypes, $sorttables;
    close OUTPUT;
    save_file($filename);
    return $chartypes;
}
4218
4219
# Locale information, keyed by locale name.  Each value is a hash reference;
# the fields read by the helpers below are name, alias, parent, sparent
# and xml.
4220 my %lcnames;
4221
# Return the name of the parent of a locale.
#
# An explicit "sparent" entry in %lcnames wins, then an explicit "parent"
# entry; otherwise the parent is the name with its last "-xxx" component
# removed.  Returns "" for a name without such a component and undef for an
# empty or undefined name.
sub locale_parent($)
{
    my $loc = shift;

    return undef unless $loc;

    if (defined( my $info = $lcnames{$loc} ))
    {
        foreach my $field (qw(sparent parent))
        {
            return $info->{$field} if defined $info->{$field};
        }
    }
    return $loc =~ /(.*)-[0-9A-Za-z]+/ ? $1 : "";
}
4232
# Sort comparator for locale names ($a and $b): compares them as strings
# after mapping A-Z to lower case and "_" to "-".
sub compare_locales
{
    my ($left, $right) = ($a, $b);
    tr/A-Z_/a-z-/ foreach $left, $right;
    return $left cmp $right;
}
4239
4240
# Run a query on an XML object through its find() method and return the
# text content of the first result, or undef when the result is false.
# A note is printed on STDERR when the query yields more than one entry.
sub xml_query($$)
{
    my ($xml, $query) = @_;
    my $nodes = $xml->find( $query );

    if (!$nodes) { return undef; }
    if (@{$nodes} > 1)
    {
        printf STDERR "multiple entries for %s\n", $query;
    }
    return $nodes->[0]->textContent;
}
4249
4250
# Run a query against the XML data of a locale, falling back to its parent
# locales.
#
#   $loc    locale hash reference; the "en-US" entry of %lcnames is used
#           instead when it has no name
#   $query  query passed to the find() method of each locale's xml object
#
# Starting with the locale itself and following locale_parent(), returns the
# text content of the first result found.  A locale is skipped when it has
# no entry in %lcnames, when the query gives no result, or when the result
# is the string U+2191 U+2191 U+2191 (presumably the marker for "inherit
# from the parent" in the source data - confirm).  Returns undef when the
# chain is exhausted.
sub loc_query($$)
{
    my ($loc, $query) = @_;

    $loc = $lcnames{"en-US"} unless $loc->{name};

    for (my $cur = $loc->{name}; defined $cur; $cur = locale_parent( $cur ))
    {
        next unless defined $lcnames{$cur};
        my $xml = $lcnames{$cur}->{xml};
        my $ret = $xml->find( $query );
        next unless $ret;
        printf STDERR "%s: multiple entries for %s\n", $cur, $query if (@{$ret} > 1);
        next if @{$ret}[0]->textContent eq "\x{2191}\x{2191}\x{2191}";
        return @{$ret}[0]->textContent;
    }
    return undef;
}
4269
4270
# Look up a field of a locale, with fallbacks.
#
#   $loc    locale hash reference
#   $field  name of the field to fetch
#   $def    value returned when the field is not found anywhere
#
# The lookup order is: the locale itself; the "en-US" entry when the locale
# has no name; the chain of "alias" entries; then the parents, where the
# parent is the "sparent" entry if there is one and otherwise the name with
# its last "-xxx" component removed.
sub locale_entry($$$)
{
    my ($loc, $field, $def) = @_;

    return $loc->{$field} if defined $loc->{$field};

    if (!$loc->{name})
    {
        $loc = $lcnames{"en-US"};
        return $loc->{$field} if defined $loc->{$field};
    }

    while (defined( my $alias = $loc->{alias} ))
    {
        $loc = $lcnames{$alias};
        return $loc->{$field} if defined $loc->{$field};
    }

    my $name = $loc->{name};
    while ($name)
    {
        my $known = $lcnames{$name};
        if (defined $known && defined $known->{sparent})
        {
            $name = $known->{sparent};
        }
        elsif ($name =~ /(.*)-[0-9A-Za-z]+/)
        {
            $name = $1;
        }
        else
        {
            # No parent left to try.
            last;
        }
        my $parent = $lcnames{$name};
        return $parent->{$field} if defined $parent && defined $parent->{$field};
    }
    return $def;
}
4306
# Accumulated string data; add_str_data() appends to it and returns
# positions in it, counted in 16-bit units.
4307 my $string_data;
4308
# Add a chunk of packed data to $string_data, reusing an existing copy when
# the same bytes are already present, and return its position in 16-bit
# units.
#
# Only matches at an even byte offset are reused: positions are returned in
# 16-bit units, so an odd offset cannot be represented.
sub add_str_data($)
{
    my $txt = shift;

    $string_data = "" unless defined $string_data;

    my $ret = index( $string_data, $txt );
    while ($ret != -1 && $ret % 2)
    {
        # Misaligned match: look for the next occurrence.
        $ret = index( $string_data, $txt, $ret + 1 );
    }
    if ($ret == -1)
    {
        $ret = length($string_data);
        $string_data .= $txt;
    }
    return $ret / 2;
}
4320
# Add a string to the string data and return its position.  The string is
# stored as UTF-16LE, preceded by its length in 16-bit units and followed
# by a zero word.  Returns 0 for an undefined or empty string.
sub add_string($)
{
    my $str = shift;
    if (!defined($str) || $str eq "") { return 0; }

    my $utf = encode( "UTF16LE", $str );
    my $length_word = pack "S<", length($utf) / 2;
    my $terminator = pack "S", 0;
    return add_str_data( $length_word . $utf . $terminator );
}
4328
# Add a font signature to the string data and return its position.  The
# values are stored as 32-bit words, preceded by a 16-bit word holding
# twice their count.
sub add_fontsig(@)
{
    my @values = @_;
    my $word_count = 2 * @values;
    my $blob = pack "S<L<*", $word_count, @values;
    return add_str_data( $blob );
}
4333
# Add an array of strings to the string data and return the position of
# the array.  Each string is added with add_string(); the array itself is a
# 16-bit count followed by the 32-bit position of every string.  Returns 0
# for an empty list.
sub add_strarray(@)
{
    return 0 unless @_;

    my $count = @_;
    # The strings are added first, so they precede the array that refers
    # to them.
    my @positions = map { add_string($_) } @_;
    my $blob = pack "S<L<*", $count, @positions;
    return add_str_data( $blob );
}
4339
# Derive a grouping string from a number format; the result is chr(3) when
# neither of the two pattern tests below applies.
# NOTE(review): in this copy of the file both "if" lines stop right after
# the opening "/" - the patterns and the values they return are missing
# and must be restored from the upstream source before this sub can run.
4340 sub format_to_grouping($)
4341 {
4342 my $format = shift;
4343 if ($format =~ /
4344 if ($format =~ /
4345
4346 return chr(3);
4347 }
4348
# Classify a currency format of the form "positive" or "positive;negative".
#
#   $name    not used by the code
#   $format  the format; \xa4 is the currency sign placeholder and \xa0 a
#            non-breaking space
#
# Returns ($pos, $neg): the index of the first positive and of the first
# negative pattern that matches, 0 when none matches.  Without a negative
# part, the negative index is derived from the positive one.
sub parse_currency_format($$)
{
    my ($name, $format) = @_;
    my ($posfmt, $negfmt) = split /;/, $format;
    my @pospatterns = ( "\xa4[^\xa0]*#",
                        "00[^\xa0]*\xa4",
                        "\xa4.*\xa0.*#",
                        "00.*\xa0.*\xa4" );
    my @negpatterns = ( "\\(\xa4[^\xa0]*#",
                        "-\xa4[^\xa0]*#",
                        "\xa4[^\xa0]*-#",
                        "\xa4[^\xa0]*#.*00-",
                        "00[^\xa0]*\xa4\\)",
                        "-#.*00[^\xa0]*\xa4",
                        "00-[^\xa0]*\xa4",
                        "00[^\xa0]*\xa4-",
                        "-#.*00.*\xa0.*\xa4",
                        "-\xa4.*\xa0.*#",
                        "00.*\xa0.*\xa4-",
                        "\xa4.*\xa0.*#.*00-",
                        "\xa4.*\xa0.*-#",
                        "00-.*\xa0.*\xa4",
                        "\\(\xa4.*\xa0.*#",
                        "00.*\xa0.*\xa4\\)");
    # Negative index implied by each positive index.
    my @implied_neg = ( 1, 5, 9, 8 );

    # Index of the first pattern matching the text, 0 if there is none.
    my $first_match = sub
    {
        my ($text, @patterns) = @_;
        foreach my $idx (0 .. $#patterns)
        {
            my $re = $patterns[$idx];
            return $idx if $text =~ /$re/;
        }
        return 0;
    };

    my $pos = $first_match->( $posfmt, @pospatterns );
    my $neg = defined $negfmt ? $first_match->( $negfmt, @negpatterns ) : $implied_neg[$pos];

    return ($pos, $neg);
}
4398
# Classify a percent pattern by the position of the percent sign.
#
#   $fmt - percent pattern
#
# Returns ($pos, $neg): $pos is the index of the first matching entry in the
# pattern table (or the table size when nothing matches, after reporting the
# format on STDERR); $neg equals $pos except that 3 maps to 7.
sub parse_percent_format($)
{
    my ($fmt) = @_;
    my @patterns = ( "0.+%",
                     "0%",
                     "%#",
                     "%.+#" );

    # Advance to the first pattern that matches.
    my $pos = 0;
    $pos++ while $pos < @patterns && $fmt !~ /$patterns[$pos]/;

    printf STDERR "unknown format '%s'\n", $fmt if ($pos == @patterns);

    my $neg = ($pos == 3) ? 7 : $pos;
    return ($pos, $neg);
}
4414
# Rewrite the date pattern letters handled below into the spelling used by
# the generated tables.
#
#   $fmt - date pattern
#
# Returns the converted pattern. Each rule is applied once, to its first
# match only.
sub convert_date_format($)
{
    my ($pattern) = @_;
    for ($pattern)
    {
        s/G+/gg/;                       # era
        s/LLLL/MMMM/;                   # stand-alone month, wide
        s/LLL/MMM/;                     # stand-alone month, abbreviated
        s/E+/dddd/;                     # day of week
        s/ccc+/dddd/;                   # stand-alone day of week
        # A single "y" becomes "yyyy", whether in the middle, at the start
        # or at the end of the pattern.
        s/([^gy])y([^y])/$1yyyy$2/;
        s/^y([^y])/yyyy$1/;
        s/([^gy])y$/$1yyyy/;
    }
    return $pattern;
}
4428
# Rewrite the time pattern letters handled below into the spelling used by
# the generated tables.
#
#   $fmt - time pattern
#
# Returns the converted pattern. Each rule is applied once, to its first
# match only.
sub convert_time_format($)
{
    my ($pattern) = @_;
    for ($pattern)
    {
        s/a+/tt/;           # am/pm marker
        s/B+/tt/;           # day period marker
        s/\x{202f}/ /;      # narrow no-break space -> plain space
    }
    return $pattern;
}
4437
# Read the ISO 639-3 code table and build a lookup from the two-letter code
# in the fourth column to the three-letter code in the third column.
# Lines that lack a two-letter code in the fourth column are ignored.
#
# Returns the mapping as a hash (flattened to a list).
sub load_iso639()
{
    my %three_letter_for;
    my $table = "iso-639-3_Code_Tables_$ISO639VERSION/iso-639-3_$ISO639VERSION.tab";
    my $fh = open_data_file( "iso639", $table );
    while (<$fh>)
    {
        next unless /^\s*[a-z]{3}\s+[a-z]{3}\s+([a-z]{3})\s+([a-z]{2})\s/;
        $three_letter_for{$2} = $1;
    }
    close $fh;
    return %three_letter_for;
}
4449
4450
4451
4452
4453 sub build_locale_data()
4454 {
4455 my $base = "cldr-release-$CLDRVERSION";
cfaa28933… Alex*4456 my $suppl = load_xml_data_file( "cldr", "$base/common/supplemental/supplementalData.xml" );
4457 my $subtags = load_xml_data_file( "cldr", "$base/common/supplemental/likelySubtags.xml" );
4458 my $numbers = load_xml_data_file( "cldr", "$base/common/supplemental/numberingSystems.xml" );
8cdb593f5… Alex*4459
cfaa28933… Alex*4460 my $phone = load_xml_data_file( "cldr33", "common/supplemental/telephoneCodeData.xml" );
8cdb593f5… Alex*4461 my %iso639 = load_iso639();
4462 $string_data = pack "S2", 0, 0;
4463
4464 %lcnames = map { $_->{name} => $_ } @locales;
4465
4466 my %lcids;
4467 foreach my $loc (@locales) { $lcids{$loc->{lcid}} = $loc if defined $loc->{lcid}; }
4468
4469 my %days = ( "sun" => 0, "mon" => 1, "tue" => 2, "wed" => 3, "thu" => 4, "fri" => 5, "sat" => 6 );
4470
4471
4472
4473 foreach my $loc (@locales)
4474 {
4475 next if $loc->{name} eq "";
4476 next if defined $loc->{parent};
4477 (my $unix_name = $loc->{name}) =~ s/-/_/g;
50c5eb31c… Alex*4478 my $parent = xml_query( $suppl, "/supplementalData/parentLocales[not(\@component)]/parentLocale[contains(concat(' ',\@locales,' '),' $unix_name ')]/\@parent" );
8cdb593f5… Alex*4479 if ($parent)
4480 {
4481 $parent =~ s/_/-/g;
4482 $parent = "" if $parent eq "root";
4483 }
4484 elsif ($loc->{name} =~ /(.*)-[0-9A-Za-z]+/) { $parent = $1; }
4485 $loc->{parent} = $parent || "";
4486 }
4487
4488
4489
4490 foreach my $loc (@locales)
4491 {
4492 next if defined $loc->{alias};
4493 (my $file = $loc->{file} || $loc->{name}) =~ s/-/_/g;
4494 $file = "$base/" . ($loc->{dir} || "common") . "/main/$file.xml";
cfaa28933… Alex*4495 my $xml = load_xml_data_file( "cldr", $file );
8cdb593f5… Alex*4496 $loc->{xml} = $xml;
4497 $loc->{language} ||= xml_query( $xml, "/ldml/identity/language/\@type" );
4498 $loc->{territory} ||= xml_query( $xml, "/ldml/identity/territory/\@type" );
4499 $loc->{script} = xml_query( $xml, "/ldml/identity/script/\@type" );
4500 if (!defined($loc->{territory}) && $loc->{name} =~ /-([A-Z]{2}|[0-9]{3})$/) { $loc->{territory} = $1; }
4501 if (!defined($loc->{script}) && $loc->{name} =~ /-([A-Z][a-z]{3})(-[A-Z]{2})?$/) { $loc->{script} = $1; }
4502 }
4503
4504
4505
4506 foreach my $loc (@locales)
4507 {
4508 next if defined $loc->{alias};
4509 next if defined $loc->{territory};
4510 my $id = $loc->{sortlocale};
4511 if (defined $id && ($id =~ /[-_]([A-Z0-9]+)$/))
4512 {
4513 $loc->{territory} = $1;
4514 next;
4515 }
4516 my @children = grep /^$loc->{name}-[A-Z0-9]+$/ && !defined $lcnames{$_}->{alias}, keys %lcnames;
4517 if (@children == 1)
4518 {
4519 $id = $children[0];
4520 }
4521 else
4522 {
4523 my $name = $loc->{file} || $loc->{name};
b591b3167… Alex*4524 $name =~ s/-(Arab|Beng|Cyrl|Deva|Guru|Hans|Hant|Latn|Tfng|Vaii)$//;
8cdb593f5… Alex*4525 $name =~ s/-/_/g;
4526 $id = xml_query( $subtags, "/supplementalData/likelySubtags/likelySubtag[\@from='$name']/\@to" );
4527 $id =~ s/_/-/g if $id;
4528 }
4529 if ($id =~ /[-_]([A-Z0-9]+)$/)
4530 {
4531 $loc->{territory} = $1;
4532 next if defined $loc->{sortlocale};
4533 next unless $id =~ /^$loc->{name}/;
4534 while (defined $lcnames{$id} && defined $lcnames{$id}->{alias}) { $id = $lcnames{$id}->{alias}; }
4535 $loc->{sortlocale} = $id if defined $lcnames{$id};
4536 next;
4537 }
4538 print STDERR "no territory found for $loc->{name}\n";
4539 }
4540
ad02ef7be… Alex*4541
4542
4543 my %geotable;
4544 foreach my $geo (@geoids)
4545 {
4546 my $name = $geo->{name};
4547 next unless defined $name;
4548 $geo->{alias} = $geotable{$name} if defined $geotable{$name};
4549 $geotable{$name} ||= $geo;
4550 }
4551 foreach my $loc (@locales)
4552 {
4553 next if defined $loc->{alias};
4554 my $territory = $loc->{territory};
4555 $geotable{$territory} ||= { name => $territory };
4556 }
4557 foreach my $name (keys %geotable)
4558 {
4559 my $geo = $geotable{$name};
4560 $geo->{dialcode} = xml_query( $phone, "(/supplementalData/telephoneCodeData/codesByTerritory[\@territory='$name']/telephoneCountryCode)[1]/\@code" );
4561 if ($name =~ /\d+/)
4562 {
4563 $geo->{uncode} = $name;
4564 next;
4565 }
4566 $geo->{iso2} = $name;
4567 $geo->{iso3} = xml_query( $suppl, "/supplementalData/codeMappings/territoryCodes[\@type='$name']/\@alpha3");
4568 $geo->{uncode} = xml_query( $suppl, "/supplementalData/codeMappings/territoryCodes[\@type='$name']/\@numeric");
4569 $geo->{sintlsymbol} ||= xml_query( $suppl, "(/supplementalData/currencyData/region[\@iso3166='$name']/currency[not(\@to)])[1]/\@iso4217") || "XXX";
4570 $geo->{sintlsymbol} =~ s/XXX/XDR/;
4571 }
4572 foreach my $geo (@geoids)
4573 {
4574 $geo->{parentid} = $geotable{$geo->{parent}}->{id} if defined $geo->{parent};
4575 next if defined $geo->{iso2};
4576 next if defined $geo->{alias};
4577 next unless defined $geo->{uncode};
4578 my @contains;
4579 my $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$geo->{uncode}' and not(\@status)]/\@contains");
4580 push @contains, split /\s+/, $list if defined $list;
4581 $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$geo->{uncode}' and \@status='deprecated']/\@contains");
4582 push @contains, split /\s+/, $list if defined $list;
4583 while (@contains)
4584 {
4585 my $territory = pop @contains;
4586 if (defined $geotable{$territory})
4587 {
4588 $geotable{$territory}->{parentid} ||= $geo->{id};
4589 }
4590 elsif ($territory =~ /\d+/)
4591 {
4592
4593 $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$territory' and not(\@status)]/\@contains" );
4594 push @contains, split /\s+/, $list if defined $list;
4595 }
4596 }
4597 }
4598
65fc470df… Alex*4599
4600
4601 foreach my $cal (@calendars)
4602 {
4603 next unless defined $cal->{locale};
4604 my $loc = $lcnames{$cal->{locale}};
4605 $loc->{calendar} = [ ] unless defined $loc->{calendar};
4606 push @{$loc->{calendar}}, $cal;
4607 }
4608
8cdb593f5… Alex*4609
4610
4611 foreach my $loc (@locales)
4612 {
4613 next unless defined $loc->{alias};
4614 next if defined $loc->{lcid};
4615 my $alias = $loc->{alias};
4616 my $lcid = $lcnames{$alias}->{lcid} || 0x1000;
4617 $loc->{lcid} = $lcid | 0x80000000;
4618 }
4619
4620
4621
4622 foreach my $loc (@locales)
4623 {
4624 next unless $loc->{name} =~ /_/;
4625 next unless defined $loc->{alias};
4626 my $alias = $loc->{alias};
4627 my $parent = $lcnames{$alias};
4628 my $basename = $parent->{name};
4629 while (1)
4630 {
4631 @{$parent->{sortnames}}[($loc->{lcid} >> 16) - 1] = $loc->{name};
4632 $alias = locale_parent( $alias );
4633 last unless $alias && defined $lcnames{$alias};
4634 $parent = $lcnames{$alias};
4635 last if defined $parent->{sortbase} && $parent->{sortbase} ne $basename;
4636 $parent->{sortbase} = $basename;
4637 }
4638 }
4639
4640
4641
4642 my $idx = 0;
4643 foreach my $loc (@locales)
4644 {
4645 next if defined $loc->{alias};
4646 $loc->{idx} = $idx++;
4647 }
4648 foreach my $loc (@locales)
4649 {
4650 my $alias = $loc->{alias};
4651 next unless defined $alias;
4652 while (defined $lcnames{$alias}->{alias}) { $alias = $lcnames{$alias}->{alias}; }
4653 $loc->{idx} = $lcnames{$alias}->{idx};
4654 }
4655
4656
4657
4658 my $lcid_data = "";
4659 foreach my $id (sort { $a <=> $b } keys %lcids)
4660 {
4661 my $loc = $lcids{$id};
4662 $lcid_data .= pack "L<S<2", $id, $loc->{idx}, add_string($loc->{name});
4663 }
4664
4665
4666
4667 my $lcname_data = "";
4668 foreach my $name (sort compare_locales keys %lcnames)
4669 {
4670 my $loc = $lcnames{$name};
4671 $lcname_data .= pack "S<2L<", add_string($name), $loc->{idx}, $loc->{lcid} || 0x1000;
4672 }
4673
4674
4675
4676 my $locale_data = "";
4677 my $default_lcid = 0x8001;
4678 foreach my $loc (@locales)
4679 {
4680 next if defined $loc->{alias};
4681 my $sname = $loc->{name};
4682 my $language = $loc->{language};
4683 my $territory = $loc->{territory};
4684 my $script = $loc->{script};
4685 my $neutral = ($sname && $sname !~ /-$territory/);
4686 my $sparent = $loc->{sparent} || (($sname =~ /(.*)-[0-9A-Za-z]+/) ? $1 : $loc->{parent});
4687 my $unique_lcid = $loc->{lcid};
4688 unless (defined $unique_lcid) { $unique_lcid = $default_lcid++; }
ad02ef7be… Alex*4689 my $geo = $geotable{$territory};
8cdb593f5… Alex*4690 my $territory_match = "contains(concat(' ',normalize-space(\@territories),' '),' $territory ')";
4691
4692
4693
4694 my $ssortlocale = $loc->{sortlocale} || ($neutral ? "$sname-$territory" : $sname);
4695 my $idefaultlanguage = defined $lcnames{$ssortlocale} ? $lcnames{$ssortlocale}->{lcid} : undef;
4696 $idefaultlanguage = $lcnames{"en-US"}->{lcid} unless $ssortlocale;
4697 (my $siso639langname = $sname) =~ s/-.*$//;
4698 my $siso639langname2 = $iso639{$siso639langname} || $siso639langname;
4699 my $sopentypelang = sprintf "%-4s", locale_entry( $loc, "sopentypelang", uc $siso639langname2 );
4700 my $sabbrevlangname = defined $loc->{lcid} ? locale_entry( $loc, "sabbrevlangname", uc $siso639langname2 ) : "ZZZ";
ad02ef7be… Alex*4701 my $siso3166ctryname2 = $geo->{iso3} || $geo->{uncode};
8cdb593f5… Alex*4702 my $senglanguage = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/languages/language[\@type='$language' and not(\@alt)]" ) || "";
4703 my $sengcountry = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/territories/territory[\@type='$territory' and not(\@alt)]" ) || "";
4704 my $snativelangname = loc_query( $loc, "/ldml/localeDisplayNames/languages/language[\@type='$language' and not(\@alt)]" );
4705 my $snativectryname = loc_query( $loc, "/ldml/localeDisplayNames/territories/territory[\@type='$territory' and not(\@alt)]" );
4706 $sengcountry =~ s/South Korea/Korea/;
8b442c29a… Piot*4707 $sengcountry =~ s/T\xfcrkiye/Turkey/;
8cdb593f5… Alex*4708 $snativelangname ||= $senglanguage;
4709 $snativectryname ||= $sengcountry;
4710 if ($script)
4711 {
4712 my $engscript = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/scripts/script[\@type='$script' and not(\@alt)]" );
4713 my $nativescript = loc_query( $loc, "/ldml/localeDisplayNames/scripts/script[\@type='$script' and not(\@alt)]" );
4714 $senglanguage .= " ($engscript)" if $engscript;
4715 $snativelangname .= " ($nativescript)" if $nativescript;
4716 }
4717 my $sengdisplayname = $neutral ? $senglanguage : "$senglanguage ($sengcountry)";
4718 my $snativedisplayname = $neutral ? $snativelangname : "$snativelangname ($snativectryname)";
4719 $sengdisplayname =~ s/\) \(/, /;
4720 $snativedisplayname =~ s/\) \(/, /;
4721 my $sscripts = locale_entry( $loc, "sscripts", $script ) || xml_query( $suppl, "/supplementalData/languageData/language[\@type='$language' and not(\@alt)]/\@scripts" );
4722 $sscripts = (join ";", (sort split / /, ($sscripts || "Latn"))) . ";";
4723 my $ireadinglayout = locale_entry( $loc, "ireadinglayout", 0 );
4724 my $charlayout = loc_query( $loc, "/ldml/layout/orientation/characterOrder" );
4725 if ($charlayout eq "right-to-left")
4726 {
4727 $ireadinglayout = 1;
4728 }
4729 elsif ($charlayout eq "top-to-bottom")
4730 {
4731 my $linelayout = loc_query( $loc, "/ldml/layout/orientation/lineOrder" );
4732 $ireadinglayout = $linelayout eq "right-to-left" ? 2 : 3;
4733 }
ad02ef7be… Alex*4734 my $igeoid = $geo->{id} || 0;
8cdb593f5… Alex*4735
4736
4737
4738 my $sdecimal = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/decimal" );
4739 my $slist = locale_entry( $loc, "slist", ";" );
4740 my $smondecimalsep = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/currencyDecimal" ) || $sdecimal;
4741 my $sthousand = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/group" );
4742 $sthousand =~ s/\x{202f}/\x{00a0}/;
4743 my $smonthousandsep = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/currencyGroup" ) || $sthousand;
4744 my $spositivesign = "";
1ad2cb51c… Niko*4745 my $snegativesign = "-";
8cdb593f5… Alex*4746 my $spercent = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/percentSign" );
4747 my $snan = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/nan" );
4748 my $sposinfinity = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/infinity" );
4749 my $sneginfinity = $sposinfinity ? "-$sposinfinity" : "";
4750 my $sgrouping = format_to_grouping( loc_query( $loc, "/ldml/numbers/decimalFormats[\@numberSystem='latn']/decimalFormatLength[not(\@type)]/decimalFormat/pattern" ));
4751 my $percentformat = loc_query( $loc, "/ldml/numbers/percentFormats[\@numberSystem='latn']/percentFormatLength[not(\@type)]/percentFormat/pattern" );
198de0dcb… Alex*4752 my $currencyformat = loc_query( $loc, "/ldml/numbers/currencyFormats[\@numberSystem='latn']/currencyFormatLength[not(\@type)]/currencyFormat[\@type='accounting']/pattern[not(\@alt)]" ) ||
4753 loc_query( $loc, "/ldml/numbers/currencyFormats[\@numberSystem='latn']/currencyFormatLength[not(\@type)]/currencyFormat[\@type='standard']/pattern[not(\@alt)]" );
8cdb593f5… Alex*4754 my $smongrouping = format_to_grouping( $currencyformat );
4755 my ($icurrency, $inegcurr) = parse_currency_format( $sname, $currencyformat );
4756 my ($ipospercent, $inegpercent) = parse_percent_format( $percentformat );
4757 my $native_numbering = loc_query( $loc, "/ldml/numbers/otherNumberingSystems/native" );
4172c0482… Alex*4758 my @snativedigits = split //, (locale_entry( $loc, "nativedigits", "" ) || xml_query( $numbers, "/supplementalData/numberingSystems/numberingSystem[\@id='$native_numbering']/\@digits" ));
8cdb593f5… Alex*4759 my $digitsubstitution = !(ord($snativedigits[0]) >= 0x600 && ord($snativedigits[0]) <= 0x6ff);
4760 my $measure = defined xml_query( $suppl, "/supplementalData/measurementData/measurementSystem[\@type='US' and $territory_match]" );
4761 my $papersize = defined xml_query( $suppl, "/supplementalData/measurementData/paperSize[\@type='US-Letter' and $territory_match]" );
4762
4763
4764
ad02ef7be… Alex*4765 my $sintlsymbol = $geo->{sintlsymbol} || "XDR";
4766 my $scurrency = $geo->{scurrency} || loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/symbol[\@alt='narrow']" );
8cdb593f5… Alex*4767 $scurrency ||= loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/symbol[not(\@alt)]" );
6d046dd98… Alex*4768 $scurrency ||= $geo->{sintlsymbol};
ad02ef7be… Alex*4769 $geo->{scurrency} = $scurrency if $scurrency;
8cdb593f5… Alex*4770 my $sengcurrname = $loc->{sengcurrname} || loc_query( $lcnames{en}, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/displayName[not(\@count)]" );
4771 my $snativecurrname = $loc->{sengcurrname} || loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/displayName[not(\@count)]" ) || $sengcurrname;
4772 my $icurrdigits = xml_query( $suppl, "/supplementalData/currencyData/fractions/info[\@iso4217='$sintlsymbol']/\@digits" );
4773 $icurrdigits = 2 unless defined $icurrdigits;
4774
4775
4776
4777 my $firstday = xml_query( $suppl, "/supplementalData/weekData/firstDay[not(\@alt) and $territory_match]/\@day" );
4778 my $ifirstdayofweek = $firstday ? $days{$firstday} : 1;
4779 my $firstweekofyear = (xml_query( $suppl, "/supplementalData/weekData/minDays[$territory_match]/\@count" ) || 0) == 4 ? 2 : 0;
4780 my $serastring = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/eras/eraAbbr/era[\@type='1' and not(\@alt)]" );
4781 my (@sdayname, @sabbrevdayname, @sshortestdayname);
4782 foreach my $d (sort { $days{$a} <=> $days{$b} } keys %days)
4783 {
4784 my $n = $days{$d};
4785 my %name;
4786 foreach my $type ()
4787 {
4788 $name{$type} = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/days/dayContext[\@type='format']/dayWidth[\@type='$type']/day[\@type='$d' and not(\@alt)]" );
4789 }
4790 push @sdayname, $name{wide};
4791 push @sabbrevdayname, $name{abbreviated} || $name{wide};
4792 push @sshortestdayname, $name{short} || $name{abbreviated} || $name{wide};
4793 }
4794 my (@smonthname, @sabbrevmonthname, @sgenitivemonth, @sabbrevgenitivemonth);
4795 foreach my $n (1..13)
4796 {
4797 my $name = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='stand-alone']/monthWidth[\@type='wide']/month[\@type='$n']" );
4798 my $abbrev = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='stand-alone']/monthWidth[\@type='abbreviated']/month[\@type='$n']" );
4799 my $genitive = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='format']/monthWidth[\@type='wide']/month[\@type='$n']" );
4800 my $abbrevgen = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='format']/monthWidth[\@type='abbreviated']/month[\@type='$n']" );
4801 push @smonthname, $name || $genitive || "";
4802 push @sabbrevmonthname, $abbrev || $abbrevgen || $name || $genitive || "";
4803 push @sgenitivemonth, $genitive || "";
4804 push @sabbrevgenitivemonth, $abbrevgen || $genitive || "";
4805 }
4806 @sgenitivemonth = () if join("|",@smonthname) eq join("|",@sgenitivemonth);
4807 @sabbrevgenitivemonth = () if join("|",@sabbrevmonthname) eq join("|",@sabbrevgenitivemonth);
4808 my %caltypes = ( "gregorian" => 1, "japanese" => 3, "chinese" => 4, "dangi" => 5, "islamic" => 6, "buddhist" => 7, "hebrew" => 8,
4809 "persian" => 22, "islamic-civil" => 23, "islamic-umalqura" => 23 );
4810 my $calpref = xml_query( $suppl, "/supplementalData/calendarPreferenceData/calendarPreference[$territory_match]/\@ordering" ) || "gregorian";
4811 my $icalendartype;
4812 my @scalnames;
4813 foreach my $c (split /\s+/, $calpref)
4814 {
4815 next unless defined $caltypes{$c};
4816 $icalendartype .= chr($caltypes{$c});
4817 $scalnames[$caltypes{$c} - 1] = loc_query( $loc, "/ldml/localeDisplayNames/types/type[\@key='calendar' and \@type='$c']" );
4818 }
4819
4820
4821
4822 my $s1159 = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='abbreviated']/dayPeriod[\@type='am' and not(\@alt)]" );
4823 my $s2359 = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='abbreviated']/dayPeriod[\@type='pm' and not (\@alt)]" );
4824 my $sshortestam = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='narrow']/dayPeriod[\@type='am' and not(\@alt)]" );
4825 my $sshortestpm = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='narrow']/dayPeriod[\@type='pm' and not (\@alt)]" );
4826 my @stimeformat = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/timeFormats/timeFormatLength[\@type='medium']/timeFormat/pattern[not(\@alt)]" ));
4827 push @stimeformat, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hms' and not(\@alt)]" );
4828 pop @stimeformat if $stimeformat[0] eq $stimeformat[1];
4829 @stimeformat = map convert_time_format($_), @stimeformat;
4830 my @sshorttime = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/timeFormats/timeFormatLength[\@type='short']/timeFormat/pattern[not(\@alt)]" ));
4831 push @sshorttime, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hm' and not(\@alt)]" );
4832 pop @sshorttime if $sshorttime[0] eq $sshorttime[1];
4833 @sshorttime = map convert_time_format($_), @sshorttime;
4834 my @sshortdate = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMd' and not(\@alt)]" );
4835 push @sshortdate, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMd' and not(\@alt)]" );
4836 @sshortdate = map convert_date_format($_), @sshortdate;
4837 my @slongdate = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateFormats/dateFormatLength[\@type='full']/dateFormat/pattern[not(\@alt)]" ));
4838 push @slongdate, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateFormats/dateFormatLength[\@type='long']/dateFormat/pattern[not(\@alt)]" );
4839 @slongdate = map convert_date_format($_), @slongdate;
4840 my @smonthday = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMMd' and not(\@alt)]" ));
4841 push @smonthday, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Md' and not(\@alt)]" );
4842 push @smonthday, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMd' and not(\@alt)]" );
4843 @smonthday = map convert_date_format($_), @smonthday;
4844 my @syearmonth = map convert_date_format($_), loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMM' and not(\@alt)]" );
4845 my @sduration = map convert_time_format( lc $_ ), loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hms' and not(\@alt)]" );
4846 my $srelativelongdate = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMMEd' and not(\@alt)]" ) ||
4847 loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMEd' and not(\@alt)]" );
4848 $srelativelongdate = convert_date_format( $srelativelongdate );
4849
65fc470df… Alex*4850 if (defined $loc->{calendar})
4851 {
4852 foreach my $cal (@{$loc->{calendar}})
4853 {
4854 $cal->{sshortdate} = \@sshortdate;
4855 $cal->{syearmonth} = \@syearmonth;
4856 $cal->{slongdate} = \@slongdate;
4857 $cal->{serastring} = [ $serastring ];
4858 $cal->{sdayname} = \@sdayname;
4859 $cal->{sabbrevdayname} = \@sabbrevdayname;
4860 $cal->{smonthname} = \@smonthname;
4861 $cal->{sabbrevmonthname} = \@sabbrevmonthname;
4862 $cal->{scalname} = $scalnames[$cal->{id}];
4863 $cal->{smonthday} = \@smonthday;
4864 $cal->{sshortestdayname} = \@sshortestdayname;
4865 $cal->{sabbreverastring} = [ $serastring ];
4866 $cal->{sshortestdayname} = \@sshortestdayname;
4867 $cal->{srelativelongdate} = $srelativelongdate;
4868 }
4869 }
4870
8cdb593f5… Alex*4871
4872
4873 my %ansicpmap = ( 437 => 1252, 720 => 1256, 737 => 1253, 775 => 1257, 850 => 1252,
4874 852 => 1250, 855 => 1251, 866 => 1251, 857 => 1254, 862 => 1255 );
4875 my %maccpmap = ( 437 => 10000, 720 => 10004, 737 => 10006, 775 => 10029, 850 => 10000,
4876 852 => 10029, 855 => 10007, 857 => 10081, 862 => 10005, 866 => 10007,
4877 874 => 10021, 932 => 10001, 936 => 10008, 949 => 10003, 950 => 10002,
4878 1258 => 10000 );
4879 my %ebcdiccpmap = ( 437 => 37, 720 => 20420, 737 => 20273, 866 => 20880, 932 => 20290 );
4880 my %codepagemasks = ( 874 => [ 0x01000000, 0x00000000, 0x00000000, 0, 0x00010000, 0x00000000, 0x00010000, 0x00000000 ],
4881 932 => [ 0x00000000, 0x28c70000, 0x00000010, 0, 0x00020000, 0x00000000, 0x00020000, 0x00000000 ],
4882 936 => [ 0x00000000, 0x28010000, 0x00000002, 0, 0x00040000, 0x00000000, 0x00040000, 0x00000000 ],
4883 949 => [ 0x00000000, 0x00000000, 0x00000000, 0, 0x00080000, 0x00000000, 0x00080000, 0x00000000 ],
4884 950 => [ 0x00000000, 0x28c10000, 0x00000012, 0, 0x00100000, 0x00000000, 0x00100000, 0x00000000 ],
4885 1258 => [ 0x2000000f, 0x00000000, 0x00000000, 0, 0x00000100, 0x00008000, 0x00000100, 0x00008000 ],
4886 866 => [ 0x00000200, 0x00000000, 0x00000000, 0, 0x00000004, 0x00020000, 0x00000004, 0x02020000 ],
4887 862 => [ 0x00000800, 0x40000000, 0x00000000, 0, 0x00000020, 0x00200000, 0x00000020, 0x00200000 ],
4888 857 => [ 0x0000001f, 0x00000000, 0x00000000, 0, 0x00000010, 0x01000000, 0x00000010, 0x01000000 ],
4889 855 => [ 0x00000200, 0x00000000, 0x00000000, 0, 0x00000004, 0x02000000, 0x00000004, 0x02000000 ],
4890 852 => [ 0x00000027, 0x00000000, 0x00000000, 0, 0x00000002, 0x04000000, 0x00000002, 0x04000000 ],
4891 775 => [ 0x00000007, 0x00000000, 0x00000000, 0, 0x00000080, 0x08000000, 0x00000080, 0x08000000 ],
4892 737 => [ 0x00000080, 0x00000000, 0x00000000, 0, 0x00000008, 0x10000000, 0x00000008, 0x10010000 ],
4893 720 => [ 0x00002000, 0x00000000, 0x00000000, 0, 0x00000040, 0x20000000, 0x00000040, 0x20080000 ],
4894 850 => [ 0x00000003, 0x00000000, 0x00000000, 0, 0x00000001, 0x40000000, 0x0000019f, 0xdfd70000 ],
4895 437 => [ 0x00000003, 0x00000000, 0x00000000, 0, 0x00000001, 0x80000000, 0x0000019f, 0xdfd70000 ],
4896 65001 => [ 0x00000000, 0x00000000, 0x00000000, 0, 0x00000000, 0x00000000, 0x0000019f, 0xdfd70000 ] );
4897 my $oemcp = locale_entry( $loc, "oemcp", 65001 );
4898 my $maccp = locale_entry( $loc, "maccp", undef ) || $maccpmap{$oemcp} || 65001;
4899 my $ebcdiccp = locale_entry( $loc, "ebcdiccp", undef ) || $ebcdiccpmap{$oemcp} || 500;
4900 $ebcdiccp = 500 if (defined $loc->{oemcp} && $loc->{oemcp} == 65001) || (defined $loc->{maccp} && $loc->{maccp} == 65001);
4901 my $ansicp = $ansicpmap{$oemcp} || $oemcp;
4902 my @fontsig = (0) x 8;
4903 my $sig = locale_entry( $loc, "fontsig", [] );
4904 foreach my $i (0..7) { $fontsig[$i] |= $codepagemasks{$oemcp}->[$i]; }
4905 foreach my $i (0..$
4906 $fontsig[3] |= 1 << 31;
4907 $fontsig[3] |= 1 << 27 if $ireadinglayout == 1;
4908 $fontsig[3] |= 1 << 28 if $ireadinglayout == 3;
4909
4910
4911
4912 unless ($loc->{name})
4913 {
4914 $siso639langname = "iv";
4915 $siso639langname2 = "ivl";
4916 $senglanguage = $snativelangname = "Invariant Language";
4917 $sengcountry = $snativectryname = "Invariant Country";
4918 $sengdisplayname = "Invariant Language (Invariant Country)";
4919 $snativedisplayname = "Invariant Language (Invariant Region)";
4920 $sengcurrname = $snativecurrname = "International Monetary Fund";
4921 $scurrency = "\x{00a4}";
4922 $ifirstdayofweek = 0;
ad02ef7be… Alex*4923 $igeoid = $geotable{"US"}->{id};
8cdb593f5… Alex*4924 @stimeformat = ("HH:mm:ss");
4925 @sshortdate = ("MM/dd/yyyy", "yyyy-MM-dd");
4926 @slongdate = ("dddd, dd MMMM yyyy");
4927 @syearmonth = ("yyyy MMMM");
4928 @smonthday = ("MMMM dd", "MMMM d", "M/d", "MMM d");
4929 @sshorttime = ("HH:mm", "hh:mm tt", "H:mm", "h:mm tt");
4930 $srelativelongdate = "dddd, MMMM dd";
4931 $sposinfinity = "Infinity";
4932 $sneginfinity = "-Infinity";
4933 $spositivesign = "+";
4934 $ipospercent = $inegpercent = 0;
4935 }
4936
4937
4938
4939 $locale_data .= pack "L<2",
4940 add_string( $sname ),
4941 add_string( $sopentypelang );
4942
4943 $locale_data .= pack "S<14",
4944 $loc->{lcid} || 0x1000,
4945 $unique_lcid,
4946 locale_entry( $loc, "idigits", 2 ),
4947 locale_entry( $loc, "inegnumber", 1 ),
4948 $icurrdigits,
4949 $icurrency,
4950 $inegcurr,
4951 locale_entry( $loc, "ilzero", 1 ),
4952 !$neutral,
4953 $ifirstdayofweek,
4954 $firstweekofyear,
ad02ef7be… Alex*4955 $geo->{dialcode} || 1 ,
8cdb593f5… Alex*4956 $measure,
4957 $digitsubstitution;
4958
4959 $locale_data .= pack "L<18",
4960 add_string( $sgrouping ),
4961 add_string( $smongrouping ),
4962 add_string( $slist ),
4963 add_string( $sdecimal ),
4964 add_string( $sthousand ),
4965 add_string( $scurrency ),
4966 add_string( $smondecimalsep ),
4967 add_string( $smonthousandsep ),
4968 add_string( $spositivesign ),
4969 add_string( $snegativesign ),
4970 add_string( $s1159 ),
4971 add_string( $s2359 ),
4972 add_strarray( @snativedigits ),
4973 add_strarray( @stimeformat ),
4974 add_strarray( @sshortdate ),
4975 add_strarray( @slongdate ),
4976 add_strarray( @syearmonth ),
4977 add_strarray( @sduration );
4978
4979 $locale_data .= pack "S<8",
4980 $idefaultlanguage || 0x1000,
4981 $ansicp,
4982 $oemcp,
4983 $maccp,
4984 $ebcdiccp,
ad02ef7be… Alex*4985 $igeoid < 65536 ? $igeoid : 39070,
8cdb593f5… Alex*4986 $papersize ? 1 : 9,
4987 0;
4988
4989 $locale_data .= pack "L<24",
4990 add_string( $icalendartype ),
4991 add_string( $sabbrevlangname ),
4992 add_string( $siso639langname ),
4993 add_string( $senglanguage ),
4994 add_string( $snativelangname ),
4995 add_string( $sengcountry ),
4996 add_string( $snativectryname ),
4997 add_string( $siso3166ctryname2 ),
4998 add_string( $territory ),
4999 add_string( $sintlsymbol ),
5000 add_string( $sengcurrname ),
5001 add_string( $snativecurrname ),
5002 add_fontsig( @fontsig ),
5003 add_string( $siso639langname2 ),
5004 add_string( $siso3166ctryname2 ),
5005 add_string( $sparent ),
5006 add_strarray( @sdayname ),
5007 add_strarray( @sabbrevdayname ),
5008 add_strarray( @smonthname ),
5009 add_strarray( @sabbrevmonthname ),
5010 add_strarray( @sgenitivemonth ),
5011 add_strarray( @sabbrevgenitivemonth ),
5012 add_strarray( @scalnames ),
5013 add_strarray( @{$loc->{sortnames}} );
5014
5015 $locale_data .= pack "S<6",
5016 $inegpercent,
5017 $ipospercent,
5018 0,
5019 $ireadinglayout,
5020 0x2a,
5021 0x2a;
5022
5023 $locale_data .= pack "L<24",
5024 0,
5025 add_string( $sengdisplayname ),
5026 add_string( $snativedisplayname ),
5027 add_string( $spercent ),
5028 add_string( $snan ),
5029 add_string( $sposinfinity ),
5030 add_string( $sneginfinity ),
5031 0,
5032 add_string( $serastring ),
5033 add_string( $serastring ),
5034 0,
5035 add_string( $ssortlocale ),
5036 add_strarray( @sshorttime ),
5037 add_strarray( @sshortestdayname ),
5038 0,
5039 add_string( $ssortlocale ),
5040 add_string( "0409:00000409" ),
5041 add_string( $sscripts ),
5042 add_string( $srelativelongdate ),
ad02ef7be… Alex*5043 $igeoid,
8cdb593f5… Alex*5044 add_string( $sshortestam || "a" ),
5045 add_string( $sshortestpm || "p" ),
5046 add_strarray( @smonthday ),
5047 add_string( "k0-windows-us" )
5048 }
5049
989f7d648… Alex*5050
5051
5052 my %groups;
630f605c2… Alex*5053 add_registry_key( $nlskey, "Locale", "00000409" );
989f7d648… Alex*5054 foreach my $loc (@locales)
5055 {
5056 next unless defined $loc->{lcid};
5057 next if ($loc->{lcid} & 0x80000000);
5058 next if !defined($loc->{alias}) && $loc->{name} !~ /-$loc->{territory}/;
5059 my $group = locale_entry( $loc, "group", 1 );
5060 my $name = sprintf( "%08x", $loc->{lcid} );
5061 my $val = sprintf( "%x", $group );
630f605c2… Alex*5062 add_registry_string_value( $nlskey, "Locale", $name, $val ) unless ($loc->{lcid} & 0x000f0000);
5063 add_registry_string_value( $nlskey, "Locale\\Alternate Sorts", $name, $val ) if $loc->{name} =~ /_/;
989f7d648… Alex*5064 $groups{$val} = 1;
5065 }
630f605c2… Alex*5066 foreach my $group (keys %groups) { add_registry_string_value( $nlskey, "Language Groups", $group, "1" ); }
989f7d648… Alex*5067
65fc470df… Alex*5068
5069
5070 my $calendar_data = "";
5071 foreach my $cal (@calendars)
5072 {
5073 my $scalname = $cal->{name};
5074 my $iyearoffsetrange = 0;
5075 my $itwodigityearmax = $cal->{itwodigityearmax};
5076 my @sshortdate;
5077 my @syearmonth;
5078 my @slongdate;
5079 my @serastring;
5080 my @sdayname;
5081 my @sabbrevdayname;
5082 my @smonthname;
5083 my @sabbrevmonthname;
5084 my @smonthday;
5085 my @sabbreverastring;
5086 my @sshortestdayname;
5087
5088 my $type = $cal->{type};
5089 if (defined $cal->{locale} && defined $type)
5090 {
5091 my $loc = $lcnames{$cal->{locale}};
5092 my $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMd' and not(\@alt)]" );
5093 push @sshortdate, $fmt if $fmt;
5094 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yyyyMd' and not(\@alt)]" );
5095 push @sshortdate, $fmt if $fmt;
5096 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMd' and not(\@alt)]" );
5097 push @sshortdate, $fmt if $fmt;
5098 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yyyyMMMd' and not(\@alt)]" );
5099 push @sshortdate, $fmt if $fmt;
5100 @sshortdate = map convert_date_format($_), @sshortdate;
5101 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateFormats/dateFormatLength[\@type='full']/dateFormat/pattern[not(\@alt)]" );
5102 push @slongdate, $fmt if $fmt;
5103 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateFormats/dateFormatLength[\@type='long']/dateFormat/pattern[not(\@alt)]" );
5104 push @slongdate, $fmt if $fmt;
5105 @slongdate = map convert_date_format($_), @slongdate;
5106
5107 foreach my $n (1..13)
5108 {
5109 my $name = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/months/monthContext[\@type='format']/monthWidth[\@type='wide']/month[\@type='$n' and not(\@yeartype)]" );
5110 my $abbrev = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/months/monthContext[\@type='format']/monthWidth[\@type='abbreviated']/month[\@type='$n' and not(\@yeartype)]" );
5111 push @smonthname, $name || "";
5112 push @sabbrevmonthname, $abbrev || $name || "";
5113 }
5114
5115 $scalname ||= loc_query( $loc, "/ldml/localeDisplayNames/types/type[\@key='calendar' and \@type='$type']" );
5116 if (defined $cal->{eras})
5117 {
5118 my @eras;
5119 my $idx = 1;
5120 foreach my $era (@{$cal->{eras}})
5121 {
5122 my $start = xml_query( $suppl, "/supplementalData/calendarData/calendar[\@type='$type']/eras/era[\@type='$era']/\@start" );
5123 next unless $start =~ /^(-?\d+)-(\d+)-(\d+)/;
5124 my ($year, $mon, $day, $zero, $first) = ($1, $2, $3, $1 - 1, 1);
5125 if ($zero < 0)
5126 {
5127 $first -= $zero;
5128 $year = 1;
5129 $itwodigityearmax = 2049 - $zero;
5130 }
5131 unshift @eras, pack( "S<8", 6, $idx++, $year, $mon, $day, $zero, $first, 0 );
5132 push @serastring, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/eras/eraAbbr/era[\@type='$era']" );
5133 push @sabbreverastring, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/eras/eraNarrow/era[\@type='$era']" );
5134 }
5135 $iyearoffsetrange = add_str_data( pack "S<L<*", scalar @eras, map { add_str_data($_); } @eras );
5136 }
5137 }
5138
5139 @sshortdate = @{$cal->{sshortdate}} if defined $cal->{sshortdate} && !@sshortdate;
5140 @syearmonth = @{$cal->{syearmonth}} if defined $cal->{syearmonth};
5141 @slongdate = @{$cal->{slongdate}} if defined $cal->{slongdate} && !@slongdate;
5142 @serastring = @{$cal->{serastring}} if defined $cal->{serastring} && !@serastring;
5143 @sdayname = @{$cal->{sdayname}} if defined $cal->{sdayname};
5144 @sabbrevdayname = @{$cal->{sabbrevdayname}} if defined $cal->{sabbrevdayname};
5145 @smonthname = @{$cal->{smonthname}} if defined $cal->{smonthname} && !join("",@smonthname);
5146 @sabbrevmonthname = @{$cal->{sabbrevmonthname}} if defined $cal->{sabbrevmonthname} && !join("",@sabbrevmonthname);
5147 @smonthday = @{$cal->{smonthday}} if defined $cal->{smonthday};
5148 @sabbreverastring = @{$cal->{sabbreverastring}} if defined $cal->{sabbreverastring} && !@sabbreverastring;
5149 @sshortestdayname = @{$cal->{sshortestdayname}} if defined $cal->{sshortestdayname};
5150 my $srelativelongdate = $cal->{srelativelongdate};
5151
5152 @serastring = ("A.D.") unless @serastring;
5153 @sabbreverastring = ("AD") unless @sabbreverastring;
5154
5155 if ($cal->{id} != 1)
5156 {
5157 @sshortdate = ("") unless @sshortdate;
5158 @syearmonth = ("") unless @syearmonth;
5159 @slongdate = ("") unless @slongdate;
5160 @sdayname = ("") x 7 unless @sdayname;
5161 @sabbrevdayname = ("") x 7 unless @sabbrevdayname;
5162 @sshortestdayname = ("") x 7 unless @sshortestdayname;
5163 @smonthname = ("") x 13 unless @smonthname;
5164 @sabbrevmonthname = ("") x 13 unless @sabbrevmonthname;
5165 @smonthday = ("") unless @smonthday;
5166 }
5167
5168 $calendar_data .= pack "S<2L<17",
5169 $cal->{id},
5170 $itwodigityearmax || 99,
5171 add_strarray( @sshortdate ),
5172 add_strarray( @syearmonth ),
5173 add_strarray( @slongdate ),
5174 add_strarray( @serastring ),
5175 $iyearoffsetrange,
5176 add_strarray( @sdayname ),
5177 add_strarray( @sabbrevdayname ),
5178 add_strarray( @smonthname ),
5179 add_strarray( @sabbrevmonthname ),
5180 add_string( $scalname ),
5181 add_strarray( @smonthday ),
5182 add_strarray( @sabbreverastring ),
5183 add_strarray( @sshortestdayname ),
5184 add_string( $srelativelongdate );
5185 }
5186
5187
5188
8cdb593f5… Alex*5189 my $nb_lcids = scalar keys %lcids;
5190 my $nb_locales = scalar grep { !defined $_->{alias} } @locales;
5191 my $nb_lcnames = scalar keys %lcnames;
5192 my $locale_size = length($locale_data) / $nb_locales;
65fc470df… Alex*5193 my $nb_calendars = scalar @calendars;
5194 my $calendar_size = length($calendar_data) / $nb_calendars;
8cdb593f5… Alex*5195 my $lcids_offset = 19 * 4;
5196 my $lcnames_offset = $lcids_offset + length $lcid_data;
5197 my $locales_offset = $lcnames_offset + length $lcname_data;
5198 my $calendar_offset = $locales_offset + length $locale_data;
65fc470df… Alex*5199 my $strings_offset = $calendar_offset + length $calendar_data;
8cdb593f5… Alex*5200
5201 my $locale_header = pack "L<7S<4L<S<2L<3S<2L<4",
5202 8,
5203 0,
5204 7,
5205 0x5344534e,
5206 0, 0, 0,
5207 0,
5208 $nb_lcids,
5209 $nb_locales,
5210 $locale_size,
5211 $locales_offset,
5212 $nb_lcnames,
5213 0,
5214 $lcids_offset,
5215 $lcnames_offset,
5216 0,
5217 $nb_calendars,
65fc470df… Alex*5218 $calendar_size,
8cdb593f5… Alex*5219 $calendar_offset,
5220 $strings_offset,
5221 0, 0;
5222
65fc470df… Alex*5223 return align_string( 4, $locale_header . $lcid_data . $lcname_data . $locale_data . $calendar_data . $string_data );
8cdb593f5… Alex*5224 }
5225
5226
9e6d0e459… Alex*5227
5228
# Build the character mapping tables.
#
# Returns one string made of the dump_binary_case_table() output for, in
# order: the digit map, the CJK compatibility map, the two kana maps, the
# halfwidth and fullwidth maps, and the traditional and simplified Chinese
# maps.  Some entries of the file-level tables are patched here right
# before the corresponding table is dumped.
sub build_charmaps_data()
{
    my $data = "";

    # additional code points that map to ASCII digits:
    # [ first code point, value of the first digit, number of consecutive digits ]
    my @digit_runs =
    (
        [ 0x3007,  0, 1 ],
        [ 0x0c78,  0, 4 ],
        [ 0x0c7c,  1, 3 ],
        [ 0x3021,  1, 9 ],
        [ 0xa8e0,  0, 10 ],
        [ 0x10107, 1, 9 ],
        [ 0x10320, 1, 1 ],
        [ 0x10321, 5, 1 ],
    );
    foreach my $run (@digit_runs)
    {
        my ($start, $digit, $count) = @{$run};
        foreach my $i (0 .. $count - 1)
        {
            $digitmap_table[$start + $i] = ord('0') + $digit + $i;
        }
    }
    $data .= dump_binary_case_table( @digitmap_table );

    $data .= dump_binary_case_table( @cjk_compat_table );

    # the two kana tables are the inverse of each other: code points in the
    # ranges below and the ones 0x60 above them map onto each other
    my $kana_delta = 0x60;
    my (@hiragana_table, @katakana_table);
    foreach my $low (0x3041..0x3096, 0x309d..0x309e)
    {
        my $high = $low + $kana_delta;
        $hiragana_table[$high] = $low;
        $katakana_table[$low] = $high;
    }
    $data .= dump_binary_case_table( @hiragana_table );
    $data .= dump_binary_case_table( @katakana_table );

    # extra entries that override or extend the file-level width tables
    @halfwidth_table[0x2018, 0x2019, 0x201c, 0x201d, 0x309b, 0x309c] =
        (0x0027, 0x0027, 0x0022, 0x0022, 0xff9e, 0xff9f);
    @fullwidth_table[0x309b, 0x309c] = (0x3099, 0x309a);
    $data .= dump_binary_case_table( @halfwidth_table );
    $data .= dump_binary_case_table( @fullwidth_table );

    $data .= dump_binary_case_table( @chinese_traditional_table );
    $data .= dump_binary_case_table( @chinese_simplified_table );

    return $data;
}
5275
5276
ad02ef7be… Alex*5277
5278
# Build the geo ids table.
#
# Reads the file-level @geoids list and returns a binary string made of:
#   - a header of 7 little-endian 32-bit values,
#   - one fixed-layout record per @geoids entry,
#   - an index of (name, record number) pairs sorted by name.
# Entries with an 'alias' key keep their own id but take every other field
# from the entry they alias.
sub build_geoids_data()
{
    my $data = "";
    my %index;
    my $idx = 0;
    # header fields: two words spelling "geo" in UTF-16LE, total size,
    # offset of the records (= header size), number of records,
    # offset of the name index, number of index entries
    my @geo_header = (0x00650067, 0x0000006f, 0, 4 * 7, scalar @geoids, 0, 0);

    foreach my $geo (@geoids)
    {
        my $id = $geo->{id};
        # Resolve the alias into a separate variable: $geo is an alias to the
        # element of @geoids, so assigning to it would permanently replace
        # that element and make a second call emit different data.
        my $entry = defined $geo->{alias} ? $geo->{alias} : $geo;
        my $lat = "0.000";
        my $long = "0.000";
        my $iso2 = $entry->{iso2} || "XX";
        my $iso3 = $entry->{iso3} || "XX";
        # regions are flagged explicitly, or are entries that have a UN code but no ISO code
        my $isregion = $entry->{region} || (defined $entry->{uncode} && !defined $entry->{iso2});
        my $sintlsymbol = $entry->{sintlsymbol} || "XDR";
        my $scurrency = $entry->{scurrency} || "\x{00a4}";

        $data .= pack( "L<", $id );
        $data .= pad_string( 24, encode( "UTF16LE", $lat ));
        $data .= pad_string( 24, encode( "UTF16LE", $long ));
        # NOTE(review): 14/16 presumably are the Windows GEOCLASS_REGION/GEOCLASS_NATION
        # values and 39070 the id of the top-level "world" entry -- confirm
        $data .= pack( "L<2", $isregion ? 14 : 16, $entry->{parentid} || 39070 );
        $data .= pad_string( 8, encode( "UTF16LE", $iso2 ));
        $data .= pad_string( 8, encode( "UTF16LE", $iso3 ));
        $data .= pack( "S<2", $entry->{uncode} || 0, $entry->{dialcode} || 0 );
        $data .= pad_string( 8, encode( "UTF16LE", $sintlsymbol ));
        $data .= pad_string( 16, encode( "UTF16LE", $scurrency ));
        # the index maps the (resolved) name to the position of this record
        $index{$entry->{name}} = $idx if $entry->{name};
        $idx++;
    }
    # "XX" shares the record of "001"
    $index{"XX"} = $index{"001"};

    $geo_header[5] = $geo_header[3] + length $data;
    $geo_header[6] = scalar keys %index;

    foreach my $name (sort keys %index)
    {
        $data .= pad_string( 8, encode( "UTF16LE", $name ));
        $data .= pack "L<", $index{$name};
    }

    $geo_header[2] = $geo_header[3] + length $data;
    return pack( "L<7", @geo_header ) . $data;
}
5324
5325
8cdb593f5… Alex*5326
5327
# Generate the locale data file.
#
# Parameters:
#   $filename  - path of the file to generate; the data is first written to
#                "$filename.new" and then handed to save_file()
#   $chartypes - already built binary blob stored right after the header
#
# The file starts with 8 little-endian 32-bit values.  The first one is the
# header size (which is also where $chartypes starts); entries 4 to 7 hold
# the offsets of the locale, charmaps and geoids sections and of the end of
# the geoids section.  Entries 1 to 3 are left at zero.
sub dump_locales($$)
{
    my ($filename, $chartypes) = @_;

    # plain print: the file name must not be interpreted as a printf format
    print "Building $filename\n";

    my $locale_data = build_locale_data();
    my $charmaps_data = build_charmaps_data();
    my $geoids_data = build_geoids_data();
    my $scripts_data = "";  # always empty here; it would start at the offset in $header[7]

    my @header = ( 0 ) x 8;
    $header[0] = 4 * scalar @header;
    $header[4] = $header[0] + length $chartypes;
    $header[5] = $header[4] + length $locale_data;
    $header[6] = $header[5] + length $charmaps_data;
    $header[7] = $header[6] + length $geoids_data;

    # the content is binary: use a lexical handle opened in raw mode
    open my $output, ">:raw", "$filename.new" or die "Cannot create $filename: $!";
    print {$output} pack( "L<*", @header );
    print {$output} $chartypes, $locale_data, $charmaps_data, $geoids_data, $scripts_data;
    # buffered write errors only show up on close; don't install a truncated file
    close $output or die "Cannot write $filename.new: $!";
    save_file($filename);
}
5352
3d086ca2b… Alex*5353
3ec7c467c… Alex*5354
5355
# Return the day of the week (0 = Sunday .. 6 = Saturday) of the first day
# of a month.
#
# Parameters:
#   $year  - full year number
#   $month - month number, 1 to 12
sub month_first_dow($$)
{
    my ($year, $month) = @_;
    # Call the function by its full name: timegm_modern is not part of the
    # default export list of Time::Local, so an unqualified call only works
    # when the import line requests it explicitly.
    my $epoch = Time::Local::timegm_modern( 0, 0, 0, 1, $month - 1, $year );
    return (gmtime( $epoch ))[6];
}
5362
5363
5364
5365
# Compare two dates given as references to 7-element arrays, field by field
# starting with element 0.
#
# Returns the result of <=> for the first pair of fields that differ, or 0
# if the first seven fields are all equal.
sub compare_systime($$)
{
    my ($left, $right) = @_;
    my $order = 0;

    foreach my $field (0 .. 6)
    {
        $order = $left->[$field] <=> $right->[$field];
        last if $order;
    }
    return $order;
}
5377
5378
5379
5380
# Compare a zone boundary date with an already parsed rule date.
#
# Parameters:
#   $stdoff, $isdst - passed through to parse_transition_date()
#   $zone           - reference to the raw date fields (year, month name,
#                     day specification, time); may be empty or year-only
#   $rule           - reference to a parsed date as returned by
#                     parse_transition_date()
#
# Returns a compare_systime() style result.  When $zone holds at most a
# year, only the years are compared: a missing year sorts after everything,
# and a year equal to the rule's year sorts before it.
sub compare_transition_date($$$$)
{
    my ($stdoff, $isdst, $zone, $rule) = @_;

    if (scalar @{$zone} > 1)
    {
        my ($year, $month, $day, $time) = @{$zone};
        my @date = parse_transition_date( $stdoff, $isdst, $year, $month, $day, $time || 0 );
        return compare_systime( \@date, $rule );
    }

    my $zone_year = $zone->[0];
    return 1 if !defined($zone_year) || $zone_year > $rule->[0];
    return -1;
}
5393
5394
5395
5396
# Load the Windows time zone names from the windowsZones.xml file of the
# CLDR data.
#
# Returns a hash mapping the 'other' attribute of every <mapZone> element
# with territory "001" to a reference to a two-element array: the text of
# the most recent "(UTC..." XML comment seen before that element, and the
# element's 'type' attribute.
sub load_windows_zones()
{
    my %zone_for;
    my $display_name;
    my $release_dir = "cldr-release-$CLDRVERSION";
    my $INPUT = open_data_file( "cldr", "$release_dir/common/supplemental/windowsZones.xml" );

    while (my $line = <$INPUT>)
    {
        # a comment holding the display name precedes each group of zones
        $display_name = $1 if $line =~ /<!-- +(\(UTC[^<]*) -->.*/;

        next unless $line =~ /<mapZone other="(.*)" territory="001" type="(.*)"\/>/;
        $zone_for{$1} = [ $display_name, $2 ];
    }
    close $INPUT;
    return %zone_for;
}
5417
5418
5419
5420
# Convert a date specification into a (year, month, week, day of week,
# hour, minute, second) list, where 'week' is the occurrence of that day of
# the week inside the month and 5 stands for the last one.
#
# Parameters:
#   $stdoff - offset in minutes, subtracted from times carrying the "u" suffix
#   $isdst  - when false, times carrying a "u" or "s" suffix are moved
#             forward by 60 minutes
#   $year   - year of the date
#   $in     - abbreviated English month name (defaults to January)
#   $on     - day specification: "lastSun", "Sun>=8", "Sun<=25" or a day
#             number (defaults to "1")
#   $at     - time as "h", "h:mm" or "h:mm:ss" with an optional "u", "w" or
#             "s" suffix (defaults to "0"; no suffix is the same as "w")
#
# Dies on a day or time specification that it cannot parse.
sub parse_transition_date($$@)
{
    use integer;
    my ($stdoff, $isdst, $year, $in, $on, $at) = @_;

    $on = "1" unless defined $on;
    $at = "0" unless defined $at;

    my %month_number = ( Jan => 1, Feb => 2, Mar => 3, Apr => 4, May => 5, Jun => 6,
                         Jul => 7, Aug => 8, Sep => 9, Oct => 10, Nov => 11, Dec => 12 );
    my %weekday_number = ( Sun => 0, Mon => 1, Tue => 2, Wed => 3, Thu => 4, Fri => 5, Sat => 6 );

    my $mon = $in ? $month_number{$in} : 1;
    my ($week, $dow);
    my $first = month_first_dow( $year, $mon );

    # work out the day of the week and its occurrence in the month
    if ($on =~ /^last(.*)$/)
    {
        $week = 5;
        $dow = $weekday_number{$1};
    }
    elsif ($on =~ /^(.*)>=(\d+)$/)
    {
        my ($weekday, $day) = ($1, $2);
        $dow = $weekday_number{$weekday};
        my $diff = ($first + 6 - $dow) % 7;
        $week = $day >= 25 ? 5 : ($day + 6 + $diff) / 7;
    }
    elsif ($on =~ /^(.*)<=(\d+)$/)
    {
        my ($weekday, $day) = ($1, $2);
        $dow = $weekday_number{$weekday};
        my $diff = ($first + $day + 6 - $dow) % 7;
        $week = ($day + 6 - $diff) / 7;
        if (!$week)
        {
            # the day falls before the start of the month:
            # use the last occurrence in the previous month
            $week = 5;
            if (--$mon == 0) { $mon = 12; $year--; }
        }
    }
    elsif ($on =~ /^\d+$/)
    {
        $dow = ($first + $on - 1) % 7;
        $week = $on >= 25 ? 5 : ($on + 6) / 7;
    }
    else
    {
        die "unsupported date specification $year $in $on $at";
    }

    # time of day in minutes, with optional minutes, seconds and suffix
    my ($time, $sec, $flag);
    if ($at =~ /^(\d+)(?::(\d+)(?::(\d+))?)?([uws]?)$/)
    {
        $time = $1 * 60 + ($2 || 0);
        $sec = $3;
        $flag = $4;
    }
    else
    {
        die "unsupported time specification $year $in $on $at";
    }

    $flag ||= "w";
    $time -= $stdoff if $flag eq "u";
    $time += 60 if !$isdst && $flag ne "w";

    if ($time < 0)
    {
        # the adjusted time is on the previous day
        if ($week < 5)
        {
            $week-- if $dow == month_first_dow( $year, $mon );
        }
        elsif ($week == 5)
        {
            $week-- if $dow == month_first_dow( $year + ($mon == 12), $mon % 12 + 1 );
        }
        if (!$week)
        {
            $week = 5;
            if (--$mon == 0) { $mon = 12; $year--; }
        }
        $dow = ($dow + 6) % 7;
        $time += 24 * 60;
    }

    return ($year, $mon, $week, $dow, $time / 60, $time % 60, $sec || 0);
}
5509
5510
5511
5512
# Pack a date returned by parse_transition_date() into eight little-endian
# 16-bit values: 0, month, day of week, week, hour, minute, second,
# milliseconds.  The year argument is not stored.  An hour of 24 or more is
# replaced by 23:59:59.999.
sub pack_systime(@)
{
    my (undef, $month, $week, $weekday, $hour, $minute, $second) = @_;

    my @time_of_day = (23, 59, 59, 999);
    @time_of_day = ($hour, $minute, $second, 0) if $hour < 24;

    return pack( "S<8", 0, $month, $weekday, $week, @time_of_day );
}
5518
5519
5520
5521
# Convert an "[-]h[:mm]" offset into a number of minutes with the opposite
# sign: "1:00" gives -60 and "-3:30" gives 210.  Fields after the minutes
# are ignored.
sub parse_tz_offset($)
{
    my ($hour, $min) = split /:/, shift;
    $min ||= 0;
    # Take the sign from the string and not from the numeric value of the
    # hours, otherwise offsets like "-0:30" would be treated as positive.
    my $sign = ($hour =~ /^-/) ? 1 : -1;
    return $sign * (abs( $hour ) * 60 + $min);
}
5528
5529
5530
5531
# Generate the time zone string resources and the time zone registry entries.
#
# Parameters:
#   first argument - path of the resource file to generate; it is written
#                    as "$filename.new" and then handed to save_file()
#   other arguments - names of the time zone data files, opened through
#                    open_data_file( "tzdata", ... )
#
# For every zone returned by load_windows_zones() this computes one binary
# info record per year between $FIRST_YEAR and $LAST_YEAR, adds the zone's
# registry values (including the "Dynamic DST" subkey when the records
# differ between years), and writes three strings to the resource file.
5532 sub dump_timezones($@)
5533 {
5534 my $filename = shift;
# range of years for which per-year records are computed
5535 my $FIRST_YEAR = 2000;
5536 my $LAST_YEAR = 2030;
5537
5538 my %names = load_windows_zones();
5539 my %zones;
5540 my %rules;
5541 my %links;
# resource ids already handed out, to detect collisions
5542 my %res_indices;
5543
5544 printf "Building $filename\n";
5545
5546 open OUTPUT, ">$filename.new" or die "Cannot create $filename";
5547 print OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
5548 print OUTPUT "#include \"winresrc.h\"\n\n";
5549 print OUTPUT "#pragma makedep po\n\n";
5550 print OUTPUT "LANGUAGE LANG_ENGLISH, SUBLANG_DEFAULT\n\n";
5551 print OUTPUT "STRINGTABLE\n{\n";
5552
5553
5554
# load the Zone, Rule and Link lines of every data file
5555 foreach my $filename (@_)
5556 {
cfaa28933… Alex*5557 my $FILE = open_data_file( "tzdata", $filename );
# name of the zone whose continuation lines are being read, if any
3ec7c467c… Alex*5558 my $zonename;
5559 while (<$FILE>)
5560 {
5561 chomp;
# strip comments and skip empty lines
5562 s/\#.*$//;
5563 next if /^\s*$/;
5564 my @fields = split /\s+/;
# a Zone line, or a continuation line (empty first field) of the current zone
5565 if ($fields[0] eq "Zone" || ($zonename && $fields[0] eq ""))
5566 {
5567 shift @fields;
5568 $zonename = shift @fields unless $zonename;
# the third field is not used; whatever follows it is the end date of the period
5569 my ($stdoff, $rules, $dummy, @date) = @fields;
5570 $zones{$zonename} ||= [ ];
5571 push @{$zones{$zonename}}, [ parse_tz_offset( $stdoff ), $rules, @date ];
# a line without an end date is the last line of the zone
5572 $zonename = undef unless @date;
5573 next;
5574 }
5575 if ($fields[0] eq "Rule")
5576 {
5577 shift @fields;
5578 my ($rulename, $from, $to, $dummy, $in, $on, $at, $save) = @fields;
# "only" means a single year; open-ended rules stop at $LAST_YEAR
5579 $to = $from if $to eq "only";
5580 $to = $LAST_YEAR if $to eq "max";
5581 push @{$rules{$rulename}}, [ parse_tz_offset( $save ), $from, $to, $in, $on, $at ];
5582 next;
5583 }
5584 if ($fields[0] eq "Link")
5585 {
# map the third field to the second one
# (presumably link name => target zone, as in the tz "Link TARGET LINK-NAME" syntax)
5586 $links{$fields[2]} = $fields[1];
5587 next;
5588 }
5589 die "unrecognized line $_";
5590 }
5591 close $FILE;
5592 }
5593
# process the Windows zones in case-insensitive name order
5594 foreach my $name (sort { uc($a) cmp uc($b) } keys %names)
5595 {
5596 my ($display, $zone) = @{$names{$name}};
# links are resolved one level only
5597 $zone = $links{$zone} if defined $links{$zone};
5598
5599
5600
# Build the list of offset changes of the zone.  Each entry is
# [ standard offset, DST offset, parsed date ]; the start of a zone period
# is recorded with a DST offset of -1 to tell it apart from rule entries.
5601 my @transitions;
5602 my @from_date = ( 1 );
5603 my $last_stdoff = 0;
5604 for (my $i = 0; $i < scalar @{$zones{$zone}}; $i++)
5605 {
5606 my ($stdoff, $rule, @until_date) = @{$zones{$zone}->[$i]};
# flag given to the date parser: set when the standard offset differs
# from the one of the previous period
5607 my $isdst = ($last_stdoff != $stdoff);
5608 $from_date[0] ||= $LAST_YEAR;
5609 my @systime = parse_transition_date( $stdoff, $isdst, @from_date );
5610 push @transitions, [ $stdoff, -1, \@systime ];
5611
5612 if (defined $rules{$rule})
5613 {
5614 foreach my $r (@{$rules{$rule}})
5615 {
5616 my ($offset, $from, $to, $in, $on, $at) = @{$r};
5617 foreach my $year ($from..$to)
5618 {
# only keep the occurrences of the rule that fall inside the zone period
5619 next if $year < $from_date[0];
5620 next if $until_date[0] && $year > $until_date[0];
5621 my @systime = parse_transition_date( $stdoff, !!$offset, $year, $in, $on, $at );
# skip dates at or after the end of the period, and dates before its start
5622 next if compare_transition_date( $stdoff, $isdst, \@until_date, \@systime ) <= 0;
5623 my $ret = compare_transition_date( $stdoff, $isdst, \@from_date, \@systime );
5624 next if $ret > 0;
# a rule date equal to the period start replaces the last entry added
5625 pop @transitions if !$ret;
5626 push @transitions, [ $stdoff, $offset, \@systime ];
5627 }
5628 }
5629 }
5630 @from_date = @until_date;
5631 $last_stdoff = $stdoff;
5632 }
5633 @transitions = sort { compare_systime( $a->[2], $b->[2] ) } @transitions;
5634
5635
5636
# Build one record per year, stored in @info indexed by the year number.
5637 my @info;
5638 my $last_dstoff = 0;
5639 my $last_dst = 0;
5640 my $year = $FIRST_YEAR;
5641 while ($year <= $LAST_YEAR)
5642 {
# drop the transitions of earlier years, remembering their standard offset
5643 if (@transitions && $transitions[0]->[2]->[0] < $year)
5644 {
5645 $last_stdoff = $transitions[0]->[0];
5646 shift @transitions;
5647 next;
5648 }
# $std / $dst: packed dates of the switch to standard / daylight time;
# @trans: ( old offset, new offset, packed date ) of a standard offset change
5649 my ($std, $dst, @trans);
5650 my $cur_stdoff = $last_stdoff;
# default DST offset of one hour, except for the zones named "UTC..."
5651 my $cur_dstoff = ($name =~ /^UTC/) ? 0 : -60;
# consume the transitions of the current year
5652 while (@transitions && $transitions[0]->[2]->[0] == $year)
5653 {
5654 my $t = shift @transitions;
5655 my ($stdoff, $dstoff, $systime) = @{$t};
5656 $systime = pack_systime( @{$systime} );
5657 if (!$dstoff)
5658 {
# rule entry with a zero DST offset; the last one of the year is kept
5659 $cur_stdoff = $stdoff unless $std;
5660 $std = $systime;
5661 }
5662 elsif ($dstoff != -1)
5663 {
# rule entry with a non-zero DST offset; the first one of the year is kept
5664 $cur_dstoff = $dstoff unless $dst;
5665 $dst ||= $systime;
5666 }
5667 elsif ($stdoff != $last_stdoff)
5668 {
5669
5670
5671
5672
5673
5674
5675
5676
# period start that changes the standard offset; it is only recorded
# when the difference is less than 24 hours
5677 if ($last_stdoff - $stdoff < 24 * 60)
5678 {
5679 @trans = ($last_stdoff, $stdoff, $systime);
5680 $cur_stdoff = $stdoff;
5681 }
5682 }
5683 elsif ($dst)
5684 {
# period start with an unchanged offset, after a DST entry in the same
# year: used as the date of the switch to standard time
5685 $std = $systime;
5686 }
5687 $last_dstoff = ($dstoff == -1) ? 0 : $dstoff;
5688 }
5689 $last_stdoff = $cur_stdoff;
5690
# positive DST offset: swap the two dates, add the offset to the standard
# offset and negate it
5691 if ($cur_dstoff > 0)
5692 {
5693 ($std, $dst) = ($dst, $std);
5694 $cur_stdoff += $cur_dstoff;
5695 $cur_dstoff = -$cur_dstoff;
5696 }
5697
# express a change of the standard offset as a daylight or standard date
5698 if (@trans)
5699 {
5700
5701 if ($last_dst == $year - 1 || (!$last_dst && $trans[0] > $trans[1]))
5702 {
5703 $dst ||= $trans[2];
5704 $cur_stdoff = $trans[0];
5705 $cur_dstoff = $trans[1] - $trans[0];
5706 }
5707 else
5708 {
5709 $std ||= $trans[2];
5710 $cur_stdoff = $trans[1];
5711 $cur_dstoff = $trans[0] - $trans[1];
5712 }
5713 }
5714
5715 if ($std || $dst)
5716 {
# when only one of the two dates is known, use a January 1 date for the other
5717 $std ||= pack_systime( parse_transition_date( 0, 0, $year, "Jan", 1 ));
5718 $dst ||= pack_systime( parse_transition_date( 0, 0, $year, "Jan", 1 ));
5719 $last_dst = $year;
5720 }
5721 else
5722 {
# no date this year: store empty dates and add the DST offset of the
# last transition seen to the standard offset
5723 $std = pack "S<8", 0;
5724 $dst = pack "S<8", 0;
5725 $cur_stdoff += $last_dstoff;
5726 }
# record layout: three signed 32-bit values (standard offset, 0, DST offset)
# followed by the two packed dates
# NOTE(review): presumably the Windows REG_TZI_FORMAT layout -- confirm
5727 $info[$year++] = pack( "l<3", $cur_stdoff, 0, $cur_dstoff ) . $std . $dst;
5728 }
5729
5730
5731
5732 my $std_name = $name eq "UTC" ? "Coordinated Universal Time" : $name;
5733 my $dlt_name = $std_name =~ s/Standard Time/Daylight Time/r;
# derive the resource id from the last three hex digits of the SHA-1 of the
# zone name, in steps of 16; move to the next step when the id is taken
5734 my $res_idx = hex( substr( Digest::SHA::sha1_hex($name), -3, 3 )) << 4;
5735 $res_idx += 16 while exists $res_indices{$res_idx};
5736 $res_indices{$res_idx} = 1;
5737
5738 add_registry_string_value( $zonekey, $name, "Display", $display );
5739 add_registry_string_value( $zonekey, $name, "Std", $std_name );
5740 add_registry_string_value( $zonekey, $name, "Dlt", $dlt_name );
5741 add_registry_string_value( $zonekey, $name, "MUI_Std", sprintf( "\@tzres.dll,-%u", $res_idx ));
5742 add_registry_string_value( $zonekey, $name, "MUI_Dlt", sprintf( "\@tzres.dll,-%u", $res_idx + 1 ));
5743 add_registry_string_value( $zonekey, $name, "MUI_Display", sprintf( "\@tzres.dll,-%u", $res_idx + 2 ));
# the main TZI value is the record of the last year
5744 add_registry_binary_value( $zonekey, $name, "TZI", $info[$LAST_YEAR] );
5745
5746 printf OUTPUT "%7d \"#msgctxt#maximum 31 characters#%s\"\n", $res_idx, $std_name;
5747 printf OUTPUT "%7d \"#msgctxt#maximum 31 characters#%s\"\n", $res_idx + 1, $dlt_name;
5748 printf OUTPUT "%7d \"%s\"\n", $res_idx + 2, $display;
5749
# Trim the range of years: drop trailing years identical to the year before
# them, then leading years identical to the last remaining year.
5750 my $first_year = $FIRST_YEAR;
5751 my $last_year = $LAST_YEAR;
5752 $last_year-- while $last_year > $FIRST_YEAR && $info[$last_year] eq $info[$last_year - 1];
5753 $first_year++ while $first_year < $last_year && $info[$first_year] eq $info[$last_year];
5754
# no Dynamic DST key unless more than one year remains
5755 next if $last_year <= $first_year;
5756
5757 foreach my $i ($first_year..$last_year)
5758 {
5759 add_registry_binary_value( $zonekey, "$name\\Dynamic DST", $i, $info[$i] );
5760 }
5761 add_registry_dword_value( $zonekey, "$name\\Dynamic DST", "FirstEntry", $first_year );
5762 add_registry_dword_value( $zonekey, "$name\\Dynamic DST", "LastEntry", $last_year );
5763 }
5764
5765 print OUTPUT "}\n";
5766 close OUTPUT;
5767 save_file($filename);
5768 }
5769
5770
712839d58… Alex*5771
5772
# Generate a registry script listing a set of keys below HKLM.
#
# Parameters:
#   $filename - path of the script to generate; it is written as
#               "$filename.new" and then handed to save_file()
#   %keys     - maps a backslash-separated key path to a reference to an
#               array holding the default value of the key (may be empty)
#               followed by the text of its "val" lines
#
# Keys are written as nested blocks, in case-insensitive order with a key
# always sorting before its subkeys.  A path component starting with "-" is
# written with the "NoRemove" prefix, and a component containing white
# space is quoted.
sub dump_registry_script($%)
{
    my ($filename, %keys) = @_;
    my $indent = 1;
    my @prev;   # components of the previous key path that are still open

    printf "Building %s\n", $filename;
    open my $output, ">", "$filename.new" or die "Cannot create $filename: $!";
    print {$output} "HKLM\n{\n";
    # map the backslash to a low character so that it sorts before all names
    foreach my $k (sort { ($a =~ tr/a-z\\/A-Z\001/r) cmp ($b =~ tr/a-z\\/A-Z\001/r) } keys %keys)
    {
        my @subkeys = split /\\/, $k;
        # skip the leading components shared with the previous key,
        # then close the blocks of the components that are not
        while (@prev && @subkeys && $prev[0] eq $subkeys[0]) { shift @prev; shift @subkeys; }
        while (@prev) { printf {$output} "%*s}\n", 4 * --$indent, ""; shift @prev; }
        my ($def, @vals) = @{$keys{$k}};
        for (my $i = 0; $i < @subkeys; $i++)
        {
            my $name = $subkeys[$i];
            my $prefix = "";
            if ($name =~ /^-/)
            {
                $name =~ s/^-//;
                $prefix = "NoRemove ";
            }
            if ($name =~ /\s/)
            {
                $name = "'$name'";
            }
            # the default value only applies to the last component of the path
            printf {$output} "%*s%s%s%s\n%*s{\n", 4 * $indent, "", $prefix, $name,
                             $i == $#subkeys && $def ? " = s '$def'" : "", 4 * $indent, "";
            $indent++;
        }
        # pass the value as an argument: it may contain '%' characters that
        # must not be interpreted as printf conversions
        foreach my $v (sort @vals) { printf {$output} "%*sval %s\n", 4 * $indent, "", $v; }
        @prev = split /\\/, $k;
    }
    while (@prev) { printf {$output} "%*s}\n", 4 * --$indent, ""; shift @prev; }
    print {$output} "}\n";
    # buffered write errors only show up on close; don't install a truncated file
    close $output or die "Cannot write $filename.new: $!";
    save_file($filename);
}
5813
5814
3d086ca2b… Alex*5815
5816
# Install a freshly generated "$file.new" as $file.
#
# When $file already exists with the same content, the new file is simply
# removed so that the existing file is left untouched; otherwise the new
# file is renamed over it.  Dies if the rename fails.
sub save_file($)
{
    my $file = shift;

    # compare in-process instead of running "cmp" through the shell, so that
    # file names with spaces or shell metacharacters are handled correctly
    require File::Compare;
    if (-f $file && File::Compare::compare( $file, "$file.new" ) == 0)
    {
        unlink "$file.new" or warn "Cannot remove $file.new: $!\n";
    }
    else
    {
        rename "$file.new", $file or die "Cannot rename $file.new to $file: $!";
    }
}
5829
5830
dc727fa7b… Alex*5831
5832
5833
# main routine: load the data, then generate every output file in turn

chdir ".." if -f "./make_unicode";
load_data();

foreach my $path ("dlls/gdi32/uniscribe/direction.c", "dlls/dwrite/direction.c", "dlls/wineps.drv/direction.c")
{
    dump_bidi_dir_table( $path );
}
foreach my $path ("dlls/gdi32/uniscribe/mirror.c", "dlls/dwrite/mirror.c")
{
    dump_mirroring( $path );
}
foreach my $path ("dlls/gdi32/uniscribe/bracket.c", "dlls/dwrite/bracket.c")
{
    dump_bracket( $path );
}
dump_shaping( "dlls/gdi32/uniscribe/shaping.c" );
dump_arabic_shaping( "dlls/dwrite/shapers/arabic_table.c" );
foreach my $path ("dlls/gdi32/uniscribe/linebreak.c", "dlls/dwrite/linebreak.c")
{
    dump_linebreak( $path );
}
dump_scripts( "dlls/dwrite/scripts" );
dump_indic( "dlls/gdi32/uniscribe/indicsyllable.c" );
dump_vertical( "dlls/win32u/vertical.c", 1 );
dump_vertical( "dlls/wineps.drv/vertical.c", 0 );
dump_intl_nls("nls/l_intl.nls");
foreach my $path ("nls/normnfc.nls", "nls/normnfd.nls", "nls/normnfkc.nls", "nls/normnfkd.nls", "nls/normidna.nls")
{
    dump_norm_table( $path );
}

# the sort key generator returns the data that goes into the locale file
my $chartypes = dump_sortkey_table( "nls/sortdefault.nls" );
dump_locales( "nls/locale.nls", $chartypes );

foreach my $file (@allfiles)
{
    dump_msdata_codepage( $file );
}
dump_eucjp_codepage();
dump_timezones( "dlls/tzres/tzres.rc", @timezone_files );
dump_registry_script( "dlls/kernelbase/kernelbase.rgs", %registry_keys );

exit 0;
5865
5866
704a330a8… Alex*5867
dc727fa7b… Alex*5868