Back to home page

Wine source

 
 

    


File indexing completed on 2024-05-17 22:39:36

dc727fa7b Alex*0001 #!/usr/bin/perl -w
fb270ddca Alex*0002 #
                0003 # Generate code page .c files from ftp.unicode.org descriptions
                0004 #
                0005 # Copyright 2000 Alexandre Julliard
                0006 #
0799c1a78 Alex*0007 # This library is free software; you can redistribute it and/or
                0008 # modify it under the terms of the GNU Lesser General Public
                0009 # License as published by the Free Software Foundation; either
                0010 # version 2.1 of the License, or (at your option) any later version.
                0011 #
                0012 # This library is distributed in the hope that it will be useful,
                0013 # but WITHOUT ANY WARRANTY; without even the implied warranty of
                0014 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                0015 # Lesser General Public License for more details.
                0016 #
                0017 # You should have received a copy of the GNU Lesser General Public
                0018 # License along with this library; if not, write to the Free Software
360a3f914 Jona*0019 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
0799c1a78 Alex*0020 #
fb270ddca Alex*0021 
dc727fa7b Alex*0022 use strict;
8cdb593f5 Alex*0023 use XML::LibXML;
3ec7c467c Alex*0024 use Digest::SHA;
8cdb593f5 Alex*0025 use Encode;
3ec7c467c Alex*0026 use Time::Local qw(timegm_modern);
dc727fa7b Alex*0027 
c54765958 Alex*0028 my $UNIVERSION = "15.1.0";  # interpolated into the ucd, unihan and idna entries of %data_files
da387a9b5 Alex*0029 my $CLDRVERSION = "45";  # interpolated into the cldr URL of %data_files (the cldr33 entry is pinned separately)
50c5eb31c Alex*0030 my $ISO639VERSION = "20230123";  # interpolated into the iso639 URL of %data_files
6e9d21409 Alex*0031 my $TZVERSION = "2024a";  # interpolated into the tzdata URL of %data_files
cfaa28933 Alex*0032 
                0033 my %data_files =  # external inputs keyed by short name; each entry has a url and a sha digest, some also a name
                0034 (
3ccb5f238 Alex*0035  ucd       => { url  => "https://www.unicode.org/Public/$UNIVERSION/ucd/UCD.zip", name => "UCD-$UNIVERSION.zip",
c54765958 Alex*0036                 sha  => "cb1c663d053926500cd501229736045752713a066bd75802098598b7a7056177" },  # 64 hex digits, presumably SHA-256 -- confirm against the code that checks it
3ccb5f238 Alex*0037  unihan    => { url  => "https://www.unicode.org/Public/$UNIVERSION/ucd/Unihan.zip", name => "Unihan-$UNIVERSION.zip",
c54765958 Alex*0038                 sha  => "a0226610e324bcf784ac380e11f4cbf533ee1e6b3d028b0991bf8c0dc3f85853" },
3ccb5f238 Alex*0039  idna      => { url  => "https://www.unicode.org/Public/idna/$UNIVERSION/IdnaMappingTable.txt", name => "IdnaMappingTable-$UNIVERSION.txt",
c54765958 Alex*0040                 sha  => "402cbd285f1f952fcd0834b63541d54f69d3d8f1b8f8599bf71a1a14935f82c4" },
3ccb5f238 Alex*0041  cldr      => { url  => "https://github.com/unicode-org/cldr/archive/refs/tags/release-$CLDRVERSION.zip",
da387a9b5 Alex*0042                 sha  => "29f4dafa918017e74cb84edf19fb707894cbce1cf7da1dd89fff24c107a66ecb" },
3ccb5f238 Alex*0043  cldr33    => { url  => "https://www.unicode.org/Public/cldr/33/cldr-common-33.0.zip",  # fixed at release 33, independent of $CLDRVERSION
                0044                 sha  => "fa3490082c086d21257153609642f54fcf788fcfda4966fe67f3f6daca0d58b9" },
                0045  sorting   => { url  => "https://download.microsoft.com/download/C/F/7/CF713A5E-9FBC-4FD6-9246-275F65C0E498/Windows 10 Sorting Weight Table.txt",  # URL contains literal spaces
                0046                 sha  => "81fcfa1e5ed3e3a94d329959ff7d97d522ddf9d653d2c4d6ddcccc5cd4df663f" },
                0047  codepages => { url  => "https://download.microsoft.com/download/C/F/7/CF713A5E-9FBC-4FD6-9246-275F65C0E498/Windows Supported Code Page Data Files.zip",  # URL contains literal spaces
                0048                 sha  => "5074e6dd253056ba61fc6c870c9a955467855129c6ad3a51761c386b301b125a" },
                0049  iso639    => { url  => "https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3_Code_Tables_$ISO639VERSION.zip",
50c5eb31c Alex*0050                 sha  => "884faa6cc5ac5181ed7969eed75355c1bc665447614cf4c06c62e87b38fe6a97" },
3ccb5f238 Alex*0051  ksx1001   => { url  => "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/KSX1001.TXT",
                0052                 sha  => "d8d2a35206ac0ea2865f5d801c9d6717f735bf46f263a658a64a960abe59e371" },
                0053  jis0208   => { url  => "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT",
                0054                 sha  => "1c571870457f19c97720631fa83ee491549a96ba1436da1296786a67d8632e87" },
                0055  jis0212   => { url  => "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0212.TXT",
                0056                 sha  => "477820bb3055bbcc90880d788cd95607d221dc94457bae249231adecf13c12e6" },
                0057  tzdata    => { url  => "https://data.iana.org/time-zones/releases/tzdata$TZVERSION.tar.gz",
6e9d21409 Alex*0058                 sha  => "0d0434459acbd2059a7a8da1f3304a84a86591f6ed69c6248fffa502b6edffe3" },
cfaa28933 Alex*0059 );
                0060 
3ec7c467c Alex*0061 
fb270ddca Alex*0062 # Default char for undefined mappings
dc727fa7b Alex*0063 my $DEF_CHAR = ord '?';  # numeric code of '?', i.e. 0x3f
fb270ddca Alex*0064 
148f564d1 Alex*0065 # Last valid Unicode character
                0066 my $MAX_CHAR = 0x10ffff;
                0067 
630f605c2 Alex*0068 my $nlskey = "-SYSTEM\\-CurrentControlSet\\-Control\\-Nls";  # backslash-separated key path; every component starts with '-' (meaning of the prefix is not visible here)
3ec7c467c Alex*0069 my $zonekey = "-Software\\-Microsoft\\-Windows NT\\-CurrentVersion\\Time Zones";  # NOTE(review): the final component "Time Zones" has no '-' prefix, unlike the others -- confirm intentional
630f605c2 Alex*0070 
dc727fa7b Alex*0071 my @allfiles =  # per-code-page description files, named <codepage number>.txt; presumably paths inside the "codepages" archive of %data_files -- confirm
fb270ddca Alex*0072 (
5b4bdb9fd Alex*0073     "CodpageFiles/037.txt",
                0074     "CodpageFiles/437.txt",
                0075     "CodpageFiles/500.txt",
1ca4536f7 Niko*0076     "CodpageFiles/708.txt",
a1bae9f28 Fran*0077     "CodpageFiles/720.txt",
5b4bdb9fd Alex*0078     "CodpageFiles/737.txt",
                0079     "CodpageFiles/775.txt",
                0080     "CodpageFiles/850.txt",
                0081     "CodpageFiles/852.txt",
                0082     "CodpageFiles/855.txt",
                0083     "CodpageFiles/857.txt",
                0084     "CodpageFiles/860.txt",
                0085     "CodpageFiles/861.txt",
                0086     "CodpageFiles/862.txt",
                0087     "CodpageFiles/863.txt",
                0088     "CodpageFiles/864.txt",
                0089     "CodpageFiles/865.txt",
                0090     "CodpageFiles/866.txt",
                0091     "CodpageFiles/869.txt",
                0092     "CodpageFiles/874.txt",
                0093     "CodpageFiles/875.txt",
                0094     "CodpageFiles/932.txt",
                0095     "CodpageFiles/936.txt",
                0096     "CodpageFiles/949.txt",
                0097     "CodpageFiles/950.txt",
                0098     "CodpageFiles/1026.txt",
                0099     "CodpageFiles/1250.txt",
                0100     "CodpageFiles/1251.txt",
                0101     "CodpageFiles/1252.txt",
                0102     "CodpageFiles/1253.txt",
                0103     "CodpageFiles/1254.txt",
                0104     "CodpageFiles/1255.txt",
                0105     "CodpageFiles/1256.txt",
                0106     "CodpageFiles/1257.txt",
                0107     "CodpageFiles/1258.txt",
                0108     "CodpageFiles/1361.txt",
                0109     "CodpageFiles/10000.txt",
                0110     "CodpageFiles/10001.txt",
                0111     "CodpageFiles/10002.txt",
                0112     "CodpageFiles/10003.txt",
                0113     "CodpageFiles/10004.txt",
                0114     "CodpageFiles/10005.txt",
                0115     "CodpageFiles/10006.txt",
                0116     "CodpageFiles/10007.txt",
                0117     "CodpageFiles/10008.txt",
                0118     "CodpageFiles/10010.txt",
                0119     "CodpageFiles/10017.txt",
                0120     "CodpageFiles/10021.txt",
                0121     "CodpageFiles/10029.txt",
                0122     "CodpageFiles/10079.txt",
                0123     "CodpageFiles/10081.txt",
                0124     "CodpageFiles/10082.txt",
                0125     "CodpageFiles/20127.txt",
                0126     "CodpageFiles/20866.txt",
                0127     "CodpageFiles/21866.txt",
                0128     "CodpageFiles/28591.txt",
                0129     "CodpageFiles/28592.txt",
                0130     "CodpageFiles/28593.txt",
                0131     "CodpageFiles/28594.txt",
                0132     "CodpageFiles/28595.txt",
                0133     "CodpageFiles/28596.txt",
                0134     "CodpageFiles/28597.txt",
                0135     "CodpageFiles/28598.txt",
                0136     "CodpageFiles/28599.txt",
                0137     "CodpageFiles/28603.txt",
                0138     "CodpageFiles/28605.txt",
fb270ddca Alex*0139 );
                0140 
3ec7c467c Alex*0141 my @timezone_files = qw(africa antarctica asia australasia europe northamerica southamerica etcetera backward);  # nine file names; presumably members of the "tzdata" archive of %data_files -- confirm
c97bb4c5d Alex*0142 
dc727fa7b Alex*0143 my %ctype =  # character-type flag bits by name; combined with | in %categories
c97bb4c5d Alex*0144 (
d87d4a4a0 Alex*0145      # CT_CTYPE1
c97bb4c5d Alex*0146     "upper"  => 0x0001,
                0147     "lower"  => 0x0002,
                0148     "digit"  => 0x0004,
                0149     "space"  => 0x0008,
                0150     "punct"  => 0x0010,
                0151     "cntrl"  => 0x0020,
                0152     "blank"  => 0x0040,
                0153     "xdigit" => 0x0080,
d87d4a4a0 Alex*0154     "alpha"  => 0x0100 | 0x80000000,  # also sets bit 31, which matches none of the named flags in this table; its purpose is not visible here
                0155     "defin"  => 0x0200,
                0156      # CT_CTYPE3 in high 16 bits
                0157     "nonspacing"    => 0x00010000,
                0158     "diacritic"     => 0x00020000,
                0159     "vowelmark"     => 0x00040000,
                0160     "symbol"        => 0x00080000,
                0161     "katakana"      => 0x00100000,
                0162     "hiragana"      => 0x00200000,
                0163     "halfwidth"     => 0x00400000,
                0164     "fullwidth"     => 0x00800000,
                0165     "ideograph"     => 0x01000000,
                0166     "kashida"       => 0x02000000,
                0167     "lexical"       => 0x04000000,
                0168     "highsurrogate" => 0x08000000,
                0169     "lowsurrogate"  => 0x10000000,
c97bb4c5d Alex*0170 );
                0171 
2a12c6a7d Aric*0172 my %bracket_types =  # one-letter bracket type => numeric code
                0173 (
                0174     "o" => 0x0000,  # presumably "open" -- confirm against the data file that supplies these letters
                0175     "c" => 0x0001,  # presumably "close" -- confirm
                0176 );
                0177 
7339c04b7 Aric*0178 my %indic_types =  # category name => numeric id, consecutive from 0x0000 to 0x0023; names look like Unicode Indic syllabic categories -- confirm source file
                0179 (
                0180     "Other"    => 0x0000,
                0181     "Bindu"    => 0x0001,
                0182     "Visarga"  => 0x0002,
                0183     "Avagraha" => 0x0003,
                0184     "Nukta"    => 0x0004,
                0185     "Virama"   => 0x0005,
                0186     "Vowel_Independent"  => 0x0006,
                0187     "Vowel_Dependent"  => 0x0007,
                0188     "Vowel"  => 0x0008,
                0189     "Consonant_Placeholder"  => 0x0009,
                0190     "Consonant"  => 0x000a,
                0191     "Consonant_Dead"  => 0x000b,
96ff3daaa Alex*0192     "Consonant_Succeeding_Repha" => 0x000c,
7339c04b7 Aric*0193     "Consonant_Subjoined"  => 0x000d,
                0194     "Consonant_Medial"  => 0x000e,
                0195     "Consonant_Final"  => 0x000f,
                0196     "Consonant_Head_Letter"  => 0x0010,
                0197     "Modifying_Letter"  => 0x0011,
                0198     "Tone_Letter"  => 0x0012,
                0199     "Tone_Mark"  => 0x0013,
96ff3daaa Alex*0200     "Register_Shifter"  => 0x0014,
                0201     "Consonant_Preceding_Repha" => 0x0015,
                0202     "Pure_Killer" => 0x0016,
                0203     "Invisible_Stacker" => 0x0017,
                0204     "Gemination_Mark" => 0x0018,
                0205     "Cantillation_Mark" => 0x0019,
                0206     "Non_Joiner" => 0x001a,
                0207     "Joiner" => 0x001b,
                0208     "Number_Joiner" => 0x001c,
                0209     "Number" => 0x001d,
e8c1cf440 Niko*0210     "Brahmi_Joining_Number" => 0x001e,
                0211     "Consonant_With_Stacker" => 0x001f,
                0212     "Consonant_Prefixed" => 0x0020,
                0213     "Syllable_Modifier" => 0x0021,
d4eeb162b Niko*0214     "Consonant_Killer" => 0x0022,
                0215     "Consonant_Initial_Postfixed" => 0x0023,
7339c04b7 Aric*0216 );
                0217 
                0218 my %matra_types =  # position name => numeric id, consecutive from 0x01 to 0x10 (no name maps to 0x00); names look like Unicode Indic positional categories -- confirm
                0219 (
                0220     "Right"    => 0x01,
                0221     "Left"  => 0x02,
                0222     "Visual_Order_Left" => 0x03,
                0223     "Left_And_Right"    => 0x04,
                0224     "Top"   => 0x05,
                0225     "Bottom"  => 0x06,
                0226     "Top_And_Bottom"  => 0x07,
                0227     "Top_And_Right"  => 0x08,
                0228     "Top_And_Left"  => 0x09,
                0229     "Top_And_Left_And_Right"  => 0x0a,
                0230     "Bottom_And_Right"  => 0x0b,
                0231     "Top_And_Bottom_And_Right"  => 0x0c,
                0232     "Overstruck"  => 0x0d,
b736dbe93 Jens*0233     "Invisible"  => 0x0e,
b83af7c76 Alex*0234     "Bottom_And_Left"  => 0x0f,
                0235     "Top_And_Bottom_And_Left"  => 0x10,
7339c04b7 Aric*0236 );
                0237 
95166855b Aric*0238 my %break_types =  # break class abbreviation => numeric id, consecutive from 0x0001 to 0x0030; presumably Unicode line-break classes (UAX #14) -- confirm
                0239 (
                0240     "BK"  => 0x0001,
                0241     "CR"  => 0x0002,
                0242     "LF"  => 0x0003,
                0243     "CM"  => 0x0004,
                0244     "SG"  => 0x0005,
                0245     "GL"  => 0x0006,
                0246     "CB"  => 0x0007,
                0247     "SP"  => 0x0008,
                0248     "ZW"  => 0x0009,
                0249     "NL"  => 0x000a,
                0250     "WJ"  => 0x000b,
                0251     "JL"  => 0x000c,
                0252     "JV"  => 0x000d,
                0253     "JT"  => 0x000e,
                0254     "H2"  => 0x000f,
                0255     "H3"  => 0x0010,
                0256     "XX"  => 0x0011,
                0257     "OP"  => 0x0012,
                0258     "CL"  => 0x0013,
                0259     "CP"  => 0x0014,
                0260     "QU"  => 0x0015,
                0261     "NS"  => 0x0016,
                0262     "EX"  => 0x0017,
                0263     "SY"  => 0x0018,
                0264     "IS"  => 0x0019,
                0265     "PR"  => 0x001a,
                0266     "PO"  => 0x001b,
                0267     "NU"  => 0x001c,
                0268     "AL"  => 0x001d,
                0269     "ID"  => 0x001e,
                0270     "IN"  => 0x001f,
                0271     "HY"  => 0x0020,
                0272     "BB"  => 0x0021,
                0273     "BA"  => 0x0022,
                0274     "SA"  => 0x0023,
                0275     "AI"  => 0x0024,
1f44de3f6 Alex*0276     "B2"  => 0x0025,
                0277     "HL"  => 0x0026,
                0278     "CJ"  => 0x0027,
                0279     "RI"  => 0x0028,
58e0972c5 Niko*0280     "EB"  => 0x0029,
                0281     "EM"  => 0x002a,
                0282     "ZWJ" => 0x002b,
c54765958 Alex*0283     "AK"  => 0x002c,
                0284     "AP"  => 0x002d,
                0285     "AS"  => 0x002e,
                0286     "VF"  => 0x002f,
                0287     "VI"  => 0x0030,
95166855b Aric*0288 );
                0289 
742cde4fa Aric*0290 my %vertical_types =  # orientation code => numeric id 0..3; presumably Unicode Vertical_Orientation values -- confirm
                0291 (
                0292     "R"  => 0x0000,
                0293     "U"  => 0x0001,
                0294     "Tr" => 0x0002,
                0295     "Tu" => 0x0003,
                0296 );
                0297 
dc727fa7b Alex*0298 my %categories =  # two-letter general category => OR of %ctype flags; every entry includes "defin"
c97bb4c5d Alex*0299 (
f2b8873b1 Aric*0300     "Lu" => $ctype{"defin"}|$ctype{"alpha"}|$ctype{"upper"}, # Letter, Uppercase
                0301     "Ll" => $ctype{"defin"}|$ctype{"alpha"}|$ctype{"lower"}, # Letter, Lowercase
                0302     "Lt" => $ctype{"defin"}|$ctype{"alpha"}|$ctype{"upper"}|$ctype{"lower"},    # Letter, Titlecase (flagged as both upper and lower)
d87d4a4a0 Alex*0303     "Mn" => $ctype{"defin"}|$ctype{"nonspacing"}, # Mark, Non-Spacing
f2b8873b1 Aric*0304     "Mc" => $ctype{"defin"},                    # Mark, Spacing Combining
                0305     "Me" => $ctype{"defin"},                    # Mark, Enclosing
                0306     "Nd" => $ctype{"defin"}|$ctype{"digit"},    # Number, Decimal Digit
                0307     "Nl" => $ctype{"defin"}|$ctype{"alpha"},    # Number, Letter
                0308     "No" => $ctype{"defin"},                    # Number, Other
                0309     "Zs" => $ctype{"defin"}|$ctype{"space"},    # Separator, Space
                0310     "Zl" => $ctype{"defin"}|$ctype{"space"},    # Separator, Line
                0311     "Zp" => $ctype{"defin"}|$ctype{"space"},    # Separator, Paragraph
                0312     "Cc" => $ctype{"defin"}|$ctype{"cntrl"},    # Other, Control
                0313     "Cf" => $ctype{"defin"}|$ctype{"cntrl"},    # Other, Format
                0314     "Cs" => $ctype{"defin"},                    # Other, Surrogate
                0315     "Co" => $ctype{"defin"},                    # Other, Private Use
                0316     "Cn" => $ctype{"defin"},                    # Other, Not Assigned
                0317     "Lm" => $ctype{"defin"}|$ctype{"alpha"},    # Letter, Modifier
                0318     "Lo" => $ctype{"defin"}|$ctype{"alpha"},    # Letter, Other
                0319     "Pc" => $ctype{"defin"}|$ctype{"punct"},    # Punctuation, Connector
                0320     "Pd" => $ctype{"defin"}|$ctype{"punct"},    # Punctuation, Dash
                0321     "Ps" => $ctype{"defin"}|$ctype{"punct"},    # Punctuation, Open
                0322     "Pe" => $ctype{"defin"}|$ctype{"punct"},    # Punctuation, Close
                0323     "Pi" => $ctype{"defin"}|$ctype{"punct"},    # Punctuation, Initial quote
                0324     "Pf" => $ctype{"defin"}|$ctype{"punct"},    # Punctuation, Final quote
                0325     "Po" => $ctype{"defin"}|$ctype{"punct"},    # Punctuation, Other
d87d4a4a0 Alex*0326     "Sm" => $ctype{"defin"}|$ctype{"symbol"},   # Symbol, Math
                0327     "Sc" => $ctype{"defin"}|$ctype{"symbol"},   # Symbol, Currency
                0328     "Sk" => $ctype{"defin"}|$ctype{"symbol"},   # Symbol, Modifier
                0329     "So" => $ctype{"defin"}|$ctype{"symbol"}    # Symbol, Other
c97bb4c5d Alex*0330 );
                0331 
                0332 # A few characters need additional categories that cannot be determined automatically.
dc727fa7b Alex*0333 my %special_categories =  # %ctype flag name => array ref of code points (ranges are expanded by ..)
c97bb4c5d Alex*0334 (
                0335     "xdigit" => [ ord('0')..ord('9'),ord('A')..ord('F'),ord('a')..ord('f'),
                0336                   0xff10..0xff19, 0xff21..0xff26, 0xff41..0xff46 ],
04aa6a88b Robe*0337     "space"  => [ 0x09..0x0d, 0x85 ],
                0338     "blank"  => [ 0x09, 0x20, 0xa0, 0x3000, 0xfeff ],
84561d76e Aric*0339     "cntrl"  => [ 0x070f, 0x200c, 0x200d,
04aa6a88b Robe*0340                   0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d, 0x202e,
                0341                   0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
459fb56af Aric*0342                   0xfff9, 0xfffa, 0xfffb ],
                0343     "punct"  => [ 0x24, 0x2b, 0x3c..0x3e, 0x5e, 0x60, 0x7c, 0x7e, 0xa2..0xbe,
46a5b4671 Aric*0344                   0xd7, 0xf7 ],
                0345     "digit"  => [ 0xb2, 0xb3, 0xb9 ],
d87d4a4a0 Alex*0346     "lower"  => [ 0xaa, 0xba, 0x2071, 0x207f ],
                0347     "nonspacing" => [ 0xc0..0xc5, 0xc7..0xcf, 0xd1..0xd6, 0xd8..0xdd, 0xe0..0xe5, 0xe7..0xef,
                0348                       0xf1..0xf6, 0xf8..0xfd, 0xff, 0x6de, 0x1929..0x192b, 0x302e..0x302f ],
                0349     "diacritic" => [ 0x5e, 0x60, 0xb7, 0xd8, 0xf8 ],
                0350     "symbol" => [ 0x09..0x0d, 0x20..0x23, 0x25, 0x26, 0x28..0x2a, 0x2c, 0x2e..0x2f, 0x3a..0x40,
                0351                   0x5b..0x60, 0x7b..0x7e, 0xa0..0xa9, 0xab..0xb1, 0xb4..0xb8, 0xbb, 0xbf,
                0352                   0x02b9..0x02ba, 0x02c6..0x02cf ],
                0353     "halfwidth" => [ 0x20..0x7e, 0xa2..0xa3, 0xa5..0xa6, 0xac, 0xaf, 0x20a9 ],
                0354     "fullwidth" => [ 0x2018..0x2019, 0x201c..0x201d, 0x3000..0x3002, 0x300c..0x300d, 0x309b..0x309c,
                0355                      0x30a1..0x30ab, 0x30ad, 0x30ad, 0x30af, 0x30b1, 0x30b3, 0x30b5, 0x30b7, 0x30b9,  # NOTE(review): 0x30ad is listed twice on this line -- presumably a harmless duplicate; confirm how the list is consumed
                0356                      0x30bb, 0x30bd, 0x30bf, 0x30c1, 0x30c3, 0x30c4, 0x30c6, 0x30c8, 0x30ca..0x30cf,
                0357                      0x30d2, 0x30d5, 0x30d8, 0x30db, 0x30de..0x30ed, 0x30ef, 0x30f2..0x30f3, 0x30fb,
                0358                      0x3131..0x3164 ],
                0359     "ideograph" => [ 0x3006..0x3007 ],
                0360     "lexical" => [ 0x22, 0x24, 0x27, 0x2d, 0x2f, 0x3d, 0x40, 0x5c, 0x5e..0x60, 0x7e,
                0361                    0xa8, 0xaa, 0xad, 0xaf, 0xb4, 0xb8, 0xba,
                0362                    0x02b0..0x02b8, 0x02bc, 0x02c7, 0x02ca..0x02cb, 0x02cf, 0x02d8..0x02dd, 0x02e0..0x02e3,
                0363                    0x037a, 0x0384..0x0385, 0x0387, 0x0559..0x055a, 0x0640, 0x1fbd..0x1fc1,
                0364                    0x1fcd..0x1fcf, 0x1fdd..0x1fdf, 0x1fed..0x1fef, 0x1ffd..0x1ffe, 0x2010..0x2015,
                0365                    0x2032..0x2034, 0x2038, 0x2043..0x2044, 0x207b..0x207c, 0x207f, 0x208b..0x208c,
                0366                    0x2212, 0x2215..0x2216, 0x2500, 0x2504..0x2505, 0x2508..0x2509, 0x254c..0x254d,
                0367                    0x3003, 0x301c, 0x3030..0x3035, 0x309b..0x309e, 0x30fd..0x30fe, 0xfe31..0xfe32,
                0368                    0xfe58, 0xfe63, 0xfe66, 0xfe68..0xfe69, 0xfe6b, 0xff04, 0xff07, 0xff0d, 0xff0f,
                0369                    0xff1d, 0xff20, 0xff3c, 0xff3e, 0xff40, 0xff5e ],
                0370     "kashida" => [ 0x0640 ],
c97bb4c5d Alex*0371 );
                0372 
dc727fa7b Alex*0373 my %directions =  # bidi class abbreviation => numeric direction code (1..15); entries are not listed in numeric order
c97bb4c5d Alex*0374 (
                0375     "L"   => 1,    # Left-to-Right
                0376     "R"   => 2,    # Right-to-Left
dc0e712d1 Alex*0377     "AL"  => 12,   # Right-to-Left Arabic
c97bb4c5d Alex*0378     "EN"  => 3,    # European Number
                0379     "ES"  => 4,    # European Number Separator
                0380     "ET"  => 5,    # European Number Terminator
                0381     "AN"  => 6,    # Arabic Number
                0382     "CS"  => 7,    # Common Number Separator
dc0e712d1 Alex*0383     "NSM" => 13,   # Non-Spacing Mark
                0384     "BN"  => 14,   # Boundary Neutral
c97bb4c5d Alex*0385     "B"   => 8,    # Paragraph Separator
                0386     "S"   => 9,    # Segment Separator
                0387     "WS"  => 10,   # Whitespace
b5d9fc323 Aric*0388     "ON"  => 11,   # Other Neutrals
                0389     "LRE" => 15,   # Left-to-Right Embedding (all embedding/override/isolate controls below share code 15)
                0390     "LRO" => 15,   # Left-to-Right Override
                0391     "RLE" => 15,   # Right-to-Left Embedding
                0392     "RLO" => 15,   # Right-to-Left Override
                0393     "PDF" => 15,   # Pop Directional Format
                0394     "LRI" => 15,   # Left-to-Right Isolate
                0395     "RLI" => 15,   # Right-to-Left Isolate
                0396     "FSI" => 15,   # First Strong Isolate
                0397     "PDI" => 15    # Pop Directional Isolate
c97bb4c5d Alex*0398 );
                0399 
02a04ba05 Alex*0400 my %c2_types =  # bidi class abbreviation => numeric value of the C2_* constant named in each trailing comment; same key set as %directions
                0401 (
                0402     "L"   => 1,    # C2_LEFTTORIGHT
                0403     "R"   => 2,    # C2_RIGHTTOLEFT
                0404     "AL"  => 2,    # C2_RIGHTTOLEFT
                0405     "EN"  => 3,    # C2_EUROPENUMBER
                0406     "ES"  => 4,    # C2_EUROPESEPARATOR
                0407     "ET"  => 5,    # C2_EUROPETERMINATOR
                0408     "AN"  => 6,    # C2_ARABICNUMBER
                0409     "CS"  => 7,    # C2_COMMONSEPARATOR
95aeb41c8 Alex*0410     "NSM" => 11,   # C2_OTHERNEUTRAL
02a04ba05 Alex*0411     "BN"  => 0,    # C2_NOTAPPLICABLE
                0412     "B"   => 8,    # C2_BLOCKSEPARATOR
                0413     "S"   => 9,    # C2_SEGMENTSEPARATOR
                0414     "WS"  => 10,   # C2_WHITESPACE
                0415     "ON"  => 11,   # C2_OTHERNEUTRAL
                0416     "LRE" => 11,   # C2_OTHERNEUTRAL
                0417     "LRO" => 11,   # C2_OTHERNEUTRAL
                0418     "RLE" => 11,   # C2_OTHERNEUTRAL
                0419     "RLO" => 11,   # C2_OTHERNEUTRAL
                0420     "PDF" => 11,   # C2_OTHERNEUTRAL
                0421     "LRI" => 11,   # C2_OTHERNEUTRAL
                0422     "RLI" => 11,   # C2_OTHERNEUTRAL
                0423     "FSI" => 11,   # C2_OTHERNEUTRAL
                0424     "PDI" => 11    # C2_OTHERNEUTRAL
                0425 );
                0426 
d68f8d7b3 Alex*0427 my %bidi_types =  # bidi class abbreviation => distinct id, consecutive from 0 to 22 (unlike %directions, no two classes share a value)
                0428 (
                0429     "ON"  => 0,    # Other Neutrals
                0430     "L"   => 1,    # Left-to-Right
                0431     "R"   => 2,    # Right-to-Left
                0432     "AN"  => 3,    # Arabic Number
                0433     "EN"  => 4,    # European Number
                0434     "AL"  => 5,    # Right-to-Left Arabic
                0435     "NSM" => 6,    # Non-Spacing Mark
                0436     "CS"  => 7,    # Common Number Separator
                0437     "ES"  => 8,    # European Number Separator
                0438     "ET"  => 9,    # European Number Terminator
                0439     "BN"  => 10,   # Boundary Neutral
                0440     "S"   => 11,   # Segment Separator
                0441     "WS"  => 12,   # Whitespace
                0442     "B"   => 13,   # Paragraph Separator
                0443     "RLO" => 14,   # Right-to-Left Override
                0444     "RLE" => 15,   # Right-to-Left Embedding
                0445     "LRO" => 16,   # Left-to-Right Override
                0446     "LRE" => 17,   # Left-to-Right Embedding
                0447     "PDF" => 18,   # Pop Directional Format
                0448     "LRI" => 19,   # Left-to-Right Isolate
                0449     "RLI" => 20,   # Right-to-Left Isolate
                0450     "FSI" => 21,   # First Strong Isolate
                0451     "PDI" => 22    # Pop Directional Isolate
                0452 );
                0453 
a5fe68870 Alex*0454 my %joining_types =  # joining type / group name => numeric code
                0455 (
f734d98a9 Niko*0456    "U" => 0,           # Non_Joining
                0457    "L" => 1,           # Left_Joining
                0458    "R" => 2,           # Right_Joining
                0459    "D" => 3,           # Dual_Joining
                0460    "C" => 3,           # Join_Causing (shares code 3 with Dual_Joining)
                0461    "ALAPH" => 4,       # Syriac ALAPH
                0462    "DALATH RISH" => 5, # Syriac DALATH RISH group (key contains a space)
                0463    "T" => 6,           # Transparent
a5fe68870 Alex*0464 );
                0465 
8cdb593f5 Alex*0466 my @locales =
                0467 (
                0468  { name => "", lcid => 0x0000007f, file => "root", territory => "IV", sabbrevlangname => "IVL", sopentypelang =>"dflt" },
50c5eb31c Alex*0469  { name => "aa", sopentypelang => "AFR" },
                0470  { name => "aa-DJ" },
                0471  { name => "aa-ER" },
                0472  { name => "aa-ET" },
8cdb593f5 Alex*0473  { name => "af", lcid => 0x00000036, oemcp => 850, sabbrevlangname => "AFK", sopentypelang => "AFK" },
                0474  { name => "af-NA" },
                0475  { name => "af-ZA", lcid => 0x00000436 },
                0476  { name => "agq" },
                0477  { name => "agq-CM" },
                0478  { name => "ak", sopentypelang => "TWI" },
                0479  { name => "ak-GH" },
                0480  { name => "am", lcid => 0x0000005e, sabbrevlangname => "AMH" },
                0481  { name => "am-ET", lcid => 0x0000045e },
                0482  { name => "ar", lcid => 0x00000001, territory => "SA", oemcp => 720, group => 13 },
                0483  { name => "ar-001" },
                0484  { name => "ar-AE", lcid => 0x00003801, sabbrevlangname => "ARU" },
                0485  { name => "ar-BH", lcid => 0x00003c01, sabbrevlangname => "ARH" },
                0486  { name => "ar-DJ" },
4172c0482 Alex*0487  { name => "ar-DZ", lcid => 0x00001401, sabbrevlangname => "ARG", nativedigits => "0123456789" },
8cdb593f5 Alex*0488  { name => "ar-EG", lcid => 0x00000c01, sabbrevlangname => "ARE" },
                0489  { name => "ar-EH" },
                0490  { name => "ar-ER" },
                0491  { name => "ar-IL" },
                0492  { name => "ar-IQ", lcid => 0x00000801, sabbrevlangname => "ARI" },
                0493  { name => "ar-JO", lcid => 0x00002c01, sabbrevlangname => "ARJ" },
                0494  { name => "ar-KM" },
                0495  { name => "ar-KW", lcid => 0x00003401, sabbrevlangname => "ARK" },
                0496  { name => "ar-LB", lcid => 0x00003001, sabbrevlangname => "ARB" },
4172c0482 Alex*0497  { name => "ar-LY", lcid => 0x00001001, sabbrevlangname => "ARL", nativedigits => "0123456789" },
                0498  { name => "ar-MA", lcid => 0x00001801, sabbrevlangname => "ARM", nativedigits => "0123456789" },
8cdb593f5 Alex*0499  { name => "ar-MR" },
                0500  { name => "ar-OM", lcid => 0x00002001, sabbrevlangname => "ARO" },
                0501  { name => "ar-PS" },
                0502  { name => "ar-QA", lcid => 0x00004001, sabbrevlangname => "ARQ" },
                0503  { name => "ar-SA", lcid => 0x00000401, sabbrevlangname => "ARA" },
                0504  { name => "ar-SD" },
                0505  { name => "ar-SO" },
                0506  { name => "ar-SS" },
                0507  { name => "ar-SY", lcid => 0x00002801, sabbrevlangname => "ARS" },
                0508  { name => "ar-TD" },
4172c0482 Alex*0509  { name => "ar-TN", lcid => 0x00001c01, sabbrevlangname => "ART", nativedigits => "0123456789" },
8cdb593f5 Alex*0510  { name => "ar-YE", lcid => 0x00002401, sabbrevlangname => "ARY" },
50c5eb31c Alex*0511  { name => "arn", lcid => 0x0000007a, oemcp => 850, ebcdiccp => 20284, slist => ",", sabbrevlangname => "MPD", sopentypelang => "MAP" },
                0512  { name => "arn-CL", lcid => 0x0000047a },
8cdb593f5 Alex*0513  { name => "arn-Latn", alias => "arn" },
                0514  { name => "arn-Latn-CL", alias => "arn-CL" },
                0515  { name => "as", lcid => 0x0000004d, slist => ",", group => 15 },
                0516  { name => "as-IN", lcid => 0x0000044d },
                0517  { name => "asa" },
                0518  { name => "asa-TZ" },
                0519  { name => "ast" },
                0520  { name => "ast-ES" },
                0521  { name => "az", lcid => 0x0000002c, oemcp => 857, ebcdiccp => 20905, group => 2 },
                0522  { name => "az-Cyrl", lcid => 0x0000742c, oemcp => 866, ebcdiccp => 20880, group => 5, sabbrevlangname => "AZC" },
                0523  { name => "az-Cyrl-AZ", lcid => 0x0000082c },
                0524  { name => "az-Latn", lcid => 0x0000782c },
                0525  { name => "az-Latn-AZ", lcid => 0x0000042c },
50c5eb31c Alex*0526  { name => "ba", lcid => 0x0000006d, oemcp => 866, group => 5, sabbrevlangname => "BAS", sopentypelang => "BSH" },
8cdb593f5 Alex*0527  { name => "ba-Cyrl", alias => "ba" },
                0528  { name => "ba-Cyrl-RU", alias => "ba-RU" },
50c5eb31c Alex*0529  { name => "ba-RU", lcid => 0x0000046d },
8cdb593f5 Alex*0530  { name => "bas" },
                0531  { name => "bas-CM" },
                0532  { name => "be", lcid => 0x00000023, oemcp => 866, ebcdiccp => 500, group => 5 },
                0533  { name => "be-BY", lcid => 0x00000423 },
                0534  { name => "bem" },
                0535  { name => "bem-ZM" },
6d046dd98 Alex*0536  { name => "bew" },
                0537  { name => "bew-ID" },
8cdb593f5 Alex*0538  { name => "bez" },
                0539  { name => "bez-TZ" },
                0540  { name => "bg", lcid => 0x00000002, oemcp => 866, ebcdiccp => 21025, group => 5, sabbrevlangname => "BGR", sopentypelang => "BGR" },
                0541  { name => "bg-BG", lcid => 0x00000402 },
                0542  { name => "bin", lcid => 0x00000066, oemcp => 850, dir => "exemplars", sabbrevlangname => "ZZZ", sopentypelang => "EDO" },
                0543  { name => "bin-NG", lcid => 0x00000466, file => "bin", dir => "exemplars" },
6d046dd98 Alex*0544  { name => "blo" },
                0545  { name => "blo-BJ" },
8cdb593f5 Alex*0546  { name => "bm", sopentypelang => "BMB" },
                0547  { name => "bm-Latn", file => "bm" },
                0548  { name => "bm-Latn-ML", file => "bm_ML" },
                0549  { name => "bm-ML", alias => "bm-Latn-ML" },
                0550  { name => "bn", lcid => 0x00000045, slist => ",", group => 15, sabbrevlangname => "BNB" },
                0551  { name => "bn-BD", lcid => 0x00000845 },
                0552  { name => "bn-IN", lcid => 0x00000445, sabbrevlangname => "BNG" },
                0553  { name => "bo", lcid => 0x00000051, slist => ",", group => 15, sabbrevlangname => "BOB", sopentypelang => "TIB" },
                0554  { name => "bo-CN", lcid => 0x00000451 },
                0555  { name => "bo-IN", slist => "," },
                0556  { name => "bo-Tibt", alias => "bo" },
                0557  { name => "bo-Tibt-CN", alias => "bo-CN" },
                0558  { name => "bo-Tibt-IN", alias => "bo-IN" },
                0559  { name => "br", lcid => 0x0000007e, oemcp => 850, ebcdiccp => 20297 },
                0560  { name => "br-FR", lcid => 0x0000047e },
                0561  { name => "br-Latn", alias => "br" },
                0562  { name => "br-Latn-FR", alias => "br-FR" },
                0563  { name => "brx" },
                0564  { name => "brx-IN" },
                0565  { name => "bs", lcid => 0x0000781a, oemcp => 852, maccp => 10082, ebcdiccp => 870, group => 2, sabbrevlangname => "BSB" },
                0566  { name => "bs-Cyrl", lcid => 0x0000641a, oemcp => 855, group => 5, sabbrevlangname => "BSC" },
                0567  { name => "bs-Cyrl-BA", lcid => 0x0000201a },
                0568  { name => "bs-Latn", lcid => 0x0000681a },
                0569  { name => "bs-Latn-BA", lcid => 0x0000141a },
50c5eb31c Alex*0570  { name => "byn", sopentypelang => "BIL" },
                0571  { name => "byn-ER" },
8cdb593f5 Alex*0572  { name => "ca", lcid => 0x00000003, oemcp => 850 },
                0573  { name => "ca-AD", maccp => 65001 },
                0574  { name => "ca-ES", lcid => 0x00000403 },
                0575  { name => "ca-ES-valencia", lcid => 0x00000803, file => "ca_ES_VALENCIA", sabbrevlangname => "VAL" },
                0576  { name => "ca-FR", maccp => 65001 },
                0577  { name => "ca-IT", maccp => 65001 },
                0578  { name => "ccp" },
                0579  { name => "ccp-BD", alias => "ccp-Cakm-BD" },
                0580  { name => "ccp-Cakm", file => "ccp" },
                0581  { name => "ccp-Cakm-BD", file => "ccp_BD" },
                0582  { name => "ccp-Cakm-IN", file => "ccp_IN" },
                0583  { name => "ccp-IN", alias => "ccp-Cakm-IN" },
                0584  { name => "ce" },
                0585  { name => "ce-RU" },
                0586  { name => "ceb" },
                0587  { name => "ceb-Latn", file => "ceb" },
                0588  { name => "ceb-Latn-PH", file => "ceb_PH" },
                0589  { name => "ceb-PH", alias => "ceb-Latn-PH" },
                0590  { name => "cgg" },
                0591  { name => "cgg-UG" },
                0592  { name => "chr", lcid => 0x0000005c, slist => ",", sabbrevlangname => "CRE" },
                0593  { name => "chr-Cher", lcid => 0x00007c5c, file => "chr" },
                0594  { name => "chr-Cher-US", lcid => 0x0000045c, file => "chr_US" },
                0595  { name => "chr-US", alias => "chr-Cher-US" },
                0596  { name => "ckb", alias => "ku" },
                0597  { name => "ckb-IQ", alias => "ku-Arab-IQ" },
                0598  { name => "ckb-IR", alias => "ku-Arab-IR" },
50c5eb31c Alex*0599  { name => "co", lcid => 0x00000083, oemcp => 850, ebcdiccp => 20297 },
                0600  { name => "co-FR", lcid => 0x00000483 },
8cdb593f5 Alex*0601  { name => "co-Latn", alias => "co" },
                0602  { name => "co-Latn-FR", alias => "co-FR" },
                0603  { name => "cs", lcid => 0x00000005, oemcp => 852, group => 2, sabbrevlangname => "CSY", sopentypelang => "CSY" },
                0604  { name => "cs-CZ", lcid => 0x00000405 },
6d046dd98 Alex*0605  { name => "csw" },
                0606  { name => "csw-CA" },
50c5eb31c Alex*0607  { name => "cu", sopentypelang => "CSL" },
                0608  { name => "cu-RU" },
8cdb593f5 Alex*0609  { name => "cy", lcid => 0x00000052, oemcp => 850, ebcdiccp => 20285, sabbrevlangname => "CYM", sopentypelang => "WEL" },
                0610  { name => "cy-GB", lcid => 0x00000452 },
                0611  { name => "da", lcid => 0x00000006, oemcp => 850, ebcdiccp => 20277 },
                0612  { name => "da-DK", lcid => 0x00000406 },
                0613  { name => "da-GL", maccp => 65001 },
                0614  { name => "dav" },
                0615  { name => "dav-KE" },
                0616  { name => "de", lcid => 0x00000007, oemcp => 850, ebcdiccp => 20273 },
                0617  { name => "de-AT", lcid => 0x00000c07, sabbrevlangname => "DEA" },
                0618  { name => "de-BE" },
                0619  { name => "de-CH", lcid => 0x00000807, sabbrevlangname => "DES" },
                0620  { name => "de-DE", lcid => 0x00000407 },
                0621  { name => "de-DE_phoneb", lcid => 0x00010407, alias => "de-DE" },
                0622  { name => "de-DE-u-co-phonebk", alias => "de-DE_phoneb" },
                0623  { name => "de-IT", oemcp => 65001 },
                0624  { name => "de-LI", lcid => 0x00001407, sabbrevlangname => "DEC" },
                0625  { name => "de-LU", lcid => 0x00001007, sabbrevlangname => "DEL" },
                0626  { name => "dje", sopentypelang => "DJR" },
                0627  { name => "dje-NE" },
b591b3167 Alex*0628  { name => "doi", sopentypelang => "DGR" },
                0629  { name => "doi-IN", alias => "doi-Deva-IN" },
                0630  { name => "doi-Deva", file => "doi" },
                0631  { name => "doi-Deva-IN", file => "doi_IN" },
8cdb593f5 Alex*0632  { name => "dsb", lcid => 0x00007c2e, sparent => "hsb", oemcp => 850, ebcdiccp => 870, sabbrevlangname => "DSB", sopentypelang => "LSB" },
                0633  { name => "dsb-DE", lcid => 0x0000082e },
                0634  { name => "dua" },
                0635  { name => "dua-CM" },
50c5eb31c Alex*0636  { name => "dv", lcid => 0x00000065, slist => "\x{060c}", group => 13, nativedigits => "0123456789" },
                0637  { name => "dv-MV", lcid => 0x00000465 },
8cdb593f5 Alex*0638  { name => "dyo" },
                0639  { name => "dyo-SN" },
                0640  { name => "dz", sopentypelang => "DZN" },
                0641  { name => "dz-BT", lcid => 0x00000c51, sabbrevlangname => "ZZZ" },
                0642  { name => "ebu" },
                0643  { name => "ebu-KE" },
                0644  { name => "ee" },
                0645  { name => "ee-GH" },
                0646  { name => "ee-TG" },
                0647  { name => "el", lcid => 0x00000008, oemcp => 737, group => 4 },
                0648  { name => "el-CY" },
                0649  { name => "el-GR", lcid => 0x00000408 },
                0650  { name => "en", lcid => 0x00000009, oemcp => 437, slist => ",", sabbrevlangname => "ENU" },
                0651  { name => "en-001", oemcp => 850 },
ad02ef7be Alex*0652  { name => "en-029", lcid => 0x00002409, file => "en", oemcp => 850, sabbrevlangname => "ENB" },
8cdb593f5 Alex*0653  { name => "en-150", oemcp => 65001 },
                0654  { name => "en-AE", lcid => 0x00004c09, oemcp => 65001, sabbrevlangname => "ZZZ" },
                0655  { name => "en-AG", oemcp => 850 },
                0656  { name => "en-AI", oemcp => 850 },
                0657  { name => "en-AS", oemcp => 850 },
                0658  { name => "en-AT", oemcp => 65001 },
                0659  { name => "en-AU", lcid => 0x00000c09, oemcp => 850, sabbrevlangname => "ENA" },
                0660  { name => "en-BB", oemcp => 850 },
                0661  { name => "en-BE", oemcp => 850 },
                0662  { name => "en-BI", oemcp => 65001 },
                0663  { name => "en-BM", oemcp => 850 },
                0664  { name => "en-BS", oemcp => 850 },
                0665  { name => "en-BW", oemcp => 850 },
                0666  { name => "en-BZ", lcid => 0x00002809, oemcp => 850, sabbrevlangname => "ENL" },
                0667  { name => "en-CA", lcid => 0x00001009, oemcp => 850, ebcdiccp => 37, sabbrevlangname => "ENC" },
                0668  { name => "en-CC", oemcp => 850 },
                0669  { name => "en-CH", oemcp => 65001 },
                0670  { name => "en-CK", oemcp => 850 },
                0671  { name => "en-CM", oemcp => 850 },
                0672  { name => "en-CX", oemcp => 850 },
                0673  { name => "en-CY", oemcp => 65001 },
                0674  { name => "en-DE", oemcp => 65001 },
                0675  { name => "en-DG", oemcp => 850 },
                0676  { name => "en-DK", oemcp => 65001 },
                0677  { name => "en-DM", oemcp => 850 },
                0678  { name => "en-ER", oemcp => 850 },
                0679  { name => "en-FI", oemcp => 65001 },
                0680  { name => "en-FJ", oemcp => 850 },
                0681  { name => "en-FK", oemcp => 850 },
                0682  { name => "en-FM", oemcp => 850 },
                0683  { name => "en-GB", lcid => 0x00000809, oemcp => 850, ebcdiccp => 20285, sabbrevlangname => "ENG" },
                0684  { name => "en-GD", oemcp => 850 },
                0685  { name => "en-GG", oemcp => 850 },
                0686  { name => "en-GH", oemcp => 850 },
                0687  { name => "en-GI", oemcp => 850 },
                0688  { name => "en-GM", oemcp => 850 },
                0689  { name => "en-GU", oemcp => 850 },
                0690  { name => "en-GY", oemcp => 850 },
                0691  { name => "en-HK", lcid => 0x00003c09, oemcp => 850, sabbrevlangname => "ENH" },
6d046dd98 Alex*0692  { name => "en-ID", lcid => 0x00003809, oemcp => 850, sabbrevlangname => "ZZZ" },
8cdb593f5 Alex*0693  { name => "en-IE", lcid => 0x00001809, oemcp => 850, sabbrevlangname => "ENI" },
                0694  { name => "en-IL", oemcp => 65001 },
                0695  { name => "en-IM", oemcp => 850 },
                0696  { name => "en-IN", lcid => 0x00004009, sabbrevlangname => "ENN" },
                0697  { name => "en-IO", oemcp => 850 },
                0698  { name => "en-JE", oemcp => 850 },
                0699  { name => "en-JM", lcid => 0x00002009, oemcp => 850, sabbrevlangname => "ENJ" },
                0700  { name => "en-KE", oemcp => 850 },
                0701  { name => "en-KI", oemcp => 850 },
                0702  { name => "en-KN", oemcp => 850 },
                0703  { name => "en-KY", oemcp => 850 },
                0704  { name => "en-LC", oemcp => 850 },
                0705  { name => "en-LR", oemcp => 850 },
                0706  { name => "en-LS", oemcp => 850 },
                0707  { name => "en-MG", oemcp => 850 },
                0708  { name => "en-MH", oemcp => 850 },
                0709  { name => "en-MO", oemcp => 850 },
                0710  { name => "en-MP", oemcp => 850 },
                0711  { name => "en-MS", oemcp => 850 },
                0712  { name => "en-MT", oemcp => 850 },
                0713  { name => "en-MU", oemcp => 850 },
                0714  { name => "en-MW", oemcp => 850 },
                0715  { name => "en-MY", lcid => 0x00004409, sabbrevlangname => "ENM" },
                0716  { name => "en-NA", oemcp => 850 },
                0717  { name => "en-NF", oemcp => 850 },
                0718  { name => "en-NG", oemcp => 850 },
                0719  { name => "en-NL", oemcp => 65001 },
                0720  { name => "en-NR", oemcp => 850 },
                0721  { name => "en-NU", oemcp => 850 },
                0722  { name => "en-NZ", lcid => 0x00001409, oemcp => 850, sabbrevlangname => "ENZ" },
                0723  { name => "en-PG", oemcp => 850 },
                0724  { name => "en-PH", lcid => 0x00003409, ebcdiccp => 500, sabbrevlangname => "ENP" },
                0725  { name => "en-PK", oemcp => 850 },
                0726  { name => "en-PN", oemcp => 850 },
                0727  { name => "en-PR", oemcp => 850 },
                0728  { name => "en-PW", oemcp => 850 },
                0729  { name => "en-RW", oemcp => 850 },
                0730  { name => "en-SB", oemcp => 850 },
                0731  { name => "en-SC", oemcp => 850 },
                0732  { name => "en-SD", oemcp => 850 },
                0733  { name => "en-SE", oemcp => 65001 },
                0734  { name => "en-SG", lcid => 0x00004809, sabbrevlangname => "ENE" },
                0735  { name => "en-SH", oemcp => 850 },
                0736  { name => "en-SI", oemcp => 65001 },
                0737  { name => "en-SL", oemcp => 850 },
                0738  { name => "en-SS", oemcp => 850 },
                0739  { name => "en-SX", oemcp => 850 },
                0740  { name => "en-SZ", oemcp => 850 },
                0741  { name => "en-TC", oemcp => 850 },
                0742  { name => "en-TK", oemcp => 850 },
                0743  { name => "en-TO", oemcp => 850 },
                0744  { name => "en-TT", lcid => 0x00002c09, oemcp => 850, sabbrevlangname => "ENT" },
                0745  { name => "en-TV", oemcp => 850 },
                0746  { name => "en-TZ", oemcp => 850 },
                0747  { name => "en-UG", oemcp => 850 },
                0748  { name => "en-UM", oemcp => 850 },
                0749  { name => "en-US", lcid => 0x00000409 },
                0750  { name => "en-VC", oemcp => 850 },
                0751  { name => "en-VG", oemcp => 850 },
                0752  { name => "en-VI", oemcp => 850 },
                0753  { name => "en-VU", oemcp => 850 },
                0754  { name => "en-WS", oemcp => 850 },
                0755  { name => "en-ZA", lcid => 0x00001c09, ebcdiccp => 500, sabbrevlangname => "ENS" },
                0756  { name => "en-ZM", oemcp => 850 },
                0757  { name => "en-ZW", lcid => 0x00003009, ebcdiccp => 500, sabbrevlangname => "ENW" },
                0758  { name => "eo", sopentypelang => "NTO" },
                0759  { name => "eo-001" },
                0760  { name => "es", lcid => 0x0000000a, oemcp => 850, ebcdiccp => 20284, sabbrevlangname => "ESP", sopentypelang => "ESP" },
                0761  { name => "es-419", lcid => 0x0000580a, sabbrevlangname => "ESJ" },
                0762  { name => "es-AR", lcid => 0x00002c0a, sabbrevlangname => "ESS" },
                0763  { name => "es-BO", lcid => 0x0000400a, sabbrevlangname => "ESB" },
                0764  { name => "es-BR", oemcp => 65001 },
                0765  { name => "es-BZ", oemcp => 65001 },
                0766  { name => "es-CL", lcid => 0x0000340a, sabbrevlangname => "ESL" },
                0767  { name => "es-CO", lcid => 0x0000240a, sabbrevlangname => "ESO" },
                0768  { name => "es-CR", lcid => 0x0000140a, sabbrevlangname => "ESC" },
                0769  { name => "es-CU", lcid => 0x00005c0a, sabbrevlangname => "ESK" },
                0770  { name => "es-DO", lcid => 0x00001c0a, sabbrevlangname => "ESD" },
                0771  { name => "es-EA" },
                0772  { name => "es-EC", lcid => 0x0000300a, sabbrevlangname => "ESF" },
                0773  { name => "es-ES", lcid => 0x00000c0a, sabbrevlangname => "ESN" },
                0774  { name => "es-ES_tradnl", lcid => 0x0000040a, file => "es_ES" },
                0775  { name => "es-ES-u-co-trad", alias => "es-ES_tradnl" },
                0776  { name => "es-GQ" },
                0777  { name => "es-GT", lcid => 0x0000100a, sabbrevlangname => "ESG" },
                0778  { name => "es-HN", lcid => 0x0000480a, sabbrevlangname => "ESH" },
                0779  { name => "es-IC" },
                0780  { name => "es-MX", lcid => 0x0000080a, sabbrevlangname => "ESM" },
                0781  { name => "es-NI", lcid => 0x00004c0a, sabbrevlangname => "ESI" },
                0782  { name => "es-PA", lcid => 0x0000180a, sabbrevlangname => "ESA" },
                0783  { name => "es-PE", lcid => 0x0000280a, sabbrevlangname => "ESR" },
                0784  { name => "es-PH" },
                0785  { name => "es-PR", lcid => 0x0000500a, sabbrevlangname => "ESU" },
                0786  { name => "es-PY", lcid => 0x00003c0a, sabbrevlangname => "ESZ" },
                0787  { name => "es-SV", lcid => 0x0000440a, sabbrevlangname => "ESE" },
                0788  { name => "es-US", lcid => 0x0000540a, sabbrevlangname => "EST" },
                0789  { name => "es-UY", lcid => 0x0000380a, sabbrevlangname => "ESY" },
                0790  { name => "es-VE", lcid => 0x0000200a, sabbrevlangname => "ESV" },
                0791  { name => "et", lcid => 0x00000025, oemcp => 775, group => 3, sabbrevlangname => "ETI", sopentypelang => "ETI" },
                0792  { name => "et-EE", lcid => 0x00000425 },
                0793  { name => "eu", lcid => 0x0000002d, oemcp => 850, maccp => 65001, sabbrevlangname => "EUQ", sopentypelang => "EUQ" },
                0794  { name => "eu-ES", lcid => 0x0000042d },
                0795  { name => "ewo" },
                0796  { name => "ewo-CM" },
                0797  { name => "fa", lcid => 0x00000029, inegnumber => 3, oemcp => 720, slist => "\x{061b}", group => 13, sabbrevlangname => "FAR", sopentypelang => "FAR" },
                0798  { name => "fa-AF", alias => "prs-AF" },
                0799  { name => "fa-IR", lcid => 0x00000429 },
                0800  { name => "ff", lcid => 0x00000067, oemcp => 850, ebcdiccp => 20297 },
                0801  { name => "ff-CM", alias => "ff-Latn-CM" },
                0802  { name => "ff-GN", alias => "ff-Latn-GN" },
                0803  { name => "ff-MR", alias => "ff-Latn-MR" },
                0804  { name => "ff-NG", alias => "ff-Latn-NG" },
                0805  { name => "ff-SN", alias => "ff-Latn-SN" },
b591b3167 Alex*0806  { name => "ff-Adlm", oemcp => 65001 },
8cdb593f5 Alex*0807  { name => "ff-Adlm-BF" },
                0808  { name => "ff-Adlm-CM" },
                0809  { name => "ff-Adlm-GH" },
                0810  { name => "ff-Adlm-GM" },
                0811  { name => "ff-Adlm-GN" },
                0812  { name => "ff-Adlm-GW" },
                0813  { name => "ff-Adlm-LR" },
                0814  { name => "ff-Adlm-MR" },
                0815  { name => "ff-Adlm-NE" },
                0816  { name => "ff-Adlm-NG" },
                0817  { name => "ff-Adlm-SL" },
                0818  { name => "ff-Adlm-SN" },
                0819  { name => "ff-Latn", lcid => 0x00007c67 },
                0820  { name => "ff-Latn-BF", oemcp => 65001 },
                0821  { name => "ff-Latn-CM" },
                0822  { name => "ff-Latn-GH", oemcp => 65001 },
                0823  { name => "ff-Latn-GM", oemcp => 65001 },
                0824  { name => "ff-Latn-GN" },
                0825  { name => "ff-Latn-GW", oemcp => 65001 },
                0826  { name => "ff-Latn-LR", oemcp => 65001 },
                0827  { name => "ff-Latn-MR" },
                0828  { name => "ff-Latn-NE", oemcp => 65001 },
                0829  { name => "ff-Latn-NG", lcid => 0x00000467, sabbrevlangname => "ZZZ" },
                0830  { name => "ff-Latn-SL", oemcp => 65001 },
                0831  { name => "ff-Latn-SN", lcid => 0x00000867 },
                0832  { name => "fi", lcid => 0x0000000b, oemcp => 850, ebcdiccp => 20278 },
                0833  { name => "fi-FI", lcid => 0x0000040b },
                0834  { name => "fil", lcid => 0x00000064, oemcp => 437, ebcdiccp => 500, sabbrevlangname => "FPO", sopentypelang => "PIL" },
                0835  { name => "fil-PH", lcid => 0x00000464 },
                0836  { name => "fil-Latn", alias => "fil" },
                0837  { name => "fil-Latn-PH", alias => "fil-PH" },
                0838  { name => "fo", lcid => 0x00000038, oemcp => 850, maccp => 10079, ebcdiccp => 20277, sabbrevlangname => "FOS", sopentypelang => "FOS" },
                0839  { name => "fo-DK", oemcp => 65001, maccp => 65001 },
                0840  { name => "fo-FO", lcid => 0x00000438 },
                0841  { name => "fr", lcid => 0x0000000c, oemcp => 850, ebcdiccp => 20297 },
ad02ef7be Alex*0842  { name => "fr-029", lcid => 0x00001c0c, file => "fr", sabbrevlangname => "ZZZ" },
8cdb593f5 Alex*0843  { name => "fr-BE", lcid => 0x0000080c, sabbrevlangname => "FRB" },
                0844  { name => "fr-BF" },
                0845  { name => "fr-BI" },
                0846  { name => "fr-BJ" },
                0847  { name => "fr-BL" },
                0848  { name => "fr-CA", lcid => 0x00000c0c, sabbrevlangname => "FRC" },
                0849  { name => "fr-CD", lcid => 0x0000240c, sabbrevlangname => "FRD" },
                0850  { name => "fr-CF" },
                0851  { name => "fr-CG" },
                0852  { name => "fr-CH", lcid => 0x0000100c, sabbrevlangname => "FRS" },
                0853  { name => "fr-CI", lcid => 0x0000300c, sabbrevlangname => "FRI" },
                0854  { name => "fr-CM", lcid => 0x00002c0c, sabbrevlangname => "FRE" },
                0855  { name => "fr-DJ" },
                0856  { name => "fr-DZ" },
                0857  { name => "fr-FR", lcid => 0x0000040c },
                0858  { name => "fr-GA" },
                0859  { name => "fr-GF" },
                0860  { name => "fr-GN" },
                0861  { name => "fr-GP" },
                0862  { name => "fr-GQ" },
                0863  { name => "fr-HT", lcid => 0x00003c0c, sabbrevlangname => "FRH" },
                0864  { name => "fr-KM" },
                0865  { name => "fr-LU", lcid => 0x0000140c, sabbrevlangname => "FRL" },
                0866  { name => "fr-MA", lcid => 0x0000380c, sabbrevlangname => "FRO" },
                0867  { name => "fr-MC", lcid => 0x0000180c, sabbrevlangname => "FRM" },
                0868  { name => "fr-MF" },
                0869  { name => "fr-MG" },
                0870  { name => "fr-ML", lcid => 0x0000340c, sabbrevlangname => "FRF" },
                0871  { name => "fr-MQ" },
                0872  { name => "fr-MR" },
                0873  { name => "fr-MU" },
                0874  { name => "fr-NC" },
                0875  { name => "fr-NE" },
                0876  { name => "fr-PF" },
                0877  { name => "fr-PM" },
                0878  { name => "fr-RE", lcid => 0x0000200c, sabbrevlangname => "FRR" },
                0879  { name => "fr-RW" },
                0880  { name => "fr-SC" },
                0881  { name => "fr-SN", lcid => 0x0000280c, sabbrevlangname => "FRN" },
                0882  { name => "fr-SY" },
                0883  { name => "fr-TD" },
                0884  { name => "fr-TG" },
                0885  { name => "fr-TN" },
                0886  { name => "fr-VU" },
                0887  { name => "fr-WF" },
                0888  { name => "fr-YT" },
                0889  { name => "fur", sopentypelang => "FRL" },
                0890  { name => "fur-IT" },
b591b3167 Alex*0891  { name => "fuv-NG", alias => "ff-Latn-NG" },
8cdb593f5 Alex*0892  { name => "fy", lcid => 0x00000062, oemcp => 850, sabbrevlangname => "FYN", sopentypelang => "FRI" },
                0893  { name => "fy-NL", lcid => 0x00000462 },
                0894  { name => "ga", lcid => 0x0000003c, oemcp => 850, sabbrevlangname => "IRE", sopentypelang => "IRI" },
b591b3167 Alex*0895  { name => "ga-GB", oemcp => 65001 },
8cdb593f5 Alex*0896  { name => "ga-IE", lcid => 0x0000083c },
                0897  { name => "gd", lcid => 0x00000091, oemcp => 850, ebcdiccp => 20285, sopentypelang => "GAE" },
                0898  { name => "gd-GB", lcid => 0x00000491 },
                0899  { name => "gd-Latn", alias => "gd" },
                0900  { name => "gl", lcid => 0x00000056, oemcp => 850, sabbrevlangname => "GLC", sopentypelang => "GAL" },
                0901  { name => "gl-ES", lcid => 0x00000456 },
50c5eb31c Alex*0902  { name => "gn", lcid => 0x00000074, oemcp => 850, ebcdiccp => 20284, slist => ",", sopentypelang => "GUA" },
                0903  { name => "gn-PY", lcid => 0x00000474 },
8cdb593f5 Alex*0904  { name => "gsw", lcid => 0x00000084, oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "ZZZ", sopentypelang => "ALS" },
                0905  { name => "gsw-CH" },
                0906  { name => "gsw-FR", lcid => 0x00000484, sabbrevlangname => "GSW" },
                0907  { name => "gsw-LI" },
                0908  { name => "gu", lcid => 0x00000047, slist => ",", group => 15 },
                0909  { name => "gu-IN", lcid => 0x00000447 },
                0910  { name => "guz" },
                0911  { name => "guz-KE" },
                0912  { name => "gv", sopentypelang => "MNX" },
b591b3167 Alex*0913  { name => "gv-GB", file => "gv" },
8cdb593f5 Alex*0914  { name => "gv-IM" },
                0915  { name => "ha", lcid => 0x00000068, oemcp => 437 },
                0916  { name => "ha-GH", alias => "ha-Latn-GH" },
                0917  { name => "ha-Latn", lcid => 0x00007c68, file => "ha" },
                0918  { name => "ha-Latn-GH", file => "ha_GH", ebcdiccp => 500 },
                0919  { name => "ha-Latn-NE", file => "ha_NE", ebcdiccp => 500 },
                0920  { name => "ha-Latn-NG", lcid => 0x00000468, file => "ha_NG" },
                0921  { name => "ha-NE", alias => "ha-Latn-NE" },
                0922  { name => "ha-NG", alias => "ha-Latn-NG" },
                0923  { name => "haw", lcid => 0x00000075, oemcp => 437 },
                0924  { name => "haw-Latn", alias => "haw" },
                0925  { name => "haw-Latn-US", alias => "haw-US" },
                0926  { name => "haw-US", lcid => 0x00000475 },
                0927  { name => "he", lcid => 0x0000000d, oemcp => 862, slist => ",", group => 12, sopentypelang => "IWR" },
                0928  { name => "he-IL", lcid => 0x0000040d },
                0929  { name => "hi", lcid => 0x00000039, slist => ",", group => 15 },
                0930  { name => "hi-IN", lcid => 0x00000439 },
                0931  { name => "hr", lcid => 0x0000001a, inegnumber => 2, oemcp => 852, maccp => 10082, group => 2 },
                0932  { name => "hr-BA", lcid => 0x0000101a, ebcdiccp => 870, inegnumber => 1, sabbrevlangname => "HRB" },
                0933  { name => "hr-HR", lcid => 0x0000041a },
                0934  { name => "hsb", lcid => 0x0000002e, oemcp => 850, ebcdiccp => 870, sopentypelang => "USB" },
                0935  { name => "hsb-DE", lcid => 0x0000042e },
                0936  { name => "hu", lcid => 0x0000000e, oemcp => 852, group => 2 },
                0937  { name => "hu-HU", lcid => 0x0000040e },
                0938  { name => "hu-HU_technl", lcid => 0x0001040e, alias => "hu-HU" },
                0939  { name => "hy", lcid => 0x0000002b, slist => ",", group => 17 },
                0940  { name => "hy-AM", lcid => 0x0000042b },
                0941  { name => "ia" },
                0942  { name => "ia-001" },
                0943 ## name => "ibb", lcid => 0x00000069 },
                0944 ## name => "ibb-NG", lcid => 0x00000469 },
                0945  { name => "id", lcid => 0x00000021, oemcp => 850 },
                0946  { name => "id-ID", lcid => 0x00000421 },
6d046dd98 Alex*0947  { name => "ie" },
                0948  { name => "ie-EE" },
8cdb593f5 Alex*0949  { name => "ig", lcid => 0x00000070, oemcp => 437 },
                0950  { name => "ig-Latn", alias => "ig" },
                0951  { name => "ig-Latn-NG", alias => "ig-NG" },
                0952  { name => "ig-NG", lcid => 0x00000470 },
                0953  { name => "ii", lcid => 0x00000078, group => 9, sopentypelang => "YIM" },
                0954  { name => "ii-CN", lcid => 0x00000478 },
                0955  { name => "ii-Yiii", alias => "ii" },
                0956  { name => "ii-Yiii-CN", alias => "ii-CN" },
                0957  { name => "is", lcid => 0x0000000f, oemcp => 850, maccp => 10079, ebcdiccp => 20871 },
                0958  { name => "is-IS", lcid => 0x0000040f },
                0959  { name => "it", lcid => 0x00000010, oemcp => 850, ebcdiccp => 20280 },
                0960  { name => "it-CH", lcid => 0x00000810, ebcdiccp => 500, sabbrevlangname => "ITS" },
                0961  { name => "it-IT", lcid => 0x00000410 },
                0962  { name => "it-SM" },
                0963  { name => "it-VA", oemcp => 65001 },
50c5eb31c Alex*0964  { name => "iu", lcid => 0x0000005d, oemcp => 437, slist => ",", sortlocale => "iu-Latn-CA", sabbrevlangname => "IUK", sopentypelang => "INU" },
                0965  { name => "iu-Cans", lcid => 0x0000785d, file => "iu", oemcp => 65001, sabbrevlangname => "IUS" },
                0966  { name => "iu-Cans-CA", lcid => 0x0000045d, file => "iu_CA" },
                0967  { name => "iu-Latn", lcid => 0x00007c5d },
                0968  { name => "iu-Latn-CA", lcid => 0x0000085d },
8cdb593f5 Alex*0969  { name => "ja", lcid => 0x00000011, ireadinglayout => 2, oemcp => 932, slist => ",",  sscripts => "Hani Hira Jpan Kana", group => 7, sopentypelang => "JAN" },
                0970  { name => "ja-JP", lcid => 0x00000411 },
                0971  { name => "ja-JP_radstr", lcid => 0x00040411, alias => "ja-JP" },
                0972  { name => "ja-JP-u-co-unihan", alias => "ja-JP_radstr" },
                0973  { name => "jgo" },
                0974  { name => "jgo-CM" },
                0975  { name => "jmc" },
                0976  { name => "jmc-TZ" },
4172c0482 Alex*0977  { name => "jv", oemcp => 850, nativedigits => "0123456789" },
8cdb593f5 Alex*0978  { name => "jv-ID", alias => "jv-Latn-ID" },
                0979 ## name => "jv-Java" },
                0980 ## name => "jv-Java-ID" },
                0981  { name => "jv-Latn", file => "jv" },
                0982  { name => "jv-Latn-ID", file => "jv_ID" },
                0983  { name => "ka", lcid => 0x00000037, group => 16 },
                0984  { name => "ka-GE", lcid => 0x00000437 },
                0985  { name => "ka-GE_modern", lcid => 0x00010437, alias => "ka-GE" },
                0986  { name => "kab", sopentypelang => "KAB0" },
                0987  { name => "kab-DZ" },
                0988  { name => "kam", sopentypelang => "KMB" },
                0989  { name => "kam-KE" },
                0990  { name => "kde" },
                0991  { name => "kde-TZ" },
                0992  { name => "kea" },
                0993  { name => "kea-CV" },
                0994  { name => "kgp" },
                0995  { name => "kgp-BR" },
                0996  { name => "khq" },
                0997  { name => "khq-ML" },
                0998  { name => "ki" },
                0999  { name => "ki-KE" },
                1000  { name => "kk", lcid => 0x0000003f, group => 5, sabbrevlangname => "KKZ" },
                1001  { name => "kk-Cyrl", alias => "kk" },
                1002  { name => "kk-Cyrl-KZ", alias => "kk-KZ" },
                1003  { name => "kk-KZ", lcid => 0x0000043f },
                1004  { name => "kkj" },
                1005  { name => "kkj-CM" },
                1006  { name => "kl", lcid => 0x0000006f, oemcp => 850, ebcdiccp => 20277, sopentypelang => "GRN" },
                1007  { name => "kl-GL", lcid => 0x0000046f },
                1008  { name => "kln", sopentypelang => "KAL" },
                1009  { name => "kln-KE" },
                1010  { name => "km", lcid => 0x00000053, inegnumber => 2, slist => ",", group => 15 },
                1011  { name => "km-KH", lcid => 0x00000453 },
                1012  { name => "kn", lcid => 0x0000004b, slist => ",", group => 15, sabbrevlangname => "KDI" },
                1013  { name => "kn-IN", lcid => 0x0000044b },
                1014  { name => "ko", lcid => 0x00000012, ireadinglayout => 2, slist => ",", oemcp => 949, ebcdiccp => 20833, sscripts => "Hang Hani Kore", group => 8 },
                1015  { name => "ko-KP", oemcp => 65001 },
                1016  { name => "ko-KR", lcid => 0x00000412 },
                1017  { name => "kok", lcid => 0x00000057, slist => ",", group => 15, sabbrevlangname => "KNK" },
                1018  { name => "kok-IN", lcid => 0x00000457 },
                1019  { name => "kr", lcid => 0x00000071, sortlocale => "kr-Latn-NG", oemcp => 850, dir => "exemplars", sabbrevlangname => "ZZZ", sopentypelang => "KNR" },
                1020  { name => "kr-Latn", file => "kr", dir => "exemplars" },
                1021  { name => "kr-Latn-NG", lcid => 0x00000471, file => "kr", dir => "exemplars" },
                1022  { name => "kr-NG", alias => "kr-Latn-NG" },
                1023  { name => "ks", lcid => 0x00000060, group => 15, sabbrevlangname => "ZZZ", sopentypelang => "KSH" },
                1024  { name => "ks-Arab", lcid => 0x00000460 },
                1025  { name => "ks-Arab-IN" },
6644b6b66 Alex*1026  { name => "ks-Deva", slist => "," },
                1027  { name => "ks-Deva-IN", lcid => 0x00000860 },
8cdb593f5 Alex*1028  { name => "ks-IN", alias => "ks-Arab-IN" },
                1029  { name => "ksb" },
                1030  { name => "ksb-TZ" },
                1031  { name => "ksf" },
                1032  { name => "ksf-CM" },
                1033  { name => "ksh", sopentypelang => "KSH0" },
                1034  { name => "ksh-DE" },
                1035  { name => "ku", lcid => 0x00000092, file => "ckb", slist => "\x{061b}", sortlocale => "ku-Arab-IQ", oemcp => 720 },
                1036  { name => "ku-Arab", lcid => 0x00007c92, file => "ckb", group => 13 },
                1037  { name => "ku-Arab-IQ", lcid => 0x00000492, file => "ckb_IQ" },
                1038  { name => "ku-Arab-IR", file => "ckb_IR", oemcp => 65001 },
                1039  { name => "kw" },
                1040  { name => "kw-GB" },
                1041  { name => "ky", lcid => 0x00000040, oemcp => 866, group => 5, sabbrevlangname => "KYR" },
                1042  { name => "ky-Cyrl", alias => "ky" },
                1043  { name => "ky-Cyrl-KG", alias => "ky-KG" },
                1044  { name => "ky-KG", lcid => 0x00000440 },
50c5eb31c Alex*1045  { name => "la", lcid => 0x00000076, oemcp => 437, slist => ",", sabbrevlangname => "ZZZ" },
                1046  { name => "la-VA", lcid => 0x00000476 },
b591b3167 Alex*1047  { name => "la-001", alias => "la-VA" },
8cdb593f5 Alex*1048  { name => "lag" },
                1049  { name => "lag-TZ" },
                1050  { name => "lb", lcid => 0x0000006e, oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "LBX" },
                1051  { name => "lb-LU", lcid => 0x0000046e },
                1052  { name => "lg" },
                1053  { name => "lg-UG" },
                1054  { name => "lkt" },
                1055  { name => "lkt-US" },
                1056  { name => "ln" },
                1057  { name => "ln-AO" },
                1058  { name => "ln-CD" },
                1059  { name => "ln-CF" },
                1060  { name => "ln-CG" },
                1061  { name => "lo", lcid => 0x00000054, group => 15 },
                1062  { name => "lo-LA", lcid => 0x00000454 },
                1063  { name => "lrc" },
                1064  { name => "lrc-IQ" },
                1065  { name => "lrc-IR" },
                1066  { name => "lt", lcid => 0x00000027, oemcp => 775, group => 3, sabbrevlangname => "LTH", sopentypelang => "LTH" },
                1067  { name => "lt-LT", lcid => 0x00000427 },
                1068  { name => "lu" },
                1069  { name => "lu-CD" },
                1070  { name => "luo" },
                1071  { name => "luo-KE" },
                1072  { name => "luy", sopentypelang => "LUH" },
                1073  { name => "luy-KE" },
                1074  { name => "lv", lcid => 0x00000026, oemcp => 775, group => 3, sabbrevlangname => "LVI", sopentypelang => "LVI" },
                1075  { name => "lv-LV", lcid => 0x00000426 },
                1076  { name => "mai" },
                1077  { name => "mai-IN" },
                1078  { name => "mas" },
                1079  { name => "mas-KE" },
                1080  { name => "mas-TZ" },
                1081  { name => "mer" },
                1082  { name => "mer-KE" },
                1083  { name => "mfe" },
                1084  { name => "mfe-MU" },
                1085  { name => "mg" },
                1086  { name => "mg-MG" },
                1087  { name => "mgh" },
                1088  { name => "mgh-MZ" },
                1089  { name => "mgo" },
                1090  { name => "mgo-CM" },
                1091  { name => "mi", lcid => 0x00000081, slist => "," },
                1092  { name => "mi-Latn", alias => "mi" },
                1093  { name => "mi-Latn-NZ", alias => "mi-NZ" },
                1094  { name => "mi-NZ", lcid => 0x00000481 },
6d046dd98 Alex*1095  { name => "mic" },
                1096  { name => "mic-CA" },
8cdb593f5 Alex*1097  { name => "mk", lcid => 0x0000002f, oemcp => 866, ebcdiccp => 500, group => 5, sabbrevlangname => "MKI" },
                1098  { name => "mk-MK", lcid => 0x0000042f },
                1099  { name => "ml", lcid => 0x0000004c, group => 15, sabbrevlangname => "MYM", sopentypelang => "MLR" },
                1100  { name => "ml-IN", lcid => 0x0000044c },
                1101  { name => "mn", lcid => 0x00000050, oemcp => 866, sopentypelang => "MNG" },
                1102  { name => "mn-Cyrl", lcid => 0x00007850, file => "mn", sabbrevlangname => "MNN" },
                1103  { name => "mn-Cyrl-MN", alias => "mn-MN" },
                1104  { name => "mn-MN", lcid => 0x00000450, sparent => "mn-Cyrl", group => 5 },
50c5eb31c Alex*1105  { name => "mn-Mong", lcid => 0x00007c50, oemcp => 65001, slist => ",", group => 15, sabbrevlangname => "MNG", nativedigits => "0123456789" },
                1106  { name => "mn-Mong-CN", lcid => 0x00000850 },
                1107  { name => "mn-Mong-MN", lcid => 0x00000c50, sabbrevlangname => "MNM" },
8cdb593f5 Alex*1108  { name => "mni", lcid => 0x00000058, slist => ",", sabbrevlangname => "ZZZ" },
                1109  { name => "mni-IN", lcid => 0x00000458, file => "mni_Beng_IN" },
b591b3167 Alex*1110  { name => "mni-Beng" },
                1111  { name => "mni-Beng-IN", alias => "mni-IN" },
50c5eb31c Alex*1112  { name => "moh", lcid => 0x0000007c, oemcp => 850, ebcdiccp => 37, slist => ",", sabbrevlangname => "MWK" },
                1113  { name => "moh-CA", lcid => 0x0000047c },
8cdb593f5 Alex*1114  { name => "moh-Latn", alias => "moh" },
                1115  { name => "moh-Latn-CA", alias => "moh-CA" },
                1116  { name => "mr", lcid => 0x0000004e, slist => ",", group => 15 },
                1117  { name => "mr-IN", lcid => 0x0000044e },
                1118  { name => "ms", lcid => 0x0000003e, oemcp => 850, sabbrevlangname => "MSL", sopentypelang => "MLY" },
                1119  { name => "ms-BN", lcid => 0x0000083e, sabbrevlangname => "MSB" },
                1120  { name => "ms-ID" },
                1121  { name => "ms-Latn", alias => "ms" },
                1122  { name => "ms-Latn-BN", alias => "ms-BN" },
                1123  { name => "ms-Latn-MY", alias => "ms-MY" },
                1124  { name => "ms-Latn-SG", alias => "ms-SG" },
                1125  { name => "ms-MY", lcid => 0x0000043e },
                1126  { name => "ms-SG" },
                1127  { name => "mt", lcid => 0x0000003a, sopentypelang => "MTS" },
                1128  { name => "mt-MT", lcid => 0x0000043a },
                1129  { name => "mua" },
                1130  { name => "mua-CM" },
                1131  { name => "my", lcid => 0x00000055, sopentypelang => "BRM" },
                1132  { name => "my-MM", lcid => 0x00000455 },
                1133  { name => "mzn" },
                1134  { name => "mzn-IR" },
                1135  { name => "naq" },
                1136  { name => "naq-NA" },
                1137  { name => "nb", lcid => 0x00007c14, oemcp => 850, ebcdiccp => 20277, sabbrevlangname => "NOR", sopentypelang => "NOR" },
                1138  { name => "nb-NO", lcid => 0x00000414 },
                1139  { name => "nb-SJ" },
                1140  { name => "nd", sopentypelang => "NDB" },
                1141  { name => "nd-ZW" },
                1142  { name => "nds" },
                1143  { name => "nds-DE" },
                1144  { name => "nds-NL" },
                1145  { name => "ne", lcid => 0x00000061, slist => "," },
                1146  { name => "ne-IN", lcid => 0x00000861, sabbrevlangname => "NEI" },
                1147  { name => "ne-NP", lcid => 0x00000461, group => 15 },
                1148  { name => "nl", lcid => 0x00000013, oemcp => 850 },
                1149  { name => "nl-AW" },
                1150  { name => "nl-BE", lcid => 0x00000813, sabbrevlangname => "NLB" },
                1151  { name => "nl-BQ" },
                1152  { name => "nl-CW" },
                1153  { name => "nl-NL", lcid => 0x00000413 },
                1154  { name => "nl-SR" },
                1155  { name => "nl-SX" },
                1156  { name => "nmg" },
                1157  { name => "nmg-CM" },
                1158  { name => "nn", lcid => 0x00007814, oemcp => 850, ebcdiccp => 20277, sabbrevlangname => "NON", sopentypelang => "NYN" },
                1159  { name => "nn-NO", lcid => 0x00000814 },
                1160  { name => "nnh" },
                1161  { name => "nnh-CM" },
                1162  { name => "no", lcid => 0x00000014, oemcp => 850, ebcdiccp => 20277, sortlocale => "nb-NO" },
50c5eb31c Alex*1163  { name => "nqo", idigits => 3, inegnumber => 3, slist => "\x{060c}", sopentypelang => "NKO" },
                1164  { name => "nqo-GN" },
                1165  { name => "nr", sopentypelang => "NDB" },
                1166  { name => "nr-ZA" },
                1167  { name => "nso", lcid => 0x0000006c, oemcp => 850, sopentypelang => "SOT" },
                1168  { name => "nso-ZA", lcid => 0x0000046c },
8cdb593f5 Alex*1169  { name => "nus" },
                1170  { name => "nus-SD", alias => "nus-SS" },
                1171  { name => "nus-SS" },
                1172  { name => "nyn", sopentypelang => "NKL" },
                1173  { name => "nyn-UG" },
198de0dcb Alex*1174  { name => "oc", lcid => 0x00000082, oemcp => 850, ebcdiccp => 20297 },
                1175  { name => "oc-FR", lcid => 0x00000482 },
8cdb593f5 Alex*1176  { name => "oc-Latn", alias => "oc" },
                1177  { name => "oc-Latn-FR", alias => "oc-FR" },
                1178  { name => "om", lcid => 0x00000072, sopentypelang => "ORO" },
                1179  { name => "om-ET", lcid => 0x00000472 },
                1180  { name => "om-KE" },
                1181  { name => "or", lcid => 0x00000048, slist => ",", group => 15 },
                1182  { name => "or-IN", lcid => 0x00000448 },
                1183  { name => "os" },
                1184  { name => "os-GE" },
                1185  { name => "os-RU" },
                1186  { name => "pa", lcid => 0x00000046, slist => "," },
                1187  { name => "pa-Arab", lcid => 0x00007c46, slist => ";", inegnumber => 2, oemcp => 720, group => 13, sabbrevlangname => "PAP" },
                1188  { name => "pa-Arab-PK", lcid => 0x00000846 },
                1189  { name => "pa-Guru" },
                1190  { name => "pa-Guru-IN", alias => "pa-IN" },
                1191  { name => "pa-IN", lcid => 0x00000446, sparent => "pa-Guru", file => "pa_Guru_IN", group => 15 },
50c5eb31c Alex*1192  { name => "pap", lcid => 0x00000079, oemcp => 850, sopentypelang => "PAP0" },
8cdb593f5 Alex*1193 ## name => "pap-029", lcid => 0x00000479 },
                1194  { name => "pcm" },
b591b3167 Alex*1195  { name => "pcm-NG", alias => "pcm-Latn-NG" },
                1196  { name => "pcm-Latn", file => "pcm" },
                1197  { name => "pcm-Latn-NG", file => "pcm_NG" },
8cdb593f5 Alex*1198  { name => "pl", lcid => 0x00000015, oemcp => 852, ebcdiccp => 20880, group => 2, sabbrevlangname => "PLK", sopentypelang => "PLK" },
                1199  { name => "pl-PL", lcid => 0x00000415 },
50c5eb31c Alex*1200  { name => "prg" },
6d046dd98 Alex*1201  { name => "prg-001", file => "prg" },
                1202  { name => "prg-PL" },
8cdb593f5 Alex*1203  { name => "prs", lcid => 0x0000008c, file => "fa", inegnumber => 3, oemcp => 720, group => 13, sopentypelang => "DRI" },
                1204  { name => "prs-AF", lcid => 0x0000048c, file => "fa_AF" },
                1205  { name => "prs-Arab", alias => "prs" },
                1206  { name => "prs-Arab-AF", alias => "prs-AF" },
                1207  { name => "ps", lcid => 0x00000063, group => 13, sabbrevlangname => "PAS", sopentypelang => "PAS" },
                1208  { name => "ps-AF", lcid => 0x00000463 },
                1209  { name => "ps-PK" },
                1210  { name => "pt", lcid => 0x00000016, oemcp => 850, sabbrevlangname => "PTB", sopentypelang => "PTG" },
                1211  { name => "pt-AO" },
                1212  { name => "pt-BR", lcid => 0x00000416 },
                1213  { name => "pt-CH", oemcp => 65001 },
                1214  { name => "pt-CV" },
                1215  { name => "pt-GQ", oemcp => 65001 },
                1216  { name => "pt-GW" },
                1217  { name => "pt-LU", oemcp => 65001 },
                1218  { name => "pt-MO" },
                1219  { name => "pt-MZ" },
                1220  { name => "pt-PT", lcid => 0x00000816, sabbrevlangname => "PTG" },
                1221  { name => "pt-ST" },
                1222  { name => "pt-TL" },
                1223 ## name => "qps-Latn-x-sh", lcid => 0x80000901 },
                1224 ## name => "qps-ploc", lcid => 0x80000501 },
                1225 ## name => "qps-ploca", lcid => 0x800005fe },
                1226 ## name => "qps-plocm", lcid => 0x800009ff },
                1227  { name => "qu", alias => "quz" },
                1228  { name => "qu-BO", alias => "quz-BO" },
                1229  { name => "qu-EC", alias => "quz-EC" },
                1230  { name => "qu-PE", alias => "quz-PE" },
50c5eb31c Alex*1231  { name => "quc", lcid => 0x00000086, oemcp => 850, ebcdiccp => 20284, slist => "," },
                1232  { name => "quc-Latn", lcid => 0x00007c86, file => "quc" },
                1233  { name => "quc-Latn-GT", lcid => 0x00000486, file => "quc_GT" },
8cdb593f5 Alex*1234  { name => "qut", alias => "quc" },
                1235  { name => "qut-GT", alias => "quc-Latn-GT" },
                1236  { name => "quz", lcid => 0x0000006b, file => "qu", territory => "BO", oemcp => 850, ebcdiccp => 20284, slist => "," },
                1237  { name => "quz-BO", lcid => 0x0000046b, file => "qu_BO" },
                1238  { name => "quz-EC", lcid => 0x0000086b, file => "qu_EC" },
                1239  { name => "quz-Latn", alias => "quz" },
                1240  { name => "quz-Latn-BO", alias => "quz-BO" },
                1241  { name => "quz-Latn-EC", alias => "quz-EC" },
                1242  { name => "quz-Latn-PE", alias => "quz-PE" },
                1243  { name => "quz-PE", lcid => 0x00000c6b, file => "qu_PE" },
                1244  { name => "rm", lcid => 0x00000017, oemcp => 850, ebcdiccp => 20273, sabbrevlangname => "RMC", sopentypelang => "RMS" },
                1245  { name => "rm-CH", lcid => 0x00000417 },
                1246  { name => "rn" },
                1247  { name => "rn-BI" },
                1248  { name => "ro", lcid => 0x00000018, oemcp => 852, ebcdiccp => 20880, sabbrevlangname => "ROM", sopentypelang => "ROM" },
                1249  { name => "ro-MD", lcid => 0x00000818, maccp => 65001, sabbrevlangname => "ROD" },
                1250  { name => "ro-RO", lcid => 0x00000418, group => 2 },
                1251  { name => "rof" },
                1252  { name => "rof-TZ" },
                1253  { name => "ru", lcid => 0x00000019, oemcp => 866 },
                1254  { name => "ru-BY", maccp => 65001 },
                1255  { name => "ru-KG", maccp => 65001 },
                1256  { name => "ru-KZ", maccp => 65001 },
                1257  { name => "ru-MD", lcid => 0x00000819, maccp => 65001, sabbrevlangname => "RUM" },
                1258  { name => "ru-RU", lcid => 0x00000419, group => 5 },
                1259  { name => "ru-UA", maccp => 65001 },
                1260  { name => "rw", lcid => 0x00000087, oemcp => 437, sopentypelang => "RUA" },
                1261  { name => "rw-RW", lcid => 0x00000487 },
                1262  { name => "rwk" },
                1263  { name => "rwk-TZ" },
                1264  { name => "sa", lcid => 0x0000004f, slist => ",", group => 15 },
                1265  { name => "sa-Deva", alias => "sa" },
                1266  { name => "sa-Deva-IN", alias => "sa-IN" },
                1267  { name => "sa-IN", lcid => 0x0000044f },
                1268  { name => "sah", lcid => 0x00000085, oemcp => 866, group => 5, sopentypelang => "YAK" },
                1269  { name => "sah-Cyrl", alias => "sah" },
                1270  { name => "sah-Cyrl-RU", alias => "sah-RU" },
                1271  { name => "sah-RU", lcid => 0x00000485 },
                1272  { name => "saq" },
                1273  { name => "saq-KE" },
                1274  { name => "sat" },
                1275  { name => "sat-Olck" },
                1276  { name => "sat-Olck-IN" },
                1277  { name => "sbp" },
                1278  { name => "sbp-TZ" },
                1279  { name => "sc" },
                1280  { name => "sc-IT" },
                1281  { name => "sd", lcid => 0x00000059, inegnumber => 3, oemcp => 720, sabbrevlangname => "SIP" },
                1282  { name => "sd-Arab", lcid => 0x00007c59, group => 13 },
                1283  { name => "sd-Arab-PK", lcid => 0x00000859 },
                1284  { name => "sd-Deva", inegnumber => 1, slist => ",", oemcp => 65001, group => 15 },
                1285  { name => "sd-Deva-IN", lcid => 0x00000459, sabbrevlangname => "ZZZ" },
                1286  { name => "sd-PK", alias => "sd-Arab-PK" },
                1287  { name => "se", lcid => 0x0000003b, oemcp => 850, ebcdiccp => 20277, sopentypelang => "NSM" },
                1288  { name => "se-FI", lcid => 0x00000c3b, ebcdiccp => 20278, sabbrevlangname => "SMG" },
                1289  { name => "se-NO", lcid => 0x0000043b },
                1290  { name => "se-SE", lcid => 0x0000083b, ebcdiccp => 20278, sabbrevlangname => "SMF" },
                1291  { name => "se-Latn", alias => "se" },
                1292  { name => "se-Latn-FI", alias => "se-FI" },
                1293  { name => "se-Latn-NO", alias => "se-NO" },
                1294  { name => "se-Latn-SE", alias => "se-SE" },
                1295  { name => "seh" },
                1296  { name => "seh-MZ" },
                1297  { name => "ses" },
                1298  { name => "ses-ML" },
                1299  { name => "sg", sopentypelang => "SGO" },
                1300  { name => "sg-CF" },
                1301  { name => "shi" },
                1302  { name => "shi-Latn" },
                1303  { name => "shi-Latn-MA" },
                1304  { name => "shi-Tfng" },
                1305  { name => "shi-Tfng-MA" },
                1306  { name => "si", lcid => 0x0000005b, group => 15, sopentypelang => "SNH" },
                1307  { name => "si-LK", lcid => 0x0000045b },
                1308  { name => "sk", lcid => 0x0000001b, oemcp => 852, ebcdiccp => 20880, group => 2, sabbrevlangname => "SKY", sopentypelang => "SKY" },
                1309  { name => "sk-SK", lcid => 0x0000041b },
6d046dd98 Alex*1310  { name => "skr" },
                1311  { name => "skr-PK" },
8cdb593f5 Alex*1312  { name => "sl", lcid => 0x00000024, oemcp => 852, ebcdiccp => 20880, group => 2 },
                1313  { name => "sl-SI", lcid => 0x00000424 },
50c5eb31c Alex*1314  { name => "sma", lcid => 0x0000783b, sparent => "se", ebcdiccp => 20278, sabbrevlangname => "SMB", sopentypelang => "SSM" },
8cdb593f5 Alex*1315  { name => "sma-Latn", alias => "sma" },
                1316  { name => "sma-Latn-NO", alias => "sma-NO" },
                1317  { name => "sma-Latn-SE", alias => "sma-SE" },
50c5eb31c Alex*1318  { name => "sma-NO", lcid => 0x0000183b, ebcdiccp => 20277, sabbrevlangname => "SMA" },
                1319  { name => "sma-SE", lcid => 0x00001c3b },
                1320  { name => "smj", lcid => 0x00007c3b, sparent => "se", ebcdiccp => 20278, sabbrevlangname => "SMK", sopentypelang => "LSM" },
8cdb593f5 Alex*1321  { name => "smj-Latn", alias => "smj" },
                1322  { name => "smj-Latn-NO", alias => "smj-NO" },
                1323  { name => "smj-Latn-SE", alias => "smj-SE" },
50c5eb31c Alex*1324  { name => "smj-NO", lcid => 0x0000103b, ebcdiccp => 20277, sabbrevlangname => "SMJ" },
                1325  { name => "smj-SE", lcid => 0x0000143b },
8cdb593f5 Alex*1326  { name => "smn", lcid => 0x0000703b, sparent => "se", ebcdiccp => 20278, sopentypelang => "ISM" },
                1327  { name => "smn-FI", lcid => 0x0000243b },
                1328  { name => "smn-Latn", alias => "smn" },
                1329  { name => "smn-Latn-FI", alias => "smn-FI" },
198de0dcb Alex*1330  { name => "sms", lcid => 0x0000743b, sparent => "se", ebcdiccp => 20278, sopentypelang => "SKS" },
                1331  { name => "sms-FI", lcid => 0x0000203b },
8cdb593f5 Alex*1332  { name => "sms-Latn", alias => "sms" },
                1333  { name => "sms-Latn-FI", alias => "sms-FI" },
                1334  { name => "sn", sopentypelang => "SNA0" },
                1335  { name => "sn-Latn", file => "sn" },
                1336  { name => "sn-Latn-ZW", file => "sn_ZW" },
                1337  { name => "sn-ZW", alias => "sn-Latn-ZW" },
                1338  { name => "so", lcid => 0x00000077, sopentypelang => "SML" },
                1339  { name => "so-DJ" },
                1340  { name => "so-ET" },
                1341  { name => "so-KE" },
                1342  { name => "so-SO", lcid => 0x00000477 },
                1343  { name => "sq", lcid => 0x0000001c, oemcp => 852, ebcdiccp => 20880, group => 2 },
                1344  { name => "sq-AL", lcid => 0x0000041c },
                1345  { name => "sq-MK" },
                1346  { name => "sq-XK" },
                1347  { name => "sr", lcid => 0x00007c1a, sortlocale => "sr-Latn-RS", oemcp => 852, group => 2, sabbrevlangname => "SRB", sopentypelang => "SRB" },
                1348  { name => "sr-Cyrl", lcid => 0x00006c1a, oemcp => 855, ebcdiccp => 21025, group => 5, sabbrevlangname => "SRO" },
                1349  { name => "sr-Cyrl-BA", lcid => 0x00001c1a, sabbrevlangname => "SRN" },
                1350  { name => "sr-Cyrl-ME", lcid => 0x0000301a, sabbrevlangname => "SRQ" },
                1351  { name => "sr-Cyrl-RS", lcid => 0x0000281a },
                1352  { name => "sr-Cyrl-XK" },
                1353  { name => "sr-Latn", lcid => 0x0000701a, sabbrevlangname => "SRM" },
                1354  { name => "sr-Latn-BA", lcid => 0x0000181a, maccp => 10082, ebcdiccp => 870, sabbrevlangname => "SRS" },
                1355  { name => "sr-Latn-ME", lcid => 0x00002c1a, sabbrevlangname => "SRP" },
                1356  { name => "sr-Latn-RS", lcid => 0x0000241a, sabbrevlangname => "SRM" },
                1357  { name => "sr-Latn-XK" },
                1358 ## name => "sr-Cyrl-CS", lcid => 0x00000c1a },
                1359 ## name => "sr-Latn-CS", lcid => 0x0000081a },
50c5eb31c Alex*1360  { name => "ss", sopentypelang => "SWZ" },
                1361  { name => "ss-SZ" },
                1362  { name => "ss-ZA" },
                1363  { name => "ssy" },
                1364  { name => "ssy-ER" },
                1365  { name => "st", lcid => 0x00000030 },
                1366  { name => "st-LS" },
                1367  { name => "st-ZA", lcid => 0x00000430 },
8cdb593f5 Alex*1368  { name => "su" },
                1369  { name => "su-Latn" },
                1370  { name => "su-Latn-ID" },
                1371  { name => "sv", lcid => 0x0000001d, oemcp => 850, ebcdiccp => 20278, sabbrevlangname => "SVE", sopentypelang => "SVE" },
                1372  { name => "sv-AX" },
                1373  { name => "sv-FI", lcid => 0x0000081d, sabbrevlangname => "SVF" },
                1374  { name => "sv-SE", lcid => 0x0000041d, sabbrevlangname => "SVE" },
                1375  { name => "sw", lcid => 0x00000041, territory => "KE", oemcp => 437, ebcdiccp => 500, sabbrevlangname => "SWK", sopentypelang => "SWK" },
                1376  { name => "sw-CD" },
                1377  { name => "sw-KE", lcid => 0x00000441 },
                1378  { name => "sw-TZ" },
                1379  { name => "sw-UG" },
                1380  { name => "swc-CD", alias => "sw-CD" },
50c5eb31c Alex*1381  { name => "syr", lcid => 0x0000005a, slist => ",", group => 13 },
                1382  { name => "syr-SY", lcid => 0x0000045a },
8cdb593f5 Alex*1383  { name => "syr-Syrc", alias => "syr" },
                1384  { name => "syr-Syrc-SY", alias => "syr-SY" },
                1385  { name => "ta", lcid => 0x00000049, slist => ",", group => 15, sabbrevlangname => "TAI" },
                1386  { name => "ta-IN", lcid => 0x00000449 },
                1387  { name => "ta-LK", lcid => 0x00000849, sabbrevlangname => "TAM" },
                1388  { name => "ta-MY" },
                1389  { name => "ta-SG" },
                1390  { name => "te", lcid => 0x0000004a, group => 15 },
                1391  { name => "te-IN", lcid => 0x0000044a },
                1392  { name => "teo" },
                1393  { name => "teo-KE" },
                1394  { name => "teo-UG" },
                1395  { name => "tg", lcid => 0x00000028, oemcp => 866, group => 5, sabbrevlangname => "TAJ", sopentypelang => "TAJ" },
                1396  { name => "tg-Cyrl", lcid => 0x00007c28, file => "tg" },
                1397  { name => "tg-Cyrl-TJ", lcid => 0x00000428, file => "tg_TJ" },
                1398  { name => "tg-TJ", alias => "tg-Cyrl-TJ" },
                1399  { name => "th", lcid => 0x0000001e, oemcp => 874, ebcdiccp => 20838, slist => ",", group => 11 },
                1400  { name => "th-TH", lcid => 0x0000041e },
                1401  { name => "ti", lcid => 0x00000073, territory => "ER", sopentypelang => "TGY" },
                1402  { name => "ti-ER", lcid => 0x00000873 },
                1403  { name => "ti-ET", lcid => 0x00000473, sabbrevlangname => "TIE" },
50c5eb31c Alex*1404  { name => "tig", sopentypelang => "TGR" },
                1405  { name => "tig-ER" },
b591b3167 Alex*1406  { name => "tig-Ethi-ER", alias => "tig-ER" },
8cdb593f5 Alex*1407  { name => "tk", lcid => 0x00000042, oemcp => 852, ebcdiccp => 20880, group => 2, sopentypelang => "TKM" },
                1408  { name => "tk-Latn", alias => "tk" },
                1409  { name => "tk-Latn-TM", alias => "tk-TM" },
                1410  { name => "tk-TM", lcid => 0x00000442 },
50c5eb31c Alex*1411  { name => "tn", lcid => 0x00000032, oemcp => 850, sopentypelang => "TNA" },
                1412  { name => "tn-BW", lcid => 0x00000832, sabbrevlangname => "TSB" },
                1413  { name => "tn-ZA", lcid => 0x00000432 },
8cdb593f5 Alex*1414  { name => "to", sopentypelang => "TGN" },
                1415  { name => "to-TO" },
                1416  { name => "tr", lcid => 0x0000001f, oemcp => 857, ebcdiccp => 20905, group => 6, sabbrevlangname => "TRK", sopentypelang => "TRK" },
                1417  { name => "tr-CY" },
                1418  { name => "tr-TR", lcid => 0x0000041f },
50c5eb31c Alex*1419  { name => "ts", lcid => 0x00000031, sopentypelang => "TSG" },
                1420  { name => "ts-ZA", lcid => 0x00000431 },
8cdb593f5 Alex*1421  { name => "tt", lcid => 0x00000044, oemcp => 866, group => 5, sabbrevlangname => "TTT" },
                1422  { name => "tt-Cyrl", alias => "tt" },
                1423  { name => "tt-Cyrl-RU", alias => "tt-RU" },
                1424  { name => "tt-RU", lcid => 0x00000444 },
                1425  { name => "twq" },
                1426  { name => "twq-NE" },
6d046dd98 Alex*1427  { name => "tyv" },
                1428  { name => "tyv-RU" },
8cdb593f5 Alex*1429  { name => "tzm", lcid => 0x0000005f, sortlocale => "tzm-Latn-DZ", oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "TZA" },
                1430  { name => "tzm-Latn", lcid => 0x00007c5f, territory => "DZ", file => "tzm" },
                1431  { name => "tzm-Latn-MA", file => "tzm_MA", oemcp => 65001 },
                1432  { name => "tzm-Latn-DZ", lcid => 0x0000085f, file => "tzm" },
                1433  { name => "tzm-MA", alias => "tzm-Latn-MA" },
b591b3167 Alex*1434  { name => "tzm-DZ", alias => "tzm-Latn-DZ" },
8cdb593f5 Alex*1435 ## name => "tzm-Arab", group => 13 },
                1436 ## name => "tzm-Arab-MA", lcid => 0x0000045f },
                1437 ## name => "tzm-Tfng", lcid => 0x0000785f },
                1438 ## name => "tzm-Tfng-MA", lcid => 0x0000105f },
4172c0482 Alex*1439  { name => "ug", lcid => 0x00000080, oemcp => 720, slist => ",", group => 13, sopentypelang => "UYG", nativedigits => "0123456789" },
8cdb593f5 Alex*1440  { name => "ug-Arab", alias => "ug" },
                1441  { name => "ug-Arab-CN", alias => "ug-CN" },
                1442  { name => "ug-CN", lcid => 0x00000480 },
                1443  { name => "uk", lcid => 0x00000022, oemcp => 866, maccp => 10017, ebcdiccp => 500, group => 5 },
                1444  { name => "uk-UA", lcid => 0x00000422 },
                1445  { name => "ur", lcid => 0x00000020, oemcp => 720 },
                1446  { name => "ur-IN", lcid => 0x00000820, maccp => 65001, sabbrevlangname => "URI" },
                1447  { name => "ur-PK", lcid => 0x00000420, group => 13 },
                1448  { name => "uz", lcid => 0x00000043, oemcp => 857, maccp => 10029, group => 2 },
                1449  { name => "uz-Arab", oemcp => 65001, maccp => 65001 },
                1450  { name => "uz-Arab-AF" },
                1451  { name => "uz-Cyrl", lcid => 0x00007843, oemcp => 866, maccp => 10007, group => 5, sabbrevlangname => "UZC" },
                1452  { name => "uz-Cyrl-UZ", lcid => 0x00000843 },
                1453  { name => "uz-Latn", lcid => 0x00007c43 },
                1454  { name => "uz-Latn-UZ", lcid => 0x00000443 },
                1455  { name => "vai" },
                1456  { name => "vai-Latn" },
                1457  { name => "vai-Latn-LR" },
                1458  { name => "vai-Vaii" },
                1459  { name => "vai-Vaii-LR" },
50c5eb31c Alex*1460  { name => "ve", lcid => 0x00000033, sabbrevlangname => "ZZZ" },
                1461  { name => "ve-ZA", lcid => 0x00000433 },
8cdb593f5 Alex*1462  { name => "vi", lcid => 0x0000002a, oemcp => 1258, slist => ",", group => 14, sabbrevlangname => "VIT", sopentypelang => "VIT" },
                1463  { name => "vi-VN", lcid => 0x0000042a },
6d046dd98 Alex*1464  { name => "vmw" },
                1465  { name => "vmw-MZ" },
50c5eb31c Alex*1466  { name => "vo" },
                1467  { name => "vo-001" },
8cdb593f5 Alex*1468  { name => "vun" },
                1469  { name => "vun-TZ" },
50c5eb31c Alex*1470  { name => "wa", oemcp => 850 },
                1471  { name => "wa-BE" },
8cdb593f5 Alex*1472  { name => "wae" },
                1473  { name => "wae-CH" },
50c5eb31c Alex*1474  { name => "wal" },
                1475  { name => "wal-ET" },
8cdb593f5 Alex*1476  { name => "wo", lcid => 0x00000088, oemcp => 850, ebcdiccp => 20297, sopentypelang => "WLF" },
                1477  { name => "wo-Latn", alias => "wo" },
                1478  { name => "wo-Latn-SN", alias => "wo-SN" },
                1479  { name => "wo-SN", lcid => 0x00000488 },
                1480  { name => "x-IV_mathan", lcid => 0x0001007f, alias => "" },
                1481  { name => "xh", lcid => 0x00000034, oemcp => 850, sopentypelang => "XHS" },
                1482  { name => "xh-ZA", lcid => 0x00000434 },
6d046dd98 Alex*1483  { name => "xnr" },
                1484  { name => "xnr-IN" },
8cdb593f5 Alex*1485  { name => "xog" },
                1486  { name => "xog-UG" },
                1487  { name => "yav" },
                1488  { name => "yav-CM" },
                1489  { name => "yi", lcid => 0x0000003d, sabbrevlangname => "ZZZ", sopentypelang => "JII" },
6d046dd98 Alex*1490  { name => "yi-001", lcid => 0x0000043d, file => "yi" },
                1491  { name => "yi-UA" },
8cdb593f5 Alex*1492  { name => "yo", lcid => 0x0000006a, oemcp => 437, sopentypelang => "YBA" },
                1493  { name => "yo-BJ", ebcdiccp => 500 },
                1494  { name => "yo-Latn", alias => "yo" },
                1495  { name => "yo-Latn-NG", alias => "yo-NG" },
                1496  { name => "yo-NG", lcid => 0x0000046a },
                1497  { name => "yrl" },
                1498  { name => "yrl-BR" },
                1499  { name => "yrl-CO" },
                1500  { name => "yrl-VE" },
                1501  { name => "yue" },
                1502  { name => "yue-Hans" },
                1503  { name => "yue-Hans-CN" },
                1504  { name => "yue-Hant" },
                1505  { name => "yue-Hant-HK" },
                1506  { name => "zgh" },
                1507  { name => "zgh-MA", alias => "zgh-Tfng-MA" },
                1508  { name => "zgh-Tfng", file => "zgh" },
                1509  { name => "zgh-Tfng-MA", file => "zgh_MA" },
6d046dd98 Alex*1510  { name => "za" },
                1511  { name => "za-CN" },
4172c0482 Alex*1512  { name => "zh", lcid => 0x00007804, ireadinglayout => 2, oemcp => 936, slist => ",", sscripts => "Hani Hans", sabbrevlangname => "CHS", sopentypelang => "ZHS", nativedigits => "0123456789" },
8cdb593f5 Alex*1513  { name => "zh-CN", lcid => 0x00000804, file => "zh_Hans_CN", sparent => "zh-Hans" },
                1514  { name => "zh-CN_phoneb", lcid => 0x00050804, alias => "zh-CN" },
                1515  { name => "zh-CN_stroke", lcid => 0x00020804, alias => "zh-CN" },
                1516  { name => "zh-Hans", lcid => 0x00000004, group => 10 },
                1517  { name => "zh-Hans-CN", alias => "zh-CN" },
                1518  { name => "zh-Hans-CN-u-co-phonebk", alias => "zh-CN_phoneb" },
                1519  { name => "zh-Hans-CN-u-co-stroke", alias => "zh-CN_stroke" },
4172c0482 Alex*1520  { name => "zh-Hans-HK", slist => ";", nativedigits => "" },
                1521  { name => "zh-Hans-MO", slist => ";", nativedigits => "" },
8cdb593f5 Alex*1522  { name => "zh-Hans-SG", alias => "zh-SG" },
                1523  { name => "zh-Hans-SG-u-co-phonebk", alias => "zh-SG_phoneb" },
                1524  { name => "zh-Hans-SG-u-co-stroke", alias => "zh-SG_stroke" },
                1525  { name => "zh-Hant", lcid => 0x00007c04, sortlocale => "zh-HK", ireadinglayout => 2, oemcp => 950, slist => ",", sscripts => "Hani Hant", group => 9, sabbrevlangname => "CHT", sopentypelang => "ZHH" },
                1526  { name => "zh-Hant-HK", alias => "zh-HK" },
                1527  { name => "zh-Hant-HK-u-co-unihan", alias => "zh-HK_radstr" },
                1528  { name => "zh-Hant-MO", alias => "zh-MO" },
                1529  { name => "zh-Hant-MO-u-co-stroke", alias => "zh-MO_stroke" },
                1530  { name => "zh-Hant-MO-u-co-unihan", alias => "zh-MO_radstr" },
                1531  { name => "zh-Hant-TW", alias => "zh-TW" },
                1532  { name => "zh-Hant-TW-u-co-phonetic", alias => "zh-TW_pronun" },
                1533  { name => "zh-Hant-TW-u-co-unihan", alias => "zh-TW_radstr" },
                1534  { name => "zh-HK", lcid => 0x00000c04, file => "zh_Hant_HK", sparent => "zh-Hant", sabbrevlangname => "ZHH" },
                1535  { name => "zh-HK_radstr", lcid => 0x00040c04, alias => "zh-HK" },
                1536  { name => "zh-MO", lcid => 0x00001404, file => "zh_Hant_MO", sparent => "zh-Hant", sabbrevlangname => "ZHM", sopentypelang => "ZHT" },
                1537  { name => "zh-MO_radstr", lcid => 0x00041404, alias => "zh-MO" },
                1538  { name => "zh-MO_stroke", lcid => 0x00021404, alias => "zh-MO" },
                1539  { name => "zh-SG", lcid => 0x00001004, file => "zh_Hans_SG", sparent => "zh-Hans", sabbrevlangname => "ZHI" },
                1540  { name => "zh-SG_phoneb", lcid => 0x00051004, alias => "zh-SG" },
                1541  { name => "zh-SG_stroke", lcid => 0x00021004, alias => "zh-SG" },
                1542  { name => "zh-TW", lcid => 0x00000404, file => "zh_Hant_TW", sparent => "zh-Hant", sopentypelang => "ZHT" },
                1543  { name => "zh-TW_pronun", lcid => 0x00030404, alias => "zh-TW" },
                1544  { name => "zh-TW_radstr", lcid => 0x00040404, alias => "zh-TW" },
                1545  { name => "zu", lcid => 0x00000035, oemcp => 850 },
                1546  { name => "zu-ZA", lcid => 0x00000435 },
                1547 );
                1548 
# Calendar table: one hash per calendar, with ids running from 1 to 23.
# Every entry has an "id" and either a literal "name" or a "type"; the
# "locale", "eras" (a list of indices) and "itwodigityearmax" keys are optional.
# NOTE(review): the ids look like the Windows CAL_* calendar identifiers and the
# "type" strings like CLDR calendar names, with "locale" selecting the locale
# whose data is used -- confirm against the code that consumes this table.
65fc470df Alex*1549 my @calendars =
                1550 (
                1551  { id => 1, name => "Gregorian", itwodigityearmax => 2049 },
                1552  { id => 2, type => "gregorian", locale => "en-US", itwodigityearmax => 2049 },
                1553  { id => 3, type => "japanese", locale => "ja-JP", eras => [ 232..236 ] },
                1554  { id => 4, type => "roc", locale => "zh-TW", eras => [ 1 ] },
                1555  { id => 5, type => "dangi", locale => "ko-KR", eras => [ 0 ] },
                1556  { id => 6, type => "islamic", locale => "ar-SA", itwodigityearmax => 1451 },
                1557  { id => 7, type => "buddhist", locale => "th-TH", eras => [ 0 ] },
                1558  { id => 8, type => "hebrew", locale => "he-IL", itwodigityearmax => 5810 },
                1559  { id => 9, type => "gregorian", locale => "fr-FR", itwodigityearmax => 2049 },
                1560  { id => 10, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
                1561  { id => 11, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
                1562  { id => 12, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
                1563  { id => 13, name => "Julian", locale => "en-US", itwodigityearmax => 2049 },
                1564  { id => 14, name => "Japanese Lunisolar" },
                1565  { id => 15, name => "Chinese Lunisolar" },
                1566  { id => 16, name => "Saka" },
                1567  { id => 17, name => "Lunar ETO Chinese" },
                1568  { id => 18, name => "Lunar ETO Korean" },
                1569  { id => 19, name => "Lunar ETO Rokuyou" },
                1570  { id => 20, name => "Korean Lunisolar" },
                1571  { id => 21, name => "Taiwan Lunisolar" },
                1572  { id => 22, type => "persian", locale => "prs-AF", itwodigityearmax => 1429 },
                1573  { id => 23, type => "islamic-umalqura", locale => "ar-SA", itwodigityearmax => 1451 },
                1574 );
                1575 
# Geographic location table: one hash per location, in ascending "id" order.
# Most entries carry a "name" (a two-letter code or a three-digit numeric
# code); entries without a code of their own may give a "parent" code instead.
# The trailing comment on each line is the English name of the location.
# NOTE(review): the ids look like Windows GEOID values and the names like
# ISO 3166-1 / UN M.49 codes; the meaning of the "region" and "sintlsymbol"
# keys is defined by the consuming code, which is outside this section --
# confirm there before relying on it.
ad02ef7be Alex*1576 my @geoids =
                1577 (
                1578  { id => 2,   name => "AG" },     # Antigua and Barbuda
                1579  { id => 3,   name => "AF" },     # Afghanistan
                1580  { id => 4,   name => "DZ" },     # Algeria
                1581  { id => 5,   name => "AZ" },     # Azerbaijan
                1582  { id => 6,   name => "AL" },     # Albania
                1583  { id => 7,   name => "AM" },     # Armenia
                1584  { id => 8,   name => "AD" },     # Andorra
                1585  { id => 9,   name => "AO" },     # Angola
                1586  { id => 10,  name => "AS" },     # American Samoa
                1587  { id => 11,  name => "AR" },     # Argentina
                1588  { id => 12,  name => "AU" },     # Australia
                1589  { id => 14,  name => "AT" },     # Austria
                1590  { id => 17,  name => "BH" },     # Bahrain
                1591  { id => 18,  name => "BB" },     # Barbados
                1592  { id => 19,  name => "BW" },     # Botswana
                1593  { id => 20,  name => "BM" },     # Bermuda
                1594  { id => 21,  name => "BE" },     # Belgium
                1595  { id => 22,  name => "BS" },     # Bahamas, The
                1596  { id => 23,  name => "BD" },     # Bangladesh
                1597  { id => 24,  name => "BZ" },     # Belize
                1598  { id => 25,  name => "BA" },     # Bosnia and Herzegovina
                1599  { id => 26,  name => "BO" },     # Bolivia
                1600  { id => 27,  name => "MM" },     # Myanmar
                1601  { id => 28,  name => "BJ" },     # Benin
                1602  { id => 29,  name => "BY" },     # Belarus
                1603  { id => 30,  name => "SB" },     # Solomon Islands
                1604  { id => 32,  name => "BR" },     # Brazil
                1605  { id => 34,  name => "BT" },     # Bhutan
                1606  { id => 35,  name => "BG" },     # Bulgaria
                1607  { id => 37,  name => "BN" },     # Brunei
                1608  { id => 38,  name => "BI" },     # Burundi
                1609  { id => 39,  name => "CA" },     # Canada
                1610  { id => 40,  name => "KH" },     # Cambodia
                1611  { id => 41,  name => "TD" },     # Chad
                1612  { id => 42,  name => "LK" },     # Sri Lanka
                1613  { id => 43,  name => "CG" },     # Congo
                1614  { id => 44,  name => "CD" },     # Congo (DRC)
                1615  { id => 45,  name => "CN" },     # China
                1616  { id => 46,  name => "CL" },     # Chile
                1617  { id => 49,  name => "CM" },     # Cameroon
                1618  { id => 50,  name => "KM" },     # Comoros
                1619  { id => 51,  name => "CO" },     # Colombia
                1620  { id => 54,  name => "CR" },     # Costa Rica
                1621  { id => 55,  name => "CF" },     # Central African Republic
                1622  { id => 56,  name => "CU" },     # Cuba
                1623  { id => 57,  name => "CV" },     # Cape Verde
                1624  { id => 59,  name => "CY" },     # Cyprus
                1625  { id => 61,  name => "DK" },     # Denmark
                1626  { id => 62,  name => "DJ" },     # Djibouti
                1627  { id => 63,  name => "DM" },     # Dominica
                1628  { id => 65,  name => "DO" },     # Dominican Republic
                1629  { id => 66,  name => "EC" },     # Ecuador
                1630  { id => 67,  name => "EG" },     # Egypt
                1631  { id => 68,  name => "IE" },     # Ireland
                1632  { id => 69,  name => "GQ" },     # Equatorial Guinea
                1633  { id => 70,  name => "EE" },     # Estonia
                1634  { id => 71,  name => "ER" },     # Eritrea
                1635  { id => 72,  name => "SV" },     # El Salvador
                1636  { id => 73,  name => "ET" },     # Ethiopia
                1637  { id => 75,  name => "CZ" },     # Czech Republic
                1638  { id => 77,  name => "FI" },     # Finland
                1639  { id => 78,  name => "FJ" },     # Fiji Islands
                1640  { id => 80,  name => "FM" },     # Micronesia
                1641  { id => 81,  name => "FO" },     # Faroe Islands
                1642  { id => 84,  name => "FR" },     # France
                1643  { id => 86,  name => "GM" },     # Gambia, The
                1644  { id => 87,  name => "GA" },     # Gabon
                1645  { id => 88,  name => "GE" },     # Georgia
                1646  { id => 89,  name => "GH" },     # Ghana
                1647  { id => 90,  name => "GI" },     # Gibraltar
                1648  { id => 91,  name => "GD" },     # Grenada
                1649  { id => 93,  name => "GL" },     # Greenland
                1650  { id => 94,  name => "DE" },     # Germany
                1651  { id => 98,  name => "GR" },     # Greece
                1652  { id => 99,  name => "GT" },     # Guatemala
                1653  { id => 100, name => "GN" },     # Guinea
                1654  { id => 101, name => "GY" },     # Guyana
                1655  { id => 103, name => "HT" },     # Haiti
                1656  { id => 104, name => "HK" },     # Hong Kong S.A.R.
                1657  { id => 106, name => "HN" },     # Honduras
                1658  { id => 108, name => "HR" },     # Croatia
                1659  { id => 109, name => "HU" },     # Hungary
                1660  { id => 110, name => "IS" },     # Iceland
                1661  { id => 111, name => "ID" },     # Indonesia
                1662  { id => 113, name => "IN" },     # India
                1663  { id => 114, name => "IO" },     # British Indian Ocean Territory
                1664  { id => 116, name => "IR" },     # Iran
                1665  { id => 117, name => "IL" },     # Israel
                1666  { id => 118, name => "IT" },     # Italy
                1667  { id => 119, name => "CI" },     # Côte d'Ivoire
                1668  { id => 121, name => "IQ" },     # Iraq
                1669  { id => 122, name => "JP" },     # Japan
                1670  { id => 124, name => "JM" },     # Jamaica
                1671  { id => 125, name => "SJ" },     # Jan Mayen
                1672  { id => 126, name => "JO" },     # Jordan
                1673  { id => 127, parent => "UM" },   # Johnston Atoll
                1674  { id => 129, name => "KE" },     # Kenya
                1675  { id => 130, name => "KG" },     # Kyrgyzstan
                1676  { id => 131, name => "KP" },     # North Korea
                1677  { id => 133, name => "KI" },     # Kiribati
                1678  { id => 134, name => "KR" },     # Korea
                1679  { id => 136, name => "KW" },     # Kuwait
                1680  { id => 137, name => "KZ" },     # Kazakhstan
                1681  { id => 138, name => "LA" },     # Laos
                1682  { id => 139, name => "LB" },     # Lebanon
                1683  { id => 140, name => "LV" },     # Latvia
                1684  { id => 141, name => "LT" },     # Lithuania
                1685  { id => 142, name => "LR" },     # Liberia
                1686  { id => 143, name => "SK" },     # Slovakia
                1687  { id => 145, name => "LI" },     # Liechtenstein
                1688  { id => 146, name => "LS" },     # Lesotho
                1689  { id => 147, name => "LU" },     # Luxembourg
                1690  { id => 148, name => "LY" },     # Libya
                1691  { id => 149, name => "MG" },     # Madagascar
                1692  { id => 151, name => "MO" },     # Macao S.A.R.
                1693  { id => 152, name => "MD" },     # Moldova
                1694  { id => 154, name => "MN" },     # Mongolia
                1695  { id => 156, name => "MW" },     # Malawi
                1696  { id => 157, name => "ML" },     # Mali
                1697  { id => 158, name => "MC" },     # Monaco
                1698  { id => 159, name => "MA" },     # Morocco
                1699  { id => 160, name => "MU" },     # Mauritius
                1700  { id => 162, name => "MR" },     # Mauritania
                1701  { id => 163, name => "MT" },     # Malta
                1702  { id => 164, name => "OM" },     # Oman
                1703  { id => 165, name => "MV" },     # Maldives
                1704  { id => 166, name => "MX" },     # Mexico
                1705  { id => 167, name => "MY" },     # Malaysia
                1706  { id => 168, name => "MZ" },     # Mozambique
                1707  { id => 173, name => "NE" },     # Niger
                1708  { id => 174, name => "VU" },     # Vanuatu
                1709  { id => 175, name => "NG" },     # Nigeria
                1710  { id => 176, name => "NL" },     # Netherlands
                1711  { id => 177, name => "NO" },     # Norway
                1712  { id => 178, name => "NP" },     # Nepal
                1713  { id => 180, name => "NR" },     # Nauru
                1714  { id => 181, name => "SR" },     # Suriname
                1715  { id => 182, name => "NI" },     # Nicaragua
                1716  { id => 183, name => "NZ" },     # New Zealand
                1717  { id => 184, name => "PS" },     # Palestinian Authority
                1718  { id => 185, name => "PY" },     # Paraguay
                1719  { id => 187, name => "PE" },     # Peru
                1720  { id => 190, name => "PK" },     # Pakistan
                1721  { id => 191, name => "PL" },     # Poland
                1722  { id => 192, name => "PA" },     # Panama
                1723  { id => 193, name => "PT" },     # Portugal
                1724  { id => 194, name => "PG" },     # Papua New Guinea
                1725  { id => 195, name => "PW" },     # Palau
                1726  { id => 196, name => "GW" },     # Guinea-Bissau
                1727  { id => 197, name => "QA" },     # Qatar
                1728  { id => 198, name => "RE" },     # Reunion
                1729  { id => 199, name => "MH" },     # Marshall Islands
                1730  { id => 200, name => "RO" },     # Romania
                1731  { id => 201, name => "PH" },     # Philippines
                1732  { id => 202, name => "PR" },     # Puerto Rico
                1733  { id => 203, name => "RU" },     # Russia
                1734  { id => 204, name => "RW" },     # Rwanda
                1735  { id => 205, name => "SA" },     # Saudi Arabia
                1736  { id => 206, name => "PM" },     # St. Pierre and Miquelon
                1737  { id => 207, name => "KN" },     # St. Kitts and Nevis
                1738  { id => 208, name => "SC" },     # Seychelles
                1739  { id => 209, name => "ZA" },     # South Africa
                1740  { id => 210, name => "SN" },     # Senegal
                1741  { id => 212, name => "SI" },     # Slovenia
                1742  { id => 213, name => "SL" },     # Sierra Leone
                1743  { id => 214, name => "SM" },     # San Marino
                1744  { id => 215, name => "SG" },     # Singapore
                1745  { id => 216, name => "SO" },     # Somalia
                1746  { id => 217, name => "ES" },     # Spain
                1747  { id => 218, name => "LC" },     # St. Lucia
                1748  { id => 219, name => "SD" },     # Sudan
                1749  { id => 220, name => "SJ" },     # Svalbard
                1750  { id => 221, name => "SE" },     # Sweden
                1751  { id => 222, name => "SY" },     # Syria
                1752  { id => 223, name => "CH" },     # Switzerland
                1753  { id => 224, name => "AE" },     # United Arab Emirates
                1754  { id => 225, name => "TT" },     # Trinidad and Tobago
                1755  { id => 227, name => "TH" },     # Thailand
                1756  { id => 228, name => "TJ" },     # Tajikistan
                1757  { id => 231, name => "TO" },     # Tonga
                1758  { id => 232, name => "TG" },     # Togo
                1759  { id => 233, name => "ST" },     # São Tomé and Príncipe
                1760  { id => 234, name => "TN" },     # Tunisia
                1761  { id => 235, name => "TR" },     # Turkey
                1762  { id => 236, name => "TV" },     # Tuvalu
                1763  { id => 237, name => "TW" },     # Taiwan
                1764  { id => 238, name => "TM" },     # Turkmenistan
                1765  { id => 239, name => "TZ" },     # Tanzania
                1766  { id => 240, name => "UG" },     # Uganda
                1767  { id => 241, name => "UA" },     # Ukraine
                1768  { id => 242, name => "GB" },     # United Kingdom
                1769  { id => 244, name => "US" },     # United States
                1770  { id => 245, name => "BF" },     # Burkina Faso
                1771  { id => 246, name => "UY" },     # Uruguay
                1772  { id => 247, name => "UZ" },     # Uzbekistan
                1773  { id => 248, name => "VC" },     # St. Vincent and the Grenadines
                1774  { id => 249, name => "VE" },     # Bolivarian Republic of Venezuela
                1775  { id => 251, name => "VN" },     # Vietnam
                1776  { id => 252, name => "VI" },     # Virgin Islands
                1777  { id => 253, name => "VA" },     # Vatican City
                1778  { id => 254, name => "NA" },     # Namibia
                1779  { id => 257, name => "EH" },     # Western Sahara (disputed)
                1780  { id => 258, parent => "UM" },   # Wake Island
                1781  { id => 259, name => "WS" },     # Samoa
                1782  { id => 260, name => "SZ" },     # Swaziland
                1783  { id => 261, name => "YE" },     # Yemen
                1784  { id => 263, name => "ZM" },     # Zambia
                1785  { id => 264, name => "ZW" },     # Zimbabwe
                1786  { id => 269, name => "CS" },     # Serbia and Montenegro (Former)
                1787  { id => 270, name => "ME" },     # Montenegro
                1788  { id => 271, name => "RS" },     # Serbia
                1789  { id => 273, name => "CW" },     # Curaçao
                1790  { id => 276, name => "SS" },     # South Sudan
                1791  { id => 300, name => "AI" },     # Anguilla
                1792  { id => 301, name => "AQ" },     # Antarctica
                1793  { id => 302, name => "AW" },     # Aruba
                1794  { id => 303, parent => "SH" },   # Ascension Island
                1795  { id => 304, parent => "053" },  # Ashmore and Cartier Islands
                1796  { id => 305, parent => "UM" },   # Baker Island
                1797  { id => 306, name => "BV" },     # Bouvet Island
                1798  { id => 307, name => "KY" },     # Cayman Islands
                1799  { id => 308, name => "830", parent => "155" }, # Channel Islands
                1800  { id => 309, name => "CX" },     # Christmas Island
                1801  { id => 310, parent => "009" },  # Clipperton Island
                1802  { id => 311, name => "CC" },     # Cocos (Keeling) Islands
                1803  { id => 312, name => "CK" },     # Cook Islands
                1804  { id => 313, parent => "053" },  # Coral Sea Islands
                1805  { id => 314, parent => "IO" },   # Diego Garcia
                1806  { id => 315, name => "FK" },     # Falkland Islands (Islas Malvinas)
                1807  { id => 317, name => "GF" },     # French Guiana
                1808  { id => 318, name => "PF" },     # French Polynesia
                1809  { id => 319, name => "TF" },     # French Southern and Antarctic Lands
                1810  { id => 321, name => "GP" },     # Guadeloupe
                1811  { id => 322, name => "GU" },     # Guam
                1812  { id => 323 },                   # Guantanamo Bay
                1813  { id => 324, name => "GG" },     # Guernsey
                1814  { id => 325, name => "HM" },     # Heard Island and McDonald Islands
                1815  { id => 326, parent => "UM" },   # Howland Island
                1816  { id => 327, parent => "UM" },   # Jarvis Island
                1817  { id => 328, name => "JE" },     # Jersey
                1818  { id => 329, parent => "UM" },   # Kingman Reef
                1819  { id => 330, name => "MQ" },     # Martinique
                1820  { id => 331, name => "YT" },     # Mayotte
                1821  { id => 332, name => "MS" },     # Montserrat
                1822  { id => 333, name => "AN", region => 1 }, # Netherlands Antilles (Former)
                1823  { id => 334, name => "NC" },     # New Caledonia
                1824  { id => 335, name => "NU" },     # Niue
                1825  { id => 336, name => "NF" },     # Norfolk Island
                1826  { id => 337, name => "MP" },     # Northern Mariana Islands
                1827  { id => 338, parent => "UM" },   # Palmyra Atoll
                1828  { id => 339, name => "PN" },     # Pitcairn Islands
                1829  { id => 340, parent => "MP" },   # Rota Island
                1830  { id => 341, parent => "MP" },   # Saipan
                1831  { id => 342, name => "GS" },     # South Georgia and the South Sandwich Islands
                1832  { id => 343, name => "SH" },     # St. Helena
                1833  { id => 346, parent => "MP" },   # Tinian Island
                1834  { id => 347, name => "TK" },     # Tokelau
                1835  { id => 348, parent => "SH" },   # Tristan da Cunha
                1836  { id => 349, name => "TC" },     # Turks and Caicos Islands
                1837  { id => 351, name => "VG" },     # Virgin Islands, British
                1838  { id => 352, name => "WF" },     # Wallis and Futuna
                1839  { id => 742, name => "002" },    # Africa
                1840  { id => 2129, name => "142" },   # Asia
                1841  { id => 10541, name => "150" },  # Europe
                1842  { id => 15126, name => "IM" },   # Man, Isle of
                1843  { id => 19618, name => "MK" },   # Macedonia, Former Yugoslav Republic of
                1844  { id => 20900, name => "054" },  # Melanesia
                1845  { id => 21206, name => "057" },  # Micronesia
                1846  { id => 21242, parent => "UM" }, # Midway Islands
                1847  { id => 23581, name => "021" },  # Northern America
                1848  { id => 26286, name => "061" },  # Polynesia
                1849  { id => 27082, name => "013" },  # Central America
                1850  { id => 27114, name => "009" },  # Oceania
                1851  { id => 30967, name => "SX" },   # Sint Maarten (Dutch part)
                1852  { id => 31396, name => "005" },  # South America
                1853  { id => 31706, name => "MF" },   # Saint Martin (French part)
                1854  { id => 39070, name => "001" },  # World
                1855  { id => 42483, name => "011" },  # Western Africa
                1856  { id => 42484, name => "017" },  # Middle Africa
                1857  { id => 42487, name => "015" },  # Northern Africa
                1858  { id => 47590, name => "143" },  # Central Asia
                1859  { id => 47599, name => "035" },  # South-Eastern Asia
                1860  { id => 47600, name => "030" },  # Eastern Asia
                1861  { id => 47603, name => "014" },  # Eastern Africa
                1862  { id => 47609, name => "151" },  # Eastern Europe
                1863  { id => 47610, name => "039" },  # Southern Europe
                1864  { id => 47611, name => "145" },  # Middle East
                1865  { id => 47614, name => "034" },  # Southern Asia
                1866  { id => 7299303, name => "TL" }, # Democratic Republic of Timor-Leste
                1867  { id => 9914689, name => "XK" }, # Kosovo
                1868  { id => 10026358, name => "019" }, # Americas
                1869  { id => 10028789, name => "AX" },  # Åland Islands
                1870  { id => 10039880, name => "029", sintlsymbol => "XCD" }, # Caribbean
                1871  { id => 10039882, name => "154" }, # Northern Europe
                1872  { id => 10039883, name => "018" }, # Southern Africa
                1873  { id => 10210824, name => "155" }, # Western Europe
                1874  { id => 10210825, name => "053" }, # Australia and New Zealand
                1875  { id => 161832015, name => "BL" }, # Saint Barthélemy
                1876  { id => 161832256, name => "UM" }, # U.S. Minor Outlying Islands
                1877  { id => 161832257, name => "419", parent => "019" }, # Latin America and the Caribbean
                1878  { id => 161832258, name => "BQ" },   # Bonaire, Sint Eustatius and Saba
                1879 );
                1880 
# File-scope working tables; every array starts out empty.
my (@cp2uni, @glyph2uni, @lead_bytes, @uni2cp);
my (@tolower_table, @toupper_table, @digitmap_table);
my (@halfwidth_table, @fullwidth_table, @cjk_compat_table);
my (@chinese_traditional_table, @chinese_simplified_table);
my (@category_table, @initial_joining_table, @direction_table);
my (@decomp_table, @combining_class_table, @decomp_compat_table);
my (@comp_exclusions, @idna_decomp_table, @idna_disallowed);
my %registry_keys;
my ($default_char, $default_wchar);

# one (initially empty) list per joining form
my %joining_forms = map { ($_ => []) } qw(isolated final initial medial);

# description of the most recently opened data file; set by open_data_file
my $current_data_file;
                1915 
################################################################
# convert a list of code points to a list of UTF-16 code units
#
# Values below 0x10000 are passed through unchanged; anything else is
# split into a high (0xd800-based) and a low (0xdc00-based) surrogate.
sub to_utf16(@)
{
    my @units = map
    {
        my $offset = $_ - 0x10000;
        $offset < 0 ? $_ : (0xd800 | ($offset >> 10), 0xdc00 | ($offset & 0x3ff));
    } @_;
    return @units;
}
                1933 
################################################################
# fetch a data file into the local cache if needed, verify it, and open it
#
# $id selects an entry of %data_files; the entry supplies the download "url",
# the expected SHA-256 checksum ("sha") and optionally the cache file "name"
# (the last path component of the url is used otherwise).
# $name is the member to extract when the cached file is a .zip or .tar.gz
# archive; any other file is opened directly.
#
# Returns a read handle (glob) and records the source in $current_data_file.
# Dies on an unknown id, a failed download, a checksum mismatch or a failed open.
sub open_data_file($@)
{
    my ($id, $name) = @_;
    my $data = $data_files{$id} or die "unknown data file $id";
    my $cache = ($ENV{XDG_CACHE_HOME} || "$ENV{HOME}/.cache") . "/wine";
    local *FILE;

    my $url = $data->{url};
    my $filename = "$cache/" . ($data->{name} || ($url =~ s/.*\/([^\/]+)$/$1/r));
    unless (-f $filename)
    {
        print "Fetching $url...\n";
        system "mkdir", "-p", $cache;
        if (system( "wget", "-q", "-O", $filename, $url ) != 0)
        {
            # wget -O creates the output file even when the transfer fails;
            # remove it so that the next run retries the download instead of
            # finding a truncated cache entry and failing the checksum
            unlink $filename;
            die "cannot fetch $url";
        }
    }

    my $sha = Digest::SHA->new( "sha256" )->addfile( $filename )->hexdigest;
    die "invalid checksum $sha for $filename" unless $sha eq $data->{sha};

    if ($filename =~ /\.zip$/)
    {
        open FILE, "-|", "unzip", "-p", $filename, $name or die "cannot extract $name from $filename";
    }
    elsif ($filename =~ /\.tar\.gz$/)
    {
        open FILE, "-|", "tar", "-x", "-f", $filename, "-O", $name or die "cannot extract $name from $filename";
    }
    else
    {
        # three-argument form so the path is never parsed for mode characters
        open FILE, "<", $filename or die "cannot open $filename";
    }
    $current_data_file = $name ? "$url:$name" : $url;
    return *FILE;
}
                1970 
################################################################
# load a unicode.org file as XML data
#
# Takes the same arguments as open_data_file() and returns the document
# object produced by XML::LibXML->load_xml for the file contents.
sub load_xml_data_file($@)
{
    my ($id, $name) = @_;
    my $FILE = open_data_file( $id, $name );
    my $xml = XML::LibXML->load_xml( IO => $FILE );
    # close the handle we were given: open_data_file() localizes the FILE
    # glob, so the bareword FILE does not refer to this handle here
    close $FILE;
    return $xml;
}
                1981 
################################################################
# recursively get the decomposition for a character
#
# $table is a reference to an array indexed by character, whose defined
# entries are array refs listing the characters of the decomposition.
# Returns the fully expanded list, or the character itself if it has no entry.
sub get_decomposition($$);
sub get_decomposition($$)
{
    my ($char, $table) = @_;
    my $parts = $table->[$char];

    return $char unless defined $parts;
    return map { get_decomposition( $_, $table ) } @{$parts};
}
                1997 
################################################################
# get the composition that results in a given character
#
# Returns the canonical decomposition of $ch when it can be used as a
# composition pair, or an empty list otherwise. $compat selects extra
# restrictions: 1 checks the compatibility decompositions, 2 the IDNA ones.
sub get_composition($$)
{
    my ($ch, $compat) = @_;
    my $decomp = $decomp_table[$ch];

    return () unless defined $decomp;                 # no decomposition
    my @ret = @{$decomp};
    return () if @ret < 2;                            # singleton decomposition
    return () if $comp_exclusions[$ch];               # composition exclusion
    return () if $combining_class_table[$ch];         # non-starter

    my ($first, $second) = @ret[0, 1];
    return () if $combining_class_table[$first];      # first char is non-starter

    if ($compat == 1)
    {
        # first char has compat decomposition
        return () if !defined $decomp_table[$first] && defined $decomp_compat_table[$first];
    }
    elsif ($compat == 2)
    {
        my $idna_first = $idna_decomp_table[$first];
        # first char has IDNA decomposition
        return () if !defined $decomp_table[$first] && defined $idna_first;
        # first char's decomposition has IDNA decomposition
        return () if defined $idna_first && defined $idna_decomp_table[$idna_first->[0]];
        # second char has IDNA decomposition
        return () if defined $idna_decomp_table[$second];
    }
    return @ret;
}
                2018 
################################################################
# recursively build decompositions
#
# Takes a decomposition table (array indexed by character) and returns a
# table of the same shape where each defined entry is the fully expanded
# decomposition converted to UTF-16.
sub build_decompositions(@)
{
    my @src = @_;
    my @dst;

    foreach my $char (grep { defined $src[$_] } 0 .. $#src)
    {
        $dst[$char] = [ to_utf16( get_decomposition( $char, \@src )) ];
    }
    return @dst;
}
                2034 
################################################################
# compose Hangul sequences
#
# Replaces leading+vowel jamo pairs by the corresponding LV syllable, and
# an LV syllable followed by a trailing jamo by the LVT syllable; all other
# characters are passed through unchanged.
sub compose_hangul(@)
{
    my @input = @_;

    my ($s_base, $l_base, $v_base, $t_base) = (0xac00, 0x1100, 0x1161, 0x11a7);
    my ($l_count, $v_count, $t_count) = (19, 21, 28);
    my $s_count = $l_count * $v_count * $t_count;

    my @composed;
    my $pos = 0;

    while ($pos < @input)
    {
        my $cur = $input[$pos++];

        # leading consonant followed by a vowel -> LV syllable
        if ($cur >= $l_base && $cur < $l_base + $l_count && $pos < @input)
        {
            my $v_index = $input[$pos] - $v_base;
            if ($v_index >= 0 && $v_index < $v_count)
            {
                $cur = $s_base + (($cur - $l_base) * $v_count + $v_index) * $t_count;
                $pos++;
            }
        }

        # LV syllable followed by a trailing consonant -> LVT syllable
        if ($cur >= $s_base && $cur < $s_base + $s_count &&
            ($cur - $s_base) % $t_count == 0 && $pos < @input)
        {
            my $t_index = $input[$pos] - $t_base;
            if ($t_index > 0 && $t_index < $t_count)
            {
                $cur += $t_index;
                $pos++;
            }
        }
        push @composed, $cur;
    }
    return @composed;
}
                2072 
################################################################
# remove linguistic-only mappings from the case table
#
# $upper and $lower are references to the upper/lower case tables, which
# are modified in place: an entry is reset to undef unless the other table
# maps the result back to the original character.
sub remove_linguistic_mappings($$)
{
    my ($upper, $lower) = @_;

    # remove case mappings that don't round-trip; the upper case table is
    # cleaned first, so the second pass sees the already pruned entries

    foreach my $pass ([ $upper, $lower ], [ $lower, $upper ])
    {
        my ($table, $reverse) = @{$pass};
        foreach my $char (0 .. $#{$table})
        {
            my $mapped = $table->[$char];
            next unless defined $mapped;
            my $back = $reverse->[$mapped];
            next if defined $back && $back == $char;
            $table->[$char] = undef;
        }
    }
}
                2094 
################################################################
# read in the Unicode database files
#
# Takes no arguments. Fills the file-level tables from four data files
# opened through open_data_file():
#   - UnicodeData.txt: category, direction, joining, case, digit, combining
#     class, decomposition, half/full width, joining form and CJK
#     compatibility tables
#   - CompositionExclusions.txt: @comp_exclusions
#   - IdnaMappingTable.txt: @idna_decomp_table and @idna_disallowed
#   - Unihan_Variants.txt: the Chinese traditional/simplified tables
# Dies if UnicodeData.txt uses a category or directionality that is not
# present in %categories / %directions.
sub load_data()
{
    # first code point of the current "<..., First>" / "<..., Last>" range
    my $start;

    # now build mappings from the decomposition field of the Unicode database

    my $UNICODE_DATA = open_data_file( "ucd", "UnicodeData.txt" );
    while (<$UNICODE_DATA>)
    {
    # Decode the fields ...
    my ($code, $name, $cat, $comb, $bidi,
            $decomp, $dec, $dig, $num, $mirror,
            $oldname, $comment, $upper, $lower, $title) = split /;/;
        my $src = hex $code;

        die "unknown category $cat" unless defined $categories{$cat};
        die "unknown directionality $bidi" unless defined $directions{$bidi};

        $category_table[$src] = $categories{$cat};
        $direction_table[$src] = $bidi;
        # default joining type: "T" for Mn, Me and Cf characters, "U" for the rest
        if ($cat eq "Mn" || $cat eq "Me" || $cat eq "Cf")
        {
            $initial_joining_table[$src] = $joining_types{"T"};
        }
        else
        {
            $initial_joining_table[$src] = $joining_types{"U"};
        }

        if ($lower ne "")
        {
            $tolower_table[$src] = hex $lower;
        }
        if ($upper ne "")
        {
            $toupper_table[$src] = hex $upper;
        }
        if ($dec ne "")
        {
            $category_table[$src] |= $ctype{"digit"};
        }
        if ($dig ne "")
        {
            # stores the character code of the first character of the digit field
            $digitmap_table[$src] = ord $dig;
        }
        $combining_class_table[$src] = ($cat ne "Co") ? $comb : 0x100; # Private Use

        # extra category flags derived from the bidi class and the character names
        $category_table[$src] |= $ctype{"nonspacing"}    if $bidi eq "NSM";
        # note: the alternation is at the top level of the pattern, i.e. a name
        # starting with COMBINING, or containing MODIFIER LETTER followed by a
        # non-word character
        $category_table[$src] |= $ctype{"diacritic"}     if $name =~ /^(COMBINING)|(MODIFIER LETTER)\W/;
        $category_table[$src] |= $ctype{"vowelmark"}     if $name =~ /\sVOWEL/ || $oldname =~ /\sVOWEL/;
        $category_table[$src] |= $ctype{"halfwidth"}     if $name =~ /^HALFWIDTH\s/;
        $category_table[$src] |= $ctype{"fullwidth"}     if $name =~ /^FULLWIDTH\s/;
        $category_table[$src] |= $ctype{"hiragana"}      if $name =~ /(HIRAGANA)|(\WKANA\W)/;
        $category_table[$src] |= $ctype{"katakana"}      if $name =~ /(KATAKANA)|(\WKANA\W)/;
        $category_table[$src] |= $ctype{"ideograph"}     if $name =~ /^<CJK Ideograph/;
        $category_table[$src] |= $ctype{"ideograph"}     if $name =~ /^CJK COMPATIBILITY IDEOGRAPH/;
        $category_table[$src] |= $ctype{"ideograph"}     if $name =~ /^HANGZHOU/;
        $category_table[$src] |= $ctype{"highsurrogate"} if $name =~ /High Surrogate/;
        $category_table[$src] |= $ctype{"lowsurrogate"}  if $name =~ /Low Surrogate/;

        # copy the category and direction for everything between First/Last pairs
        if ($name =~ /, First>/) { $start = $src; }
        if ($name =~ /, Last>/)
        {
            while ($start < $src)
            {
                $category_table[$start] = $category_table[$src];
                $direction_table[$start] = $direction_table[$src];
                $combining_class_table[$start] = $combining_class_table[$src];
                $start++;
            }
        }

        next if $decomp eq "";  # no decomposition, skip it

        # tagged decomposition "<tag> 1234 ...": keep the whole sequence after
        # the tag as compatibility decomposition
        if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)/)
        {
            my @seq = map { hex $_; } (split /\s+/, (split /\s+/, $decomp, 2)[1]);
            $decomp_compat_table[$src] = \@seq;
        }

        if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)$/)
        {
            # decomposition of the form "<foo> 1234" -> use char if type is known
            my $dst = hex $2;
            if ($1 eq "narrow")
            {
                $halfwidth_table[$dst] = $src;
                $fullwidth_table[$src] = $dst;
            }
            elsif ($1 eq "wide")
            {
                next if $dst == 0x5c; # don't remap backslash
                $fullwidth_table[$dst] = $src;
                $halfwidth_table[$src] = $dst;
            }
            elsif ($1 eq "font" || $1 eq "square" || $1 eq "circle")
            {
                # only recorded for characters outside the BMP
                $fullwidth_table[$src] = $dst if $src >= 0x10000;
            }
            elsif ($1 eq "isolated" || $1 eq "final" || $1 eq "initial" || $1 eq "medial")
            {
                # map the base character to the character carrying this positional form
                ${joining_forms{$1}}[$dst] = $src;
            }
        }
        elsif ($decomp =~ /^<compat>\s+0020\s+([0-9a-fA-F]+)/)
        {
            # decomposition "<compat> 0020 1234" -> combining accent
            # (deliberately ignored here)
        }
        elsif ($decomp =~ /^([0-9a-fA-F]+)/)
        {
            # store decomposition (untagged; only one or two characters are recorded)
            if ($decomp =~ /^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)$/)
            {
                $decomp_table[$src] = $decomp_compat_table[$src] = [ hex $1, hex $2 ];
            }
            elsif ($decomp =~ /^([0-9a-fA-F]+)$/)
            {
                my $dst = hex $1;
                # Single char decomposition
                $decomp_table[$src] = $decomp_compat_table[$src] = [ $dst ];
                if ($name =~ /^CJK COMPATIBILITY IDEOGRAPH/)
                {
                    $cjk_compat_table[$src] = $dst;
                    $fullwidth_table[$src] = $dst if $src >= 0x10000;
                }
            }
        }
    }
    close $UNICODE_DATA;

    # patch the category of some special characters

    # a decomposable character inherits the flags of the first character
    # of its decomposition
    for (my $i = 0; $i < @decomp_table; $i++)
    {
        next unless defined $decomp_table[$i];
        $category_table[$i] |= $category_table[$decomp_table[$i]->[0]];
    }
    # explicit per-flag character lists from %special_categories
    foreach my $cat (keys %special_categories)
    {
        my $flag = $ctype{$cat};
        foreach my $i (@{$special_categories{$cat}}) { $category_table[$i] |= $flag; }
    }
    # for two-character compatibility decompositions, propagate only the
    # diacritic flag of the second character
    for (my $i = 0; $i < @decomp_compat_table; $i++)
    {
        next unless defined $decomp_compat_table[$i];
        next unless @{$decomp_compat_table[$i]} == 2;
        $category_table[$i] |= $category_table[$decomp_compat_table[$i]->[1]] & $ctype{"diacritic"};
    }

    # load the composition exclusions

    my $EXCL = open_data_file( "ucd", "CompositionExclusions.txt" );
    while (<$EXCL>)
    {
        s/\#.*//;  # remove comments
        # either a "XXXX..YYYY" range or a single code point per line
        if (/^([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)\s*$/)
        {
            foreach my $i (hex $1 .. hex $2) { $comp_exclusions[$i] = 1; }
        }
        elsif (/^([0-9a-fA-F]+)\s*$/)
        {
            $comp_exclusions[hex $1] = 1;
        }
    }
    close $EXCL;

    # load the IDNA mappings

    # start from the compatibility decompositions, then override them with
    # the entries of the IDNA mapping table
    @idna_decomp_table = @decomp_compat_table;
    my $IDNA = open_data_file( "idna", "IdnaMappingTable.txt" );
    while (<$IDNA>)
    {
        s/\#.*//;  # remove comments
        next if /^\s*$/;
        my ($char, $type, $mapping) = split /;/;
        # $ch1 .. $ch2 is the code point range covered by this line
        my ($ch1, $ch2);
        if ($char =~ /([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)/)
        {
            $ch1 = hex $1;
            $ch2 = hex $2;
        }
        elsif ($char =~ /([0-9a-fA-F]+)/)
        {
            $ch1 = $ch2 = hex $1;
        }

        if ($type =~ /mapped/ || $type =~ /deviation/)
        {
            # strip leading whitespace from the mapping field; an empty
            # mapping is stored as [ 0 ]
            $mapping =~ s/^\s*(([0-9a-fA-F]+\s+)+)\s*$/$1/;
            my @seq = map { hex $_; } split /\s+/, $mapping;
            foreach my $i ($ch1 .. $ch2) { $idna_decomp_table[$i] = @seq ? \@seq : [ 0 ]; }
        }
        elsif ($type =~ /valid/)
        {
            # nothing to do: the entry copied from the compatibility table is kept
        }
        elsif ($type =~ /ignored/)
        {
            foreach my $i ($ch1 .. $ch2) { $idna_decomp_table[$i] = [ 0 ]; }
        }
        elsif ($type =~ /disallowed/)
        {
            foreach my $i ($ch1 .. $ch2)
            {
                $idna_decomp_table[$i] = undef;
                $idna_disallowed[$i] = 1;
            }
        }
    }
    close $IDNA;

    # load the Unihan mappings

    my $UNIHAN = open_data_file( "unihan", "Unihan_Variants.txt" );
    while (<$UNIHAN>)
    {
        s/\#.*//;  # remove comments
        next if /^\s*$/;
        # only lines with a single variant and 4-digit code points on both
        # sides match these patterns
        if (/^U\+([0-9a-fA-F]{4})\s+kTraditionalVariant\s+U\+([0-9a-fA-F]{4})$/)
        {
            next if hex $1 < 0x4dc0;  # skip extension A
            $chinese_traditional_table[hex $1] = hex $2;
        }
        elsif (/^U\+([0-9a-fA-F]{4})\s+kSimplifiedVariant\s+U\+([0-9a-fA-F]{4})$/)
        {
            next if hex $1 < 0x4dc0;  # skip extension A
            $chinese_simplified_table[hex $1] = hex $2;
        }
    }
    close $UNIHAN;
    # map CJK compatibility ideographs to their decomposition, unless that
    # decomposition has a simplified variant of its own
    foreach my $i (0xf900..0xfaff)
    {
        next unless defined $cjk_compat_table[$i];
        next if defined $chinese_simplified_table[$cjk_compat_table[$i]];
        $chinese_simplified_table[$i] = $cjk_compat_table[$i];
    }
}
                2334 
                2335 
################################################################
# add a new registry key
#
# Registers "$base\$key" with $defval as its default value; a key that has
# already been registered is left untouched.
sub add_registry_key($$$)
{
    my ($base, $key, $defval) = @_;
    my $path = "$base\\$key";
    $registry_keys{$path} //= [ $defval ];
}
                2343 
                2344 ################################################################
                2345 # add a new registry value with explicit type
                2346 sub add_registry_value($$$$)
712839d58 Alex*2347 {
630f605c2 Alex*2348     my ($base, $key, $name, $value) = @_;
                2349     add_registry_key( $base, $key, undef );
                2350     push @{$registry_keys{"$base\\$key"}}, "'$name' = $value";
712839d58 Alex*2351 }
                2352 
                2353 ################################################################
630f605c2 Alex*2354 # add a new registry string value
                2355 sub add_registry_string_value($$$$)
712839d58 Alex*2356 {
630f605c2 Alex*2357     my ($base, $key, $name, $value) = @_;
                2358     $value =~ s/\'/\'\'/g;
                2359     add_registry_value( $base, $key, $name, "s '$value'" );
                2360 }
                2361 
                2362 ################################################################
                2363 # add a new registry dword value
                2364 sub add_registry_dword_value($$$$)
                2365 {
                2366     my ($base, $key, $name, $value) = @_;
                2367     add_registry_value( $base, $key, $name, "d $value" );
                2368 }
                2369 
                2370 ################################################################
                2371 # add a new registry binary value
                2372 sub add_registry_binary_value($$$$)
                2373 {
                2374     my ($base, $key, $name, $value) = @_;
                2375     add_registry_value( $base, $key, $name, "b " . join "", map { sprintf "%02x", $_; } unpack( "C*", $value ));
712839d58 Alex*2376 }
                2377 
################################################################
# define a new lead byte
#
# Does nothing if the byte already has an entry in @cp2uni; otherwise the
# byte is appended to @lead_bytes and mapped to 0.
sub add_lead_byte($)
{
    my ($byte) = @_;
    unless (defined $cp2uni[$byte])
    {
        push @lead_bytes, $byte;
        $cp2uni[$byte] = 0;
    }
}
                2387 
                2388 ################################################################
                2389 # define a new char mapping
                2390 sub add_mapping($$)
                2391 {
                2392     my ($cp, $uni) = @_;
                2393     $cp2uni[$cp] = $uni unless defined($cp2uni[$cp]);
                2394     $uni2cp[$uni] = $cp unless defined($uni2cp[$uni]);
a9285c513 Alex*2395     if ($cp > 0xff) { add_lead_byte( $cp >> 8 ); }
f54c2f65b Alex*2396 }
                2397 
################################################################
# get a mapping including glyph chars for MB_USEGLYPHCHARS
#
# Returns a copy of the given table in which every index that has a
# defined entry in @glyph2uni is replaced by that entry.
sub get_glyphs_mapping(@)
{
    my @table = @_;

    foreach my $char (grep { defined $glyph2uni[$_] } 0 .. $#glyph2uni)
    {
        $table[$char] = $glyph2uni[$char];
    }
    return @table;
}
494c42d01 Alex*2410 
################################################################
# build EUC-JP table from the JIS 0208/0212 files
#
# Resets the code page tables, defines the mappings in a fixed order (the
# first mapping defined for a char wins) and outputs code page 20932.
sub dump_eucjp_codepage()
{
    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = $DEF_CHAR;
    $default_wchar = 0x30fb;

    # ASCII chars
    add_mapping( $_, $_ ) for 0x00 .. 0x7f;

    # lead bytes
    add_lead_byte( $_ ) for 0x8e, 0xa1 .. 0xfe;

    # JIS X 0201 right plane
    add_mapping( 0x8e00 + $_, 0xfec0 + $_ ) for 0xa1 .. 0xdf;

    # undefined chars
    $cp2uni[$_] = $_ for 0x80 .. 0x8d, 0x8f .. 0x9f;
    $cp2uni[0xa0] = 0xf8f0;
    $cp2uni[0xff] = 0xf8f3;

    # Fix backslash conversion
    add_mapping( 0xa1c0, 0xff3c );

    # Add private mappings for rows undefined in JIS 0208/0212:
    # first all rows with trail bytes a1..fe, then all rows with 21..7e
    my $private = 0xe000;
    foreach my $trail ([ 0xa1, 0xfe ], [ 0x21, 0x7e ])
    {
        my ($first, $last) = @{$trail};
        foreach my $hi (0xf5 .. 0xfe)
        {
            add_mapping( ($hi << 8) + $_, $private++ ) for $first .. $last;
        }
    }

    my $INPUT = open_data_file( "jis0208" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^$/ || /\x1a/;
        my ($jis, $uni) = /^0x[0-9a-fA-F]+\s+0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/
            or die "Unrecognized line $_\n";
        add_mapping( 0x8080 + hex $jis, hex $uni );
    }
    close $INPUT;

    $INPUT = open_data_file( "jis0212" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^$/ || /\x1a/;
        my ($jis, $uni) = /^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/
            or die "Unrecognized line $_\n";
        add_mapping( 0x8000 + hex $jis, hex $uni );
    }
    close $INPUT;

    output_codepage_file( 20932 );
}
                2488 
################################################################
# build Korean Wansung table from the KSX1001 file
#
# The argument list is the Unicode -> code page 949 table, used to add
# mappings that the KSX1001 file does not provide. Resets the code page
# tables and outputs code page 20949.
sub dump_krwansung_codepage(@)
{
    my @cp949 = @_;

    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = 0x3f;
    $default_wchar = 0x003f;

    # ASCII and undefined chars
    add_mapping( $_, $_ ) for 0x00 .. 0x9f;
    my $private = 0xf8e6;
    add_mapping( $_, $private++ ) for 0xa0, 0xad, 0xae, 0xaf, 0xfe, 0xff;

    my $INPUT = open_data_file( "ksx1001" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^$/ || /\x1a/;
        my ($ksx, $uni) = /^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/
            or die "Unrecognized line $_\n";
        add_mapping( 0x8080 + hex $ksx, hex $uni );
    }
    close $INPUT;

    # get some extra mappings from cp 949; the set of lead bytes is
    # captured before any of these mappings is added
    my @defined_lb;
    $defined_lb[$_] = 1 for @lead_bytes;
    foreach my $uni (0x0000 .. 0xffff)
    {
        next if $uni >= 0x1100 && $uni <= 0x11ff;  # range not used in 20949
        my $cp = $cp949[$uni];
        next unless defined $cp;
        if ($cp >= 0xff)
        {
            # only add chars for lead bytes that exist in 20949
            next unless $defined_lb[$cp >> 8];
            my $trail = $cp & 0xff;
            next if $trail < 0xa1 || $trail > 0xfe;
        }
        add_mapping( $cp, $uni );
    }

    output_codepage_file( 20949 );
}
141966b05 Alex*2545 
8c4b96fc4 Alex*2546 
################################################################
# dump an array of integers
#
# Formats the values as zero-padded hex numbers of $bit_width bits, eight
# per line with a four space indent; undefined entries are output as
# $default. An empty array yields a single $default entry.
sub dump_array($$@)
{
    my ($bit_width, $default, @array) = @_;
    my $format = sprintf "0x%%0%ux", $bit_width / 4;

    my @cells = map { sprintf( $format, $_ // $default ) } @array;
    push @cells, sprintf( $format, $default ) unless @cells;

    my @rows;
    push @rows, join( ", ", splice( @cells, 0, 8 )) while @cells;
    return "    " . join( ",\n    ", @rows );
}
fb270ddca Alex*2563 
194c43ac3 Alex*2564 
################################################################
# dump an SBCS mapping table in binary format
#
# Writes to the OUTPUT handle, in order: the header as 16-bit little-endian
# words, 12 zero bytes, $wc_offset followed by the first 256 entries of
# @cp2uni, the glyph table (or a single zero word when @glyph2uni is empty),
# two zero words, and one byte for each of the 65536 entries of @uni2cp.
sub dump_binary_sbcs_table($)
{
    my ($codepage) = @_;

    my $wc_offset = 256 + 3;
    $wc_offset += 256 if @glyph2uni;  # account for the glyph table

    print OUTPUT pack "S<*", 13, $codepage, 1, $default_char, $default_wchar,
                             $cp2uni[$default_char], $uni2cp[$default_wchar];
    print OUTPUT pack "C12", (0) x 12;
    print OUTPUT pack "S<*", $wc_offset, map { $_ || 0 } @cp2uni[0 .. 255];

    # glyph table preceded by the value 256, or a lone zero if there is none
    print OUTPUT pack "S<*", @glyph2uni ? (256, get_glyphs_mapping(@cp2uni[0 .. 255])) : (0);

    print OUTPUT pack "S<*", 0, 0;

    # unmapped Unicode values get the default char
    print OUTPUT pack "C*", map { $_ // $default_char } @uni2cp[0 .. 65535];
}
                2591 
                2592 
                2593 ################################################################
                2594 # dump a DBCS mapping table in binary format
                2595 sub dump_binary_dbcs_table($)
                2596 {
                2597     my $codepage = shift;
                2598     my @lb_ranges = get_lb_ranges();
                2599     my @header = ( 13, $codepage, 2, $default_char, $default_wchar, $cp2uni[$default_char], $uni2cp[$default_wchar] );
                2600 
                2601     my @offsets = (0) x 256;
                2602     my $pos = 0;
                2603     foreach my $i (@lead_bytes)
                2604     {
                2605         $offsets[$i] = ($pos += 256);
                2606         $cp2uni[$i] = 0;
                2607     }
                2608 
                2609     my $wc_offset = 256 + 3 + 256 * (1 + scalar @lead_bytes);
                2610 
                2611     print OUTPUT pack "S<*", @header;
                2612     print OUTPUT pack "C12", @lb_ranges, 0 x 12;
                2613     print OUTPUT pack "S<*", $wc_offset, map { $_ || 0; } @cp2uni[0 .. 255];
                2614     print OUTPUT pack "S<*", 0, scalar @lb_ranges / 2, @offsets;
                2615 
                2616     foreach my $i (@lead_bytes)
                2617     {
                2618         my $base = $i << 8;
                2619         print OUTPUT pack "S<*", map { defined $_ ? $_ : $default_wchar; } @cp2uni[$base .. $base + 255];
                2620     }
                2621 
                2622     print OUTPUT pack "S<", 4;
                2623     print OUTPUT pack "S<*", map { defined $_ ? $_ : $default_char; } @uni2cp[0 .. 65535];
                2624 }
                2625 
                2626 
################################################################
# get the list of defined lead byte ranges
#
# Returns a flat list of (first, last) pairs, one pair for each run of
# consecutive values in @lead_bytes, in increasing order.
sub get_lb_ranges()
{
    my @is_lead;
    $is_lead[$_] = 1 foreach @lead_bytes;

    my @ranges;
    my $in_range = 0;
    foreach my $byte (0 .. 255)
    {
        my $lead = $is_lead[$byte] ? 1 : 0;
        next if $lead == $in_range;
        # entering a run records its first byte, leaving it the last one
        push @ranges, $lead ? $byte : $byte - 1;
        $in_range = $lead;
    }
    # a run reaching the end of the byte range is closed at 0xff
    push @ranges, 0xff if $in_range;
    return @ranges;
}
                2650 
################################################################
# dump the Indic Syllabic Category table
#
# Reads IndicSyllabicCategory.txt and IndicPositionalCategory.txt from the
# "ucd" data file and writes $filename as a two-level table.  Each entry is
# the %indic_types value, or'ed with the %matra_types value shifted left
# by 8 bits.
sub dump_indic($)
{
    my ($filename) = @_;
    my @indic_table;

    my $INPUT = open_data_file( "ucd", "IndicSyllabicCategory.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z

        # a line describes either a single code point or a range
        my ($first, $last, $type);
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($1), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($2), $3);
        }
        else
        {
            die "malformed line $_";
        }
        die "unknown indic $type" unless defined $indic_types{$type};

        # entries reaching beyond 0xffff are dropped
        next unless $first < 65536 && $last < 65536;
        foreach my $cp ($first .. $last)
        {
            $indic_table[$cp] = $indic_types{$type};
        }
    }
    close $INPUT;

    # remember the first file name, opening the next one replaces it
    my $prev_data_file = $current_data_file;
    $INPUT = open_data_file( "ucd", "IndicPositionalCategory.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z

        my ($first, $last, $type);
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($1), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($2), $3);
        }
        else
        {
            die "malformed line $_";
        }
        die "unknown matra $type" unless defined $matra_types{$type};

        # the positional category goes into the bits above the low byte
        foreach my $cp ($first .. $last)
        {
            $indic_table[$cp] |= $matra_types{$type} << 8;
        }
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Indic Syllabic Category */\n";
    print OUTPUT "/* generated from $prev_data_file */\n";
    print OUTPUT "/*       and from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "indic_syllabic_table", $indic_types{'Other'}, 16, @indic_table );

    close OUTPUT;
    save_file($filename);
}
                2732 
################################################################
# dump the Line Break Properties table
#
# Reads LineBreak.txt from the "ucd" data file, maps every code point or
# code point range to its %break_types value and writes $filename as a
# three-level table, with the 'XX' type as default value.
sub dump_linebreak($)
{
    my ($filename) = @_;
    my @break_table;

    my $INPUT = open_data_file( "ucd", "LineBreak.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z

        # three-character types are tried before two-character ones, for
        # single code points as well as for ranges
        my ($first, $last, $type);
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z][0-9A-Z])+\s*/ ||
            /^\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex($1), hex($1), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z][0-9A-Z])+\s*/ ||
               /^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex($1), hex($2), $3);
        }
        else
        {
            die "malformed line $_";
        }
        die "unknown breaktype $type" unless defined $break_types{$type};

        foreach my $cp ($first .. $last)
        {
            $break_table[$cp] = $break_types{$type};
        }
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Line Break Properties */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_three_level_mapping( "wine_linebreak_table", $break_types{'XX'}, 16, @break_table );

    close OUTPUT;
    save_file($filename);
}
                2796 
# Script name => script id.  The id of a script is its position in the
# list below, so new scripts must only ever be appended.
my %scripts = do
{
    my @names =
    (
        # 0 .. 9
        qw(Unknown Common Inherited Arabic Armenian Avestan Balinese Bamum Batak Bengali),
        # 10 .. 19
        qw(Bopomofo Brahmi Braille Buginese Buhid Canadian_Aboriginal Carian Cham Cherokee Coptic),
        # 20 .. 29
        qw(Cuneiform Cypriot Cyrillic Deseret Devanagari Egyptian_Hieroglyphs Ethiopic Georgian Glagolitic Gothic),
        # 30 .. 39
        qw(Greek Gujarati Gurmukhi Han Hangul Hanunoo Hebrew Hiragana Imperial_Aramaic Inscriptional_Pahlavi),
        # 40 .. 49
        qw(Inscriptional_Parthian Javanese Kaithi Kannada Katakana Kayah_Li Kharoshthi Khmer Lao Latin),
        # 50 .. 59
        qw(Lepcha Limbu Linear_B Lisu Lycian Lydian Malayalam Mandaic Meetei_Mayek Mongolian),
        # 60 .. 69
        qw(Myanmar New_Tai_Lue Nko Ogham Ol_Chiki Old_Italic Old_Persian Old_South_Arabian Old_Turkic Oriya),
        # 70 .. 79
        qw(Osmanya Phags_Pa Phoenician Rejang Runic Samaritan Saurashtra Shavian Sinhala Sundanese),
        # 80 .. 89
        qw(Syloti_Nagri Syriac Tagalog Tagbanwa Tai_Le Tai_Tham Tai_Viet Tamil Telugu Thaana),
        # 90 .. 95
        qw(Thai Tibetan Tifinagh Ugaritic Vai Yi),
        # Win8/Win8.1 (96 .. 102)
        qw(Chakma Meroitic_Cursive Meroitic_Hieroglyphs Miao Sharada Sora_Sompeng Takri),
        # Win10 (103 .. 125)
        qw(Bassa_Vah Caucasian_Albanian Duployan Elbasan Grantha Khojki Khudawadi Linear_A),
        qw(Mahajani Manichaean Mende_Kikakui Modi Mro Nabataean Old_North_Arabian Old_Permic),
        qw(Pahawh_Hmong Palmyrene Pau_Cin_Hau Psalter_Pahlavi Siddham Tirhuta Warang_Citi),
        # Win10 RS1 (126 .. 137)
        qw(Adlam Ahom Anatolian_Hieroglyphs Bhaiksuki Hatran Marchen),
        qw(Multani Newa Old_Hungarian Osage SignWriting Tangut),
        # Win10 RS4 (138 .. 141)
        qw(Masaram_Gondi Nushu Soyombo Zanabazar_Square),
        # Win10 1903 (142 .. 148)
        qw(Dogra Gunjala_Gondi Hanifi_Rohingya Makasar Medefaidrin Old_Sogdian Sogdian),
        # Win10 2004 (149 .. 152)
        qw(Elymaic Nyiakeng_Puachue_Hmong Nandinagari Wancho),
        # Win11 (153 .. 156)
        qw(Chorasmian Dives_Akuru Khitan_Small_Script Yezidi),
    );
    map { ($names[$_], $_) } 0 .. $#names;
};
                2964 
################################################################
# dump Script IDs table
#
# Reads Scripts.txt from the "ucd" data file and generates two files:
# "$filename.h" with the unicode_script_id enum built from %scripts, and
# "$filename.c" with the code point to script id table as a three-level
# table.  Scripts that are not listed in %scripts are ignored.
sub dump_scripts($)
{
    my $filename = shift;
    my @scripts_table;

    my $INPUT = open_data_file( "ucd", "Scripts.txt" );
    # Fill the table
    # Unknown script id is always 0, so undefined scripts are automatically treated as such
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            my $type = $2;
            if (defined $scripts{$type})
            {
                $scripts_table[hex $1] = $scripts{$type};
            }
            next;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            my $type = $3;
            if (defined $scripts{$type})
            {
                foreach my $i (hex $1 .. hex $2)
                {
                    $scripts_table[$i] = $scripts{$type};
                }
            }
            next;
        }
        # lines matching neither form are skipped without an error
    }

    close $INPUT;

    # the header with the script id enum
    my $header = "$filename.h";
    open OUTPUT, ">", "$header.new" or die "Cannot create $header";
    print "Building $header\n";
    print OUTPUT "/* Unicode Script IDs */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";

    print OUTPUT "enum unicode_script_id {\n";
    foreach my $script (sort { $scripts{$a} <=> $scripts{$b} } keys %scripts)
    {
        print OUTPUT "    Script_$script = $scripts{$script},\n";
    }
    # relies on the ids in %scripts being contiguous and starting at 0
    print OUTPUT "    Script_LastId = ", (scalar keys %scripts) - 1, "\n";
    print OUTPUT "};\n";

    close OUTPUT;
    save_file($header);

    # the C file with the lookup table
    my $source = "$filename.c";
    # report the file that is actually being created, not the header
    open OUTPUT, ">", "$source.new" or die "Cannot create $source";
    print "Building $source\n";
    print OUTPUT "/* Unicode Script IDs */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_three_level_mapping( "wine_scripts_table", 0, 16, @scripts_table );
    close OUTPUT;
    save_file($source);
}
fb270ddca Alex*3040 
################################################################
# dump the BiDi mirroring table
#
# Reads BidiMirroring.txt from the "ucd" data file and writes $filename as
# a two-level table mapping a code point to its mirrored code point, with 0
# for code points that have no entry.
sub dump_mirroring($)
{
    my $filename = shift;
    my @mirror_table = ();

    my $INPUT = open_data_file( "ucd", "BidiMirroring.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        # also skip whitespace-only lines (e.g. a blank line with a CR line
        # ending), like the other parsers do, instead of dying on them
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9a-fA-F]+)/)
        {
            $mirror_table[hex $1] = hex $2;
            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT, ">", "$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode BiDi mirroring */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";
    dump_two_level_mapping( "wine_mirror_map", 0, 16, @mirror_table );
    close OUTPUT;
    save_file($filename);
}
                3073 
################################################################
# dump the Bidi Brackets
#
# Reads BidiBrackets.txt from the "ucd" data file and writes $filename as a
# two-level table.  For each bracket the entry holds the distance to the
# paired bracket in the low byte and the %bracket_types value in the bits
# above it.  Dies if the pair is 128 or more code points apart.
sub dump_bracket($)
{
    my $filename = shift;
    my @bracket_table;

    my $INPUT = open_data_file( "ucd", "BidiBrackets.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9a-fA-F]+);\s*([con])/)
        {
            my ($char, $pair, $type) = (hex($1), hex($2), $3);
            die "unknown bracket $type" unless defined $bracket_types{$type};
            die "characters too distant $1 and $2" if abs($pair - $char) >= 128;
            # Store the distance as a two's complement byte, so that -1
            # becomes 0xff.  The previous "% 255" turned -1 into 0xfe, which
            # reads back as -2 when the byte is taken as a signed value.
            # NOTE(review): assumes the users of the table decode the low
            # byte as a signed 8-bit offset -- confirm against the C code.
            $bracket_table[$char] = (($pair - $char) & 0xff) | ($bracket_types{$type} << 8);
            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT, ">", "$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Bidirectional Bracket table */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "bidi_bracket_table", 0, 16, @bracket_table );

    close OUTPUT;
    save_file($filename);
}
6f3f505f0 Alex*3112 
################################################################
# dump the Arabic shaping table
#
# Reads ArabicShaping.txt from the "ucd" data file on top of
# @initial_joining_table and writes $filename with the joining type table
# (two-level) followed by the wine_shaping_forms array, which holds the
# four %joining_forms entries of every character from 0x600 to 0x6ff.
sub dump_shaping($)
{
    my ($filename) = @_;
    my @joining_table = @initial_joining_table;

    my $INPUT = open_data_file( "ucd", "ArabicShaping.txt" );
    while (<$INPUT>)
    {
        # comments, empty lines and ^Z carry no data
        next if /^\#/ or /^\s*$/ or /\x1a/;
        die "malformed line $_" unless /^\s*([0-9a-fA-F]+)\s*;.*;\s*([RLDCUT])\s*;\s*(\w+)/;
        $joining_table[hex $1] = $joining_types{$2};
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Arabic shaping */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "wine_shaping_table", 0, 16, @joining_table );

    print OUTPUT "\nconst unsigned short wine_shaping_forms[256][4] =\n{\n";
    foreach my $ch (0x600 .. 0x6ff)
    {
        # a form without an entry falls back to the character itself
        printf OUTPUT "    { 0x%04x, 0x%04x, 0x%04x, 0x%04x },\n",
            map { $joining_forms{$_}[$ch] || $ch } qw(isolated final initial medial);
    }
    print OUTPUT "};\n";

    close OUTPUT;
    save_file($filename);
}
                3159 
################################################################
# dump the Arabic shaping table
#
# Reads ArabicShaping.txt from the "ucd" data file on top of
# @initial_joining_table and writes $filename as a three-level table.
# Characters of the ALAPH and DALATH RISH joining groups get the
# %joining_types value of their group, all the others the value of their
# joining type.
sub dump_arabic_shaping($)
{
    my $filename = shift;
    my @joining_table = @initial_joining_table;

    my $INPUT = open_data_file( "ucd", "ArabicShaping.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        # The joining group may consist of several space-separated words
        # (e.g. "DALATH RISH"); capturing a single \w+ would truncate it to
        # its first word and the group test below could never succeed.
        if (/^\s*([0-9a-fA-F]+)\s*;.*;\s*([RLDCUT])\s*;\s*(\w+(?: \w+)*)/)
        {
            my $type = $2;
            my $group = $3;

            # NOTE(review): assumes %joining_types has entries for both
            # group names -- confirm against its definition.
            if ($group eq "ALAPH" || $group eq "DALATH RISH")
            {
                $joining_table[hex $1] = $joining_types{$group};
            }
            else
            {
                $joining_table[hex $1] = $joining_types{$type};
            }

            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT, ">", "$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Arabic shaping */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_three_level_mapping( "arabic_shaping_table", 0, 16, @joining_table );

    close OUTPUT;
    save_file($filename);
}
                3205 
################################################################
# dump the Vertical Orientation table
#
# Parses VerticalOrientation.txt from the UCD data and writes a C file
# holding a two-level table named vertical_orientation_table.
#   $filename  file to generate (written as "$filename.new", then
#              passed to save_file())
#   $unix      when true, a "#pragma makedep unix" line wrapped in
#              "#if 0" is emitted ahead of the include
sub dump_vertical($$)
{
    my $filename = shift;
    my $unix = shift;
    my @vertical_table;

    my $INPUT = open_data_file( "ucd", "VerticalOrientation.txt" );
    while (<$INPUT>)
    {
        # ignore comments, blank lines and lines containing ^Z
        next if /^\#/ || /^\s*$/ || /\x1a/;

        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            # single code point; only the first 65536 are recorded here
            my ($char, $type) = (hex($1), $2);
            defined $vertical_types{$type} or die "unknown vertical $type";
            $vertical_table[$char] = $vertical_types{$type} if $char < 65536;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*/)
        {
            # "first..last" range of code points sharing one value
            my ($first, $last, $type) = (hex($1), hex($2), $3);
            defined $vertical_types{$type} or die "unknown vertical $type";
            $vertical_table[$_] = $vertical_types{$type} for $first .. $last;
        }
        else
        {
            die "malformed line $_";
        }
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Vertical Orientation */\n" .
                 "/* generated from $current_data_file */\n" .
                 "/* DO NOT EDIT!! */\n\n";
    if ($unix)
    {
        print OUTPUT "#if 0\n" .
                     "#pragma makedep unix\n" .
                     "#endif\n\n";
    }
    print OUTPUT "#include \"windef.h\"\n\n";

    # code points without an entry get the value for 'R'
    dump_two_level_mapping( "vertical_orientation_table", $vertical_types{'R'}, 16, @vertical_table );

    close OUTPUT;
    save_file($filename);
}
a5fe68870 Alex*3261 
                3262 ################################################################
b99b9565e Alex*3263 # compress a mapping table by removing identical rows
575a97961 Alex*3264 sub compress_array($$@)
a5fe68870 Alex*3265 {
b99b9565e Alex*3266     my $rows = shift;
575a97961 Alex*3267     my $def = shift;
a5fe68870 Alex*3268     my @table = @_;
b99b9565e Alex*3269     my $len = @table / $rows;
f17a228d4 Alex*3270     my @array;
                3271     my $data = "";
a5fe68870 Alex*3272 
                3273     # try to merge table rows
b99b9565e Alex*3274     for (my $row = 0; $row < $rows; $row++)
a5fe68870 Alex*3275     {
f17a228d4 Alex*3276         my $rowtxt = pack "U*", map { defined($_) ? $_ : $def; } @table[($row * $len)..(($row + 1) * $len - 1)];
                3277         my $pos = index $data, $rowtxt;
                3278         if ($pos == -1)
a5fe68870 Alex*3279         {
f17a228d4 Alex*3280             # check if the tail of the data can match the start of the new row
                3281             my $first = substr( $rowtxt, 0, 1 );
                3282             for (my $i = length($data) - 1; $i > 0; $i--)
                3283             {
                3284                 $pos = index( substr( $data, -$i ), $first );
                3285                 last if $pos == -1;
                3286                 $i -= $pos;
                3287                 next unless substr( $data, -$i ) eq substr( $rowtxt, 0, $i );
                3288                 substr( $data, -$i ) = "";
                3289                 last;
                3290             }
                3291             $pos = length $data;
                3292             $data .= $rowtxt;
a5fe68870 Alex*3293         }
f17a228d4 Alex*3294         $array[$row] = $rows + $pos;
a5fe68870 Alex*3295     }
f17a228d4 Alex*3296     return @array, unpack "U*", $data;
b99b9565e Alex*3297 }
                3298 
################################################################
# dump a char -> value mapping table using two-level tables
#
#   $name   name of the generated C array
#   $def    value used for entries that are not defined
#   $size   16 selects "unsigned short", anything else "unsigned int"
#   @...    the mapping; only entries 0..65535 are used
#
# The array is printed to OUTPUT as 256 level 1 offsets, then the
# level 2 offsets, then the values.
sub dump_two_level_mapping($$$@)
{
    my ($name, $def, $size, @values) = @_;
    my $type = "unsigned int";
    $type = "unsigned short" if $size == 16;

    # 4096 rows of 16 values, then 256 rows of 16 row offsets
    my (@level1, @level2, @level2_data, @value_data);
    (@level2[0..4095], @value_data) = compress_array( 4096, $def, @values[0..65535] );
    (@level1[0..255], @level2_data) = compress_array( 256, 0, @level2 );

    # rebase the row offsets: the 4096 uncompressed entries are replaced
    # by the 256 level 1 entries plus the compressed level 2 data
    my $adjust = @level2_data + 256 - 4096;
    $_ += $adjust foreach @level2_data;

    my $total = @level1 + @level2_data + @value_data;
    printf OUTPUT "const %s %s[%d] =\n{\n", $type, $name, $total;
    printf OUTPUT "    /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @level1 );
    printf OUTPUT "    /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @level2_data );
    printf OUTPUT "    /* values */\n%s\n};\n", dump_array( $size, 0, @value_data );
}
                3318 
################################################################
# dump a char -> value mapping table using three-level tables
#
#   $name   name of the generated C array
#   $def    value used for entries that are not defined
#   $size   16 selects "unsigned short", anything else "unsigned int"
#   @...    the mapping, covering 0..$MAX_CHAR
#
# Each level groups 16 entries of the level below it. The array is
# printed to OUTPUT as level 1, 2 and 3 offsets followed by the values.
sub dump_three_level_mapping($$@)
{
    my ($name, $def, $size, @values) = @_;
    my $type = "unsigned int";
    $type = "unsigned short" if $size == 16;

    # number of rows at each level
    my $level3 = ($MAX_CHAR + 1) / 16;
    my $level2 = $level3 / 16;
    my $level1 = $level2 / 16;

    # each result is a list of row offsets followed by compressed data;
    # the offsets part is what the next level up compresses in turn
    my @leaf = compress_array( $level3, $def, @values[0..$MAX_CHAR] );
    my @mid = compress_array( $level2, 0, @leaf[0..$level3-1] );
    my @top = compress_array( $level1, 0, @mid[0..$level2-1] );

    # rebase the data parts so that offsets index into the final array
    my $mid_shift = @top + @mid - $level2 - $level3;
    $_ += $mid_shift foreach @mid[$level2 .. $#mid];
    my $top_shift = @top - $level2;
    $_ += $top_shift foreach @top[$level1 .. $#top];

    my $total = @top + (@mid - $level2) + (@leaf - $level3);
    printf OUTPUT "const %s %s[%u] =\n{\n", $type, $name, $total;
    printf OUTPUT "    /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @top[0..$level1-1] );
    printf OUTPUT "    /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @top[$level1..$#top] );
    printf OUTPUT "    /* level 3 offsets */\n%s,\n", dump_array( $size, 0, @mid[$level2..$#mid] );
    printf OUTPUT "    /* values */\n%s\n};\n", dump_array( $size, 0, @leaf[$level3..$#leaf] );
}
                3343 
################################################################
# dump a binary case mapping table in l_intl.nls format
#
# Takes a char -> char mapping and returns it as a packed string.
# Each defined entry is stored as the difference between the mapped
# value and the code point itself. The first 65536 entries are
# compressed as a two-level table of 16-bit words; when the mapping
# has more than 0x10000 entries, the range 65536..$MAX_CHAR gets a
# second two-level table whose values are stored as 32-bit words.
# The first word of the result is a count: one plus the number of
# 16-bit words that follow.
sub dump_binary_case_table(@)
{
    my @mapping = @_;
    my @delta;

    # express each mapping relative to its own code point, masked to 32 bits
    foreach my $ch (0 .. $#mapping)
    {
        $delta[$ch] = ($mapping[$ch] - $ch) & 0xffffffff if defined $mapping[$ch];
    }

    my (@low_level1, @low_level2, @low_level2_data, @low_values);
    (@low_level2[0..4095], @low_values) = compress_array( 4096, 0, @delta[0..65535] );
    (@low_level1[0..255], @low_level2_data) = compress_array( 256, 0, @low_level2 );

    if (@mapping <= 0x10000)
    {
        # layout: level 1 offsets, level 2 offsets, values
        my @words = @low_level1;
        my $low_base = @words + @low_level2_data - 4096;
        push @words, map { $_ + $low_base } @low_level2_data;
        push @words, @low_values;
        my $count = 1 + @words;
        return pack "S<*", $count, @words;
    }

    my (@high_level1, @high_level2, @high_level2_data, @high_values);
    (@high_level2[0..32767], @high_values) = compress_array( 32768, 0, @delta[65536..$MAX_CHAR] );
    (@high_level1[0..1023], @high_level2_data) = compress_array( 1024, 0, @high_level2 );

    # layout: low level 1, high level 1, low level 2, low values,
    # high level 2, then the high values as 32-bit words
    my @words = map { $_ + 1024 } @low_level1;
    my $high_base = @words + @low_level2_data + @low_values;
    push @words, map { $_ + $high_base } @high_level1;
    my $low_base = @words + @low_level2_data - 4096;
    push @words, map { $_ + $low_base } @low_level2_data;
    push @words, @low_values;
    # high values take two 16-bit words each, hence the doubled offset
    my $high_value_base = @words + @high_level2_data;
    push @words, map { 2 * ($_ - 32768) + $high_value_base } @high_level2_data;

    my $count = 1 + @words + 2 * @high_values;
    return pack( "S<*", $count, @words ) . pack( "L<*", @high_values );
}
                3383 
                3384 ################################################################
                3385 # dump case mappings for l_intl.nls
                3386 sub dump_intl_nls($)
                3387 {
57a6033c0 Alex*3388     my @upper_table = @toupper_table;
                3389     my @lower_table = @tolower_table;
                3390     remove_linguistic_mappings( \@upper_table, \@lower_table );
                3391 
b956620d8 Alex*3392     my $upper = dump_binary_case_table( @upper_table[0..65535] );
                3393     my $lower = dump_binary_case_table( @lower_table[0..65535] );
bd9d83b73 Alex*3394 
930a13dd8 Alex*3395     my $filename = shift;
                3396     open OUTPUT,">$filename.new" or die "Cannot create $filename";
                3397     printf "Building $filename\n";
                3398 
                3399     binmode OUTPUT;
                3400     print OUTPUT pack "S<", 1;  # version
57a6033c0 Alex*3401     print OUTPUT $upper;
                3402     print OUTPUT $lower;
930a13dd8 Alex*3403     close OUTPUT;
                3404     save_file($filename);
                3405 }
                3406 
                3407 
################################################################
# dump the bidi direction table
#
# Translates every defined entry of @direction_table through
# %bidi_types and writes the result as a three-level C table named
# bidi_direction_table. Entries without a direction get the value
# for "L".
#
# $filename is the file to generate; the data is written to
# "$filename.new" and then handed to save_file().
sub dump_bidi_dir_table($)
{
    my $filename = shift;
    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    # plain print throughout: none of these strings is a printf format,
    # and the file name in particular must not be treated as one
    print "Building $filename\n";
    print OUTPUT "/* Unicode BiDi direction table */\n";
    print OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    my @table;

    for (my $i = 0; $i < @direction_table; $i++)
    {
        $table[$i] = $bidi_types{$direction_table[$i]} if defined $direction_table[$i];
    }

    dump_three_level_mapping( "bidi_direction_table", $bidi_types{"L"}, 16, @table );

    close OUTPUT;
    save_file($filename);
}
                3431 
                3432 
# rotate the low 8 bits of $byte left by $count positions
sub rol($$)
{
    my $value = shift;
    my $shift = shift;
    my $upper = $value << $shift;        # bits moved towards the top
    my $lower = $value >> (8 - $shift);  # bits that wrap around to the bottom
    return ($upper | $lower) & 0xff;
}
                3438 
                3439 ################################################################
                3440 # compress the character properties table
                3441 sub compress_char_props_table($@)
                3442 {
                3443     my $rows = shift;
                3444     my @table = @_;
                3445     my $len = @table / $rows;
                3446     my $pos = 0;
                3447     my @array = (0) x $rows;
                3448     my %sequences;
                3449 
                3450     # add some predefined sequences
                3451     foreach my $i (0, 0xfb .. 0xff) { $sequences{pack "L*", (rol($i,5)) x $len} = $i; }
                3452 
                3453     # try to merge table rows
                3454     for (my $row = 0; $row < $rows; $row++)
                3455     {
                3456         my @table_row = map { defined $_ ? $_ : 0x7f; } @table[($row * $len)..(($row + 1) * $len - 1)];
                3457         my $rowtxt = pack "L*", @table_row;
                3458         if (defined($sequences{$rowtxt}))
                3459         {
                3460             # reuse an existing row
                3461             $array[$row] = $sequences{$rowtxt};
                3462         }
                3463         else
                3464         {
                3465             # create a new row
                3466             $sequences{$rowtxt} = $array[$row] = ++$pos;
                3467             push @array, @table_row;
                3468         }
                3469     }
                3470     return @array;
                3471 }
                3472 
                3473 ################################################################
                3474 # dump a normalization table in binary format
                3475 sub dump_norm_table($)
                3476 {
                3477     my $filename = shift;
                3478 
                3479     my %forms  = ( "nfc" => 1, "nfd" => 2, "nfkc" => 5, "nfkd" => 6, "idna" => 13 );
                3480     my %decomp = ( "nfc" => \@decomp_table,
                3481                    "nfd" => \@decomp_table,
                3482                    "nfkc" => \@decomp_compat_table,
                3483                    "nfkd" => \@decomp_compat_table ,
                3484                    "idna" => \@idna_decomp_table );
                3485 
                3486     open OUTPUT,">$filename.new" or die "Cannot create $filename";
                3487     print "Building $filename\n";
                3488 
                3489     my $type = $filename;
                3490     $type =~ s!.*/norm(\w+)\.nls!$1!;
                3491 
                3492     my $compose = $forms{$type} & 1;
                3493     my $compat = !!($forms{$type} & 4) + ($type eq "idna");
                3494 
                3495     my @version = split /\./, $UNIVERSION;
                3496 
                3497     # combining classes
                3498 
                3499     my @classes;
                3500     my @class_values;
                3501 
                3502     foreach my $c (grep defined, @combining_class_table)
                3503     {
                3504         $classes[$c] = 1 if $c < 0x100;
                3505     }
                3506     for (my $i = 0; $i < @classes; $i++)
                3507     {
                3508         next unless defined $classes[$i];
                3509         $classes[$i] = @class_values;
                3510         push @class_values, $i;
                3511     }
                3512     push @class_values, 0 if (@class_values % 2);
                3513     die "too many classes" if @class_values >= 0x40;
                3514 
                3515     # character properties
                3516 
                3517     my @char_props;
                3518     my @decomposed;
                3519     my @comp_hash_table;
                3520     my $comp_hash_size = $compose ? 254 : 0;
                3521 
                3522     for (my $i = 0; $i <= $MAX_CHAR; $i++)
                3523     {
                3524         next unless defined $combining_class_table[$i];
                3525         if (defined $decomp{$type}->[$i])
                3526         {
                3527             my @dec = get_decomposition( $i, $decomp{$type} );
                3528             if ($compose && (my @comp = get_composition( $i, $compat )))
                3529             {
                3530                 my $hash = ($comp[0] + 95 * $comp[1]) % $comp_hash_size;
                3531                 push @{$comp_hash_table[$hash]}, to_utf16( @comp, $i );
                3532 
                3533                 my $val = 0;
                3534                 foreach my $d (@dec)
                3535                 {
                3536                     $val = $combining_class_table[$d];
                3537                     last if $val;
                3538                 }
                3539                 $char_props[$i] = $classes[$val];
                3540             }
                3541             else
                3542             {
                3543                 $char_props[$i] = 0xbf;
                3544             }
                3545             @dec = compose_hangul( @dec ) if $compose;
                3546             @dec = to_utf16( @dec );
                3547             push @dec, 0 if @dec >= 7;
                3548             $decomposed[$i] = \@dec;
                3549         }
                3550         else
                3551         {
                3552             if ($combining_class_table[$i] == 0x100)
                3553             {
                3554                 $char_props[$i] = 0x7f;
                3555             }
                3556             elsif ($combining_class_table[$i])
                3557             {
                3558                 $char_props[$i] = $classes[$combining_class_table[$i]] | 0x80;
                3559             }
                3560             elsif ($type eq "idna" && defined $idna_disallowed[$i])
                3561             {
                3562                 $char_props[$i] = 0xff;
                3563             }
                3564             else
                3565             {
                3566                 $char_props[$i] = 0;
                3567             }
                3568         }
                3569     }
                3570 
                3571     if ($compose)
                3572     {
                3573         for (my $i = 0; $i <= $MAX_CHAR; $i++)
                3574         {
                3575             my @comp = get_composition( $i, $compat );
                3576             next unless @comp;
                3577             if ($combining_class_table[$comp[1]])
                3578             {
                3579                 $char_props[$comp[0]] |= 0x40 unless $char_props[$comp[0]] & 0x80;
                3580                 $char_props[$comp[1]] |= 0x40;
                3581             }
                3582             else
                3583             {
                3584                 $char_props[$comp[0]] = ($char_props[$comp[0]] & ~0x40) | 0x80;
                3585                 $char_props[$comp[1]] |= 0xc0;
                3586             }
                3587         }
                3588     }
                3589 
                3590     # surrogates
                3591     foreach my $i (0xd800..0xdbff) { $char_props[$i] = 0xdf; }
                3592     foreach my $i (0xdc00..0xdfff) { $char_props[$i] = 0x9f; }
                3593 
                3594     # Hangul
                3595     if ($type eq "nfc") { foreach my $i (0x1100..0x117f) { $char_props[$i] = 0xff; } }
                3596     elsif ($compose) { foreach my $i (0x1100..0x11ff) { $char_props[$i] = 0xff; } }
                3597     foreach my $i (0xac00..0xd7ff) { $char_props[$i] = 0xff; }
                3598 
                3599     # invalid chars
                3600     if ($type eq "idna") { foreach my $i (0x00..0x1f, 0x7f) { $char_props[$i] = 0xff; } }
                3601     foreach my $i (0xfdd0..0xfdef) { $char_props[$i] = 0xff; }
                3602     foreach my $i (0x00..0x10)
                3603     {
                3604         $char_props[($i << 16) | 0xfffe] = 0xff;
                3605         $char_props[($i << 16) | 0xffff] = 0xff;
                3606     }
                3607 
                3608     # decomposition hash table
                3609 
                3610     my @decomp_hash_table;
                3611     my @decomp_hash_index;
                3612     my @decomp_hash_data;
                3613     my $decomp_hash_size = 944;
                3614 
                3615     # build string of character data, reusing substrings when possible
                3616     my $decomp_char_data = "";
                3617     foreach my $i (sort { @{$b} <=> @{$a} } grep defined, @decomposed)
                3618     {
                3619         my $str = pack "U*", @{$i};
                3620         $decomp_char_data .= $str if index( $decomp_char_data, $str) == -1;
                3621     }
                3622     for (my $i = 0; $i < @decomposed; $i++)
                3623     {
                3624         next unless defined $decomposed[$i];
                3625         my $pos = index( $decomp_char_data, pack( "U*", @{$decomposed[$i]} ));
                3626         die "sequence not found" if $pos == -1;
                3627         my $len = @{$decomposed[$i]};
                3628         $len = 7 if $len > 7;
                3629         my $hash = $i % $decomp_hash_size;
                3630         push @{$decomp_hash_table[$hash]}, [ $i, ($len << 13) | $pos ];
                3631     }
                3632     for (my $i = 0; $i < $decomp_hash_size; $i++)
                3633     {
                3634         $decomp_hash_index[$i] = @decomp_hash_data / 2;
                3635         next unless defined $decomp_hash_table[$i];
                3636         if (@{$decomp_hash_table[$i]} == 1)
                3637         {
                3638             my $entry = $decomp_hash_table[$i]->[0];
                3639             if ($char_props[$entry->[0]] == 0xbf)
                3640             {
                3641                 $decomp_hash_index[$i] = $entry->[1];
                3642                 next;
                3643             }
                3644         }
                3645         foreach my $entry (@{$decomp_hash_table[$i]})
                3646         {
                3647             push @decomp_hash_data, $entry->[0] & 0xffff, $entry->[1];
                3648         }
                3649     }
                3650     push @decomp_hash_data, 0, 0;
                3651 
                3652     # composition hash table
                3653 
                3654     my @comp_hash_index;
                3655     my @comp_hash_data;
                3656     if (@comp_hash_table)
                3657     {
                3658         for (my $i = 0; $i < $comp_hash_size; $i++)
                3659         {
                3660             $comp_hash_index[$i] = @comp_hash_data;
                3661             push @comp_hash_data, @{$comp_hash_table[$i]} if defined $comp_hash_table[$i];
                3662         }
                3663         $comp_hash_index[$comp_hash_size] = @comp_hash_data;
                3664         push @comp_hash_data, 0, 0, 0;
                3665     }
                3666 
                3667     my $level1 = ($MAX_CHAR + 1) / 128;
                3668     my @rows = compress_char_props_table( $level1, @char_props[0..$MAX_CHAR] );
                3669 
                3670     my @header = ( $version[0], $version[1], $version[2], 0, $forms{$type}, $compat ? 18 : 3,
                3671                    0, $decomp_hash_size, $comp_hash_size, 0 );
                3672     my @tables = (0) x 8;
                3673 
                3674     $tables[0] = 16 + @header + @tables;
                3675     $tables[1] = $tables[0] + @class_values / 2;
                3676     $tables[2] = $tables[1] + $level1 / 2;
                3677     $tables[3] = $tables[2] + (@rows - $level1) / 2;
                3678     $tables[4] = $tables[3] + @decomp_hash_index;
                3679     $tables[5] = $tables[4] + @decomp_hash_data;
                3680     $tables[6] = $tables[5] + length $decomp_char_data;
                3681     $tables[7] = $tables[6] + @comp_hash_index;
                3682 
                3683     print OUTPUT pack "S<16", unpack "U*", "norm$type.nlp";
                3684     print OUTPUT pack "S<*", @header;
                3685     print OUTPUT pack "S<*", @tables;
                3686     print OUTPUT pack "C*", @class_values;
                3687 
                3688     print OUTPUT pack "C*", @rows[0..$level1-1];
                3689     print OUTPUT pack "C*", @rows[$level1..$#rows];
                3690     print OUTPUT pack "S<*", @decomp_hash_index;
                3691     print OUTPUT pack "S<*", @decomp_hash_data;
                3692     print OUTPUT pack "S<*", unpack "U*", $decomp_char_data;
                3693     print OUTPUT pack "S<*", @comp_hash_index;
                3694     print OUTPUT pack "S<*", @comp_hash_data;
                3695 
                3696     close OUTPUT;
                3697     save_file($filename);
712839d58 Alex*3698 
630f605c2 Alex*3699     add_registry_string_value( $nlskey, "Normalization", sprintf( "%x", $forms{$type} ), "norm$type.nls" );
f9f3e57cf Alex*3700 }
                3701 
                3702 
################################################################
# output a codepage definition file from the global tables
#
# Writes "nls/c_NNN.nls" for the given codepage number, using the
# double-byte dumper when @lead_bytes is non-empty and the single-byte
# one otherwise, then adds a "Codepage" registry string value that
# refers to the generated file.
sub output_codepage_file($)
{
    my ($codepage) = @_;

    my $basename = sprintf "c_%03d.nls", $codepage;
    my $output = "nls/$basename";
    open OUTPUT,">$output.new" or die "Cannot create $output";

    print "Building $output\n";
    if (@lead_bytes)
    {
        dump_binary_dbcs_table( $codepage );
    }
    else
    {
        dump_binary_sbcs_table( $codepage );
    }

    close OUTPUT;
    save_file($output);

    add_registry_string_value( $nlskey, "Codepage", sprintf( "%d", $codepage ), $basename );
}
                3721 
                3722 ################################################################
09d97e968 Alex*3723 # output a codepage table from a Microsoft-style mapping file
sub dump_msdata_codepage($)
{
    # Parse a codepage description made of CODEPAGE, CPINFO, MBTABLE,
    # GLYPHTABLE, WCTABLE, DBCSRANGE and DBCSTABLE sections, and generate
    # the corresponding codepage file.
    #
    # $filename: name of the description, looked up in the "codepages" data
    #            set through open_data_file().
    #
    # The file-level tables @cp2uni, @glyph2uni, @lead_bytes and @uni2cp and
    # the $default_char / $default_wchar values are reset and refilled here;
    # the codepage number is then passed to output_codepage_file().
    # Dies on a line that does not fit the current section, or when the file
    # contains no CODEPAGE line.
    my $filename = shift;

    my $state = "";
    my ($codepage, $count);
    my ($lb_cur, $lb_end);

    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = $DEF_CHAR;
    $default_wchar = $DEF_CHAR;

    my $INPUT = open_data_file( "codepages", $filename );

    while (<$INPUT>)
    {
        next if /^;/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        last if /^ENDCODEPAGE/;

        if (/^CODEPAGE\s+(\d+)/)
        {
            $codepage = $1;
            next;
        }
        # the leading decimal field of CPINFO is not used here
        if (/^CPINFO\s+\d+\s+0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)/)
        {
            $default_char = hex $1;
            $default_wchar = hex $2;
            next;
        }
        if (/^(MBTABLE|GLYPHTABLE|WCTABLE|DBCSRANGE|DBCSTABLE)\s+(\d+)/)
        {
            $state = $1;
            $count = $2;
            next;
        }
        if (/^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)/)
        {
            my ($first, $second) = (hex $1, hex $2);

            # in every table the first mapping seen for an index wins
            if ($state eq "MBTABLE")
            {
                $cp2uni[$first] = $second unless defined($cp2uni[$first]);
                next;
            }
            if ($state eq "GLYPHTABLE")
            {
                $glyph2uni[$first] = $second unless defined($glyph2uni[$first]);
                next;
            }
            if ($state eq "WCTABLE")
            {
                $uni2cp[$first] = $second unless defined($uni2cp[$first]);
                next;
            }
            if ($state eq "DBCSRANGE")
            {
                # the pair is an inclusive range of lead bytes
                for (my $i = $first; $i <= $second; $i++) { add_lead_byte( $i ); }
                $lb_cur = $first;
                $lb_end = $second;
                next;
            }
            if ($state eq "DBCSTABLE")
            {
                # the pair is (trail byte, Unicode value) for the current lead byte
                my $cp = ($lb_cur << 8) | $first;
                $cp2uni[$cp] = $second unless defined($cp2uni[$cp]);
                if (!--$count)
                {
                    # table for this lead byte is complete; once the whole
                    # range is done, expect another DBCSRANGE pair
                    if (++$lb_cur > $lb_end) { $state = "DBCSRANGE"; }
                }
                next;
            }
        }
        die "$filename: Unrecognized line $_\n";
    }
    close $INPUT;

    die "$filename: no CODEPAGE line found\n" unless defined $codepage;

    output_codepage_file( $codepage );

    if ($codepage == 949) { dump_krwansung_codepage( @uni2cp ); }
}
                3819 
################################################################
# align a string length
#
# Returns $str extended with NUL bytes so that its length becomes a
# multiple of $align; a string that is already aligned (including the
# empty string) is returned unchanged.
sub align_string($$)
{
    my ($align, $str) = @_;
    my $rem = length($str) % $align;
    $str .= "\0" x ($align - $rem) if $rem;
    return $str;
}
                3828 
################################################################
# pad a string with zeros
#
# Returns $str extended with NUL bytes up to a length of $pad; a string
# that is already at least $pad long is returned unchanged.
sub pad_string($$)
{
    my ($pad, $str) = @_;
    my $missing = $pad - length($str);
    $str .= "\0" x $missing if $missing > 0;
    return $str;
}
                3837 
################################################################
# pack a GUID string
#
# Takes a GUID in xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx hex notation and
# returns its 16-byte packed form: one little-endian 32-bit value, two
# little-endian 16-bit values, then eight single bytes.
# Dies if the string does not contain a GUID in that notation.
sub pack_guid($)
{
    my $guid = shift;  # work on a lexical so that the caller's $_ is left alone
    my $h2 = '([0-9A-Fa-f]{2})';
    my @fields = $guid =~ /([0-9A-Fa-f]{8})-([0-9A-Fa-f]{4})-([0-9A-Fa-f]{4})-$h2$h2-$h2$h2$h2$h2$h2$h2/
        or die "invalid GUID string '$guid'\n";
    return pack "L<S<2C8", map { hex $_ } @fields;
}
                3846 
                ################################################################
                # comparison function for compression sort
                #
                # Sort callback working on $a and $b, both array references.
                # Shorter rows sort first; rows of equal length are ordered by a
                # numeric comparison of their elements from index 4 onwards.
                sub cmp_compression
                {
                    my $res = scalar @{$a} <=> scalar @{$b};
                    return $res if $res;
                    # both rows have the same length here, so every index is valid on both sides
                    foreach my $i (4 .. $#{$a})
                    {
                        $res = $a->[$i] <=> $b->[$i];
                        return $res if $res;
                    }
                    return 0;
                }
                3862 
                3863 ################################################################
                3864 # build a binary sort keys table
sub dump_sortkey_table($)
{
    # Build the binary sorting file $filename from the "sorting" data set.
    #
    # The input is read section by section (SORTKEY.DEFAULT and the
    # SORTTABLES.* sections handled below); any line that does not fit its
    # section is fatal.  The output file consists of:
    #   - a header of four little-endian 32-bit offsets (the first one is 16,
    #     the size of the header itself),
    #   - the sort key table, followed by one exception table per
    #     GUID/linguistic-flag pair,
    #   - the case mapping tables (standard, linguistic, Turkish),
    #   - the character type table,
    #   - the sort tables (GUID list, expansions, compressions, multiple
    #     weights, jamo sort data).
    # Locale name -> sort GUID associations are also registered under
    # "Sorting\Ids" of $nlskey.
    #
    # Returns the packed character type table.
    my $filename = shift;
    my @keys;
    my ($part, $section, $subsection, $guid, $version, $ling_flag);
    my @multiple_weights;
    my @expansions;
    my @compressions;
    my %exceptions;
    my %guids;
    my %compr_flags;
    my %locales;
    my $default_guid = "00000001-57ee-1e5c-00b4-d0000bb1e11e";
    my $jamostr = "";

    my $re_hex = '0x[0-9A-Fa-f]+';
    my $re_key = '(\d+\s+\d+\s+\d+\s+\d+)';
    $guids{$default_guid} = { };

    my %flags = ( "HAS_3_BYTE_WEIGHTS" => 0x01, "REVERSEDIACRITICS" => 0x10, "DOUBLECOMPRESSION" => 0x20, "INVERSECASING" => 0x40 );

    my $KEYS = open_data_file( "sorting" );

    # plain print: the file name must not be interpreted as a printf format
    print "Building $filename\n";

    while (<$KEYS>)
    {
        s/\s*;.*$//;
        next if /^\s*$/;  # skip empty lines
        if (/^\s*(SORTKEY|SORTTABLES)/)
        {
            $part = $1;
            next;
        }
        if (/^\s*(ENDSORTKEY|ENDSORTTABLES)/)
        {
            $part = $section = "";
            next;
        }
        if (/^\s*(DEFAULT|RELEASE|REVERSEDIACRITICS|DOUBLECOMPRESSION|INVERSECASING|MULTIPLEWEIGHTS|EXPANSION|COMPATIBILITY|COMPRESSION|EXCEPTION|JAMOSORT)\s+/)
        {
            $section = $1;
            $guid = undef;
            next;
        }
        next unless $part;
        if ("$part.$section" eq "SORTKEY.DEFAULT")
        {
            if (/^\s*($re_hex)\s+$re_key/)
            {
                $keys[hex $1] = [ split(/\s+/,$2) ];
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.RELEASE")
        {
            if (/^\s*NLSVERSION\s+0x([0-9A-Fa-f]+)/)
            {
                $version = hex $1;
                next;
            }
            if (/^\s*DEFINEDVERSION\s+0x([0-9A-Fa-f]+)/)
            {
                # ignore for now
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.REVERSEDIACRITICS" ||
               "$part.$section" eq "SORTTABLES.DOUBLECOMPRESSION" ||
               "$part.$section" eq "SORTTABLES.INVERSECASING")
        {
            if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)/)
            {
                $guid = lc $1;
                $guids{$guid} = { } unless defined $guids{$guid};
                $guids{$guid}->{flags} |= $flags{$section};
                next;
            }
            if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
            {
                $locales{$1} = $guid;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.MULTIPLEWEIGHTS")
        {
            if (/^\s*(\d+)\s+(\d+)/)
            {
                push @multiple_weights, $1, $2;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.EXPANSION")
        {
            if (/^\s*0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)/)
            {
                # the key of an expanded character holds the index of its pair
                my $pos = scalar @expansions / 2;
                $keys[hex $1] = [ 2, 0, $pos & 0xff, $pos >> 8 ] unless defined $keys[hex $1];
                push @expansions, hex $2, hex $3;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.COMPATIBILITY")
        {
            if (/^\s*0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)/)
            {
                $keys[hex $1] = $keys[hex $2];
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.COMPRESSION")
        {
            if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)\s+\d*\s*([A-Z0-9_]+)?/)
            {
                if ($subsection || !$guid)  # start a new one
                {
                    $guid = lc $1;
                    $subsection = "";
                    $guids{$guid} = { } unless defined $guids{$guid};
                    $guids{$guid}->{flags} |= $flags{$2} if $2;
                    $guids{$guid}->{compr} = @compressions;
                    $exceptions{"$guid-"} = [ ] unless defined $exceptions{"$guid-"};
                    $compr_flags{$guid} = [ ] unless defined $compr_flags{$guid};
                    push @compressions, [ ];
                }
                else  # merge with current one
                {
                    $guids{lc $1} = { } unless defined $guids{lc $1};
                    $guids{lc $1}->{flags} |= $flags{$2} if $2;
                    $guids{lc $1}->{compr} = $guids{$guid}->{compr};
                    $compr_flags{lc $1} = $compr_flags{$guid};
                }
                next;
            }
            if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
            {
                $locales{$1} = $guid;
                next;
            }
            if (/^\s*(TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT)/)
            {
                $subsection = $1;
                next;
            }
            if ($subsection && /^\s*(($re_hex\s+){2,8})$re_key/)
            {
                my @comp = map { hex $_; } split(/\s+/,$1);
                # stored row: the four key values first, then the character sequence
                push @{$compressions[$#compressions]}, [ split(/\s+/,$3), @comp ];
                # add compression flags
                $compr_flags{$guid}->[$comp[0]] |= @comp >= 6 ? 0xc0 : @comp >= 4 ? 0x80 : 0x40;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.EXCEPTION")
        {
            if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)\s+\d*\s*(LINGUISTIC_CASING)?/)
            {
                $guid = lc $1;
                $guids{$guid} = { } unless defined $guids{$guid};
                # exception tables are keyed by GUID plus "+" (linguistic) or "-"
                $ling_flag = ($2 ? "+" : "-");
                $exceptions{"$guid$ling_flag"} = [ ] unless defined $exceptions{"$guid$ling_flag"};
                next;
            }
            if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
            {
                $locales{$1} = $guid;
                next;
            }
            if (/^\s*($re_hex)\s+$re_key/)
            {
                $exceptions{"$guid$ling_flag"}->[hex $1] = [ split(/\s+/,$2) ];
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.JAMOSORT")
        {
            if (/^\s*$re_hex\s+(($re_hex\s*){5})/)
            {
                # five values per entry, stored in an 8-byte record
                $jamostr .= pack "C8", map { hex $_; } split /\s+/, $1;
                next;
            }
        }
        die "$current_data_file: $part.$section: unrecognized line $_\n";
    }
    close $KEYS;

    # Sortkey table

    my $table;
    for (my $i = 0; $i < 0x10000; $i++)
    {
        my @k = defined $keys[$i] ? @{$keys[$i]} : (0) x 4;
        $table .= pack "C4", $k[1], $k[0], $k[2], $k[3];
    }

    foreach my $id (sort keys %exceptions)
    {
        my $pos = length($table) / 4;
        my @exc = @{$exceptions{$id}};
        my @filled;
        my $key = (substr( $id, -1 ) eq "+" ? "ling_except" : "except");
        my $guid = substr( $id, 0, -1 );
        $guids{$guid}->{$key} = $pos;
        $pos += 0x100;
        # declared unconditionally: "my ... if ..." has undefined behaviour
        my @flags = defined $compr_flags{$guid} ? @{$compr_flags{$guid}} : ();
        for (my $j = 0; $j < 0x10000; $j++)
        {
            next unless defined $exc[$j] || defined $flags[$j];
            $filled[$j >> 8] = 1;
            $j |= 0xff;  # this block of 256 is known to be used, skip to the next one
        }
        # index of 256 entries: used blocks point to their own copy,
        # the other ones to the matching block of the default table
        for (my $j = 0; $j < 0x100; $j++)
        {
            $table .= pack "L<", $filled[$j] ? $pos : $j * 0x100;
            $pos += 0x100 if $filled[$j];
        }
        for (my $j = 0; $j < 0x10000; $j++)
        {
            next unless $filled[$j >> 8];
            my @k = defined $exc[$j] ? @{$exc[$j]} : defined $keys[$j] ? @{$keys[$j]} : (0) x 4;
            $k[3] |= $flags[$j] || 0;
            $table .= pack "C4", $k[1], $k[0], $k[2], $k[3];
        }
    }

    # Case mapping tables

    # standard table
    my @casemaps;
    my @upper = @toupper_table;
    my @lower = @tolower_table;
    remove_linguistic_mappings( \@upper, \@lower );
    $casemaps[0] = pack( "S<*", 1) . dump_binary_case_table( @upper ) . dump_binary_case_table( @lower );

    # linguistic table
    $casemaps[1] = pack( "S<*", 1) . dump_binary_case_table( @toupper_table ) . dump_binary_case_table( @tolower_table );

    # Turkish table
    @upper = @toupper_table;
    @lower = @tolower_table;
    $upper[ord 'i'] = 0x130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
    $lower[ord 'I'] = 0x131; # LATIN SMALL LETTER DOTLESS I
    $casemaps[2] = pack( "S<*", 1) . dump_binary_case_table( @upper ) . dump_binary_case_table( @lower );
    my $casemaps = align_string( 8, $casemaps[0] . $casemaps[1] . $casemaps[2] );

    # Char type table

    my @table;
    my $types = "";
    my %typestr;
    for (my $i = 0; $i < 0x10000; $i++)
    {
        my $str = pack "S<3",
            ($category_table[$i] || 0) & 0xffff,
            defined($direction_table[$i]) ? $c2_types{$direction_table[$i]} : 0,
            ($category_table[$i] || 0) >> 16;

        # each distinct 6-byte type record is stored once and referenced by index
        if (!defined($typestr{$str}))
        {
            $typestr{$str} = length($types) / 6;
            $types .= $str;
        }
        $table[$i] = $typestr{$str};
    }

    my (@rows, @array, @data, @row_data);
    (@rows[0..4095], @data) = compress_array( 4096, 0, @table[0..65535] );
    (@array[0..255], @row_data) = compress_array( 256, 0, @rows );
    for (my $i = 0; $i < 256; $i++) { $array[$i] *= 2; }  # we need byte offsets
    for (my $i = 0; $i < @row_data; $i++) { $row_data[$i] += 2 * @row_data + 512 - 4096; }

    my $arraystr = pack("S<*", @array, @row_data) . pack("C*", @data);
    my $chartypes = pack "S<2", 4 + length($types) + length($arraystr), 2 + length($types);
    $chartypes = align_string( 8, $chartypes . $types . $arraystr );

    # Sort tables

    # guids
    # "scalar keys" is the number of entries on every Perl version
    my $sorttables = pack "L<2", $version, scalar keys %guids;
    foreach my $id (sort keys %guids)
    {
        my %guid = %{$guids{$id}};
        my $flags = $guid{flags} || 0;
        my $map = length($casemaps[0]) + (defined $guid{ling_except} ? length($casemaps[1]) : 0);
        $sorttables .= pack_guid($id) . pack "L<5",
            $flags,
            defined($guid{compr}) ? $guid{compr} : 0xffffffff,
            $guid{except} || 0,
            $guid{ling_except} || 0,
            $map / 2;
    }

    # expansions
    $sorttables .= pack "L<S<*", scalar @expansions / 2, @expansions;

    # compressions
    $sorttables .= pack "L<", scalar @compressions;
    my $rowstr = "";
    foreach my $c (@compressions)
    {
        my $pos = length($rowstr) / 2;
        my $min = 0xffff;
        my $max = 0;
        my @lengths = (0) x 8;
        foreach my $r (sort cmp_compression @{$c})
        {
            my @row = @{$r};
            # a row is 4 key values plus at least 2 characters
            $lengths[scalar @row - 6]++;
            foreach my $val (@row[4..$#row])
            {
                $min = $val if $min > $val;
                $max = $val if $max < $val;
            }
            $rowstr .= align_string( 4, pack "S<*", @row[4..$#row] );
            $rowstr .= pack "C4", $row[1], $row[0], $row[2], $row[3];
        }
        $sorttables .= pack "L<S<10", $pos, $min, $max, @lengths;
    }
    $sorttables .= $rowstr;

    # multiple weights
    $sorttables .= align_string( 4, pack "L<C*", scalar @multiple_weights / 2, @multiple_weights );

    # jamo sort
    $sorttables .= pack("L<", length($jamostr) / 8) . $jamostr;

    # Locales

    add_registry_key( $nlskey, "Sorting\\Ids", "{$default_guid}" );
    foreach my $loc (sort keys %locales)
    {
        # skip specific locales that match more general ones
        my @parts = split /[-_]/, $loc;
        next if @parts > 1 && defined($locales{$parts[0]}) && $locales{$parts[0]} eq $locales{$loc};
        next if @parts > 2 && defined($locales{"$parts[0]-$parts[1]"}) && $locales{"$parts[0]-$parts[1]"} eq $locales{$loc};
        add_registry_string_value( $nlskey, "Sorting\\Ids", $loc, "\{$locales{$loc}\}" );
    }

    # File header

    my @header;
    $header[0] = 16;
    $header[1] = $header[0] + length $table;
    $header[2] = $header[1] + length $casemaps;
    $header[3] = $header[2] + length $chartypes;

    open my $out, ">", "$filename.new" or die "Cannot create $filename.new: $!";
    binmode $out;
    print $out pack( "L<*", @header );
    print $out $table, $casemaps, $chartypes, $sorttables;
    close $out or die "Cannot write $filename.new: $!";
    save_file($filename);
    return $chartypes;
}
                4218 
                4219 
                # locale entries indexed by locale name
                my %lcnames;

                # Return the name of the parent of locale $loc:
                #  - undef for an empty or undefined name,
                #  - the "sparent" field of its %lcnames entry if defined, else its
                #    "parent" field if defined,
                #  - otherwise the name with its last "-xxx" component removed,
                #  - or "" when there is nothing left to remove.
                sub locale_parent($)
                {
                    my $loc = shift;

                    return undef unless $loc;
                    my $entry = $lcnames{$loc};
                    if (defined $entry)
                    {
                        return $entry->{sparent} if defined $entry->{sparent};
                        return $entry->{parent} if defined $entry->{parent};
                    }
                    return $1 if $loc =~ /(.*)-[0-9A-Za-z]+/;
                    return "";
                }
                4232 
                # Sort callback for locale names ($a / $b): string comparison that
                # ignores ASCII case and treats "_" like "-".
                sub compare_locales
                {
                    my ($n1, $n2) = ($a, $b);
                    tr/A-Z_/a-z-/ for $n1, $n2;
                    return $n1 cmp $n2;
                }
                4239 
                # query an xml key
                #
                # Runs $query through the find() method of $xml and returns the text
                # content of the first result, or undef when the result is false.
                # A note is printed on STDERR when there is more than one result.
                sub xml_query($$)
                {
                    my ($xml, $query) = @_;
                    my $ret = $xml->find( $query );
                    return undef unless $ret;
                    printf STDERR "multiple entries for %s\n", $query if (@{$ret} > 1);
                    return $ret->[0]->textContent;
                }
                4249 
                # query an xml key for a locale, with fallback to the parents
                #
                # $loc is a locale entry (hash reference), $query the query passed to
                # find().  The first result that is not the "↑↑↑" placeholder is
                # returned, looking at the locale itself and then at each parent in
                # turn; undef if none of them has one.
                sub loc_query($$)
                {
                    my ($loc, $query) = @_;

                    $loc = $lcnames{"en-US"} unless $loc->{name};  # fallback to "en-US" for root locale

                    for (my $cur = $loc->{name}; defined $cur; $cur = locale_parent( $cur ))
                    {
                        next unless defined $lcnames{$cur};
                        my $xml = $lcnames{$cur}->{xml};
                        my $ret = $xml->find( $query );
                        next unless $ret;
                        printf STDERR "%s: multiple entries for %s\n", $cur, $query if (@{$ret} > 1);
                        next if $ret->[0]->textContent eq "\x{2191}\x{2191}\x{2191}";  # "↑↑↑"
                        return $ret->[0]->textContent;
                    }
                    return undef;
                }

                # retrieve a locale field entry by going up the parents tree
                #
                # Returns $def when neither the locale, its aliases nor its parents
                # define $field.
                sub locale_entry($$$)
                {
                    my ($loc, $field, $def) = @_;

                    return $loc->{$field} if defined $loc->{$field};

                    unless ($loc->{name})  # fallback to "en-US" for root locale
                    {
                        $loc = $lcnames{"en-US"};
                        return $loc->{$field} if defined $loc->{$field};
                    }
                    while (defined $loc->{alias})  # resolve aliases
                    {
                        $loc = $lcnames{$loc->{alias}};
                        return $loc->{$field} if defined $loc->{$field};
                    }
                    my $cur = $loc->{name};
                    while ($cur)
                    {
                        if (defined $lcnames{$cur} && defined $lcnames{$cur}->{sparent})
                        {
                            $cur = $lcnames{$cur}->{sparent};
                        }
                        elsif ($cur =~ /(.*)-[0-9A-Za-z]+/)
                        {
                            $cur = $1;
                        }
                        else
                        {
                            return $def;
                        }
                        return $lcnames{$cur}->{$field} if defined $lcnames{$cur} && defined $lcnames{$cur}->{$field};
                    }
                    return $def;
                }

                # string pool: every entry added below is a whole number of 16-bit units
                my $string_data;

                # Add $txt to the string pool unless it is already there, and return
                # its position in 16-bit units.
                sub add_str_data($)
                {
                    my $txt = shift;
                    my $ret = index( $string_data, $txt );
                    # a hit at an odd byte offset straddles two 16-bit units and is not
                    # a real entry (its position would not be an integer): keep looking
                    $ret = index( $string_data, $txt, $ret + 1 ) while $ret >= 0 && $ret % 2;
                    if ($ret == -1)
                    {
                        $ret = length($string_data);
                        $string_data .= $txt;
                    }
                    return $ret / 2;
                }

                # Add a string as a 16-bit length, the UTF-16LE text and a 16-bit zero;
                # undefined and empty strings give position 0.
                sub add_string($)
                {
                    my $str = shift;
                    return 0 unless defined($str) && $str ne "";
                    my $utf = encode( "UTF16LE", $str );
                    return add_str_data( (pack "S<", length($utf) / 2) . $utf . (pack "S", 0) );
                }

                # Add a list of 32-bit values, preceded by their size in 16-bit units.
                sub add_fontsig(@)
                {
                    return add_str_data( pack "S<L<*", scalar(@_) * 2, @_ );
                }

                # Add an array of strings as a 16-bit count followed by the 32-bit
                # position of each string; an empty list gives position 0.
                sub add_strarray(@)
                {
                    return 0 unless @_;
                    return add_str_data( pack "S<L<*", scalar @_, map { add_string($_) } @_);
                }

                # Convert a "#,##0"-style number pattern to a grouping string: one
                # character per group, whose code is the group size; chr(3) when the
                # pattern is not recognized.
                sub format_to_grouping($)
                {
                    my $format = shift;
                    if ($format =~ /#,(#+),(#+0)/) { return chr(length($2)) . chr(length($1)); }
                    if ($format =~ /#,(#+0)/) { return chr(length($1)); }
                    # printf STDERR "unknown format %s\n", $format;
                    return chr(3);
                }

                # Map a "positive;negative" currency pattern to a pair of indexes into
                # the pattern lists below.  An unrecognized pattern gives index 0; a
                # missing negative pattern is derived from the positive index.
                sub parse_currency_format($$)
                {
                    my $name = shift;
                    my ($posfmt, $negfmt) = split /;/, shift;
                    my @pospatterns = ( "\xa4[^\xa0]*#",       # $1.1
                                        "00[^\xa0]*\xa4",      # 1.1$
                                        "\xa4.*\xa0.*#",       # $ 1.1
                                        "00.*\xa0.*\xa4" );    # 1.1 $
                    my @negpatterns = ( "\\(\xa4[^\xa0]*#",    # ($1.1)
                                        "-\xa4[^\xa0]*#",      # -$1.1
                                        "\xa4[^\xa0]*-#",      # $-1.1
                                        "\xa4[^\xa0]*#.*00-",  # $1.1-
                                        "00[^\xa0]*\xa4\\)",   # (1.1$)
                                        "-#.*00[^\xa0]*\xa4",  # -1.1$
                                        "00-[^\xa0]*\xa4",     # 1.1-$
                                        "00[^\xa0]*\xa4-",     # 1.1$-
                                        "-#.*00.*\xa0.*\xa4",  # -1.1 $
                                        "-\xa4.*\xa0.*#",      # -$ 1.1
                                        "00.*\xa0.*\xa4-",     # 1.1 $-
                                        "\xa4.*\xa0.*#.*00-",  # $ 1.1-
                                        "\xa4.*\xa0.*-#",      # $ -1.1
                                        "00-.*\xa0.*\xa4",     # 1.1- $
                                        "\\(\xa4.*\xa0.*#",    # ($ 1.1)
                                        "00.*\xa0.*\xa4\\)");  # (1.1 $)
                    my ($pos, $neg);

                    for ($pos = 0; $pos < @pospatterns; $pos++)
                    {
                        last if ($posfmt =~ /$pospatterns[$pos]/);
                    }
                    #printf STDERR "$name: unknown format '%s'\n", $posfmt if ($pos == @pospatterns);
                    $pos = 0 if ($pos == @pospatterns);

                    if (defined $negfmt)
                    {
                        for ($neg = 0; $neg < @negpatterns; $neg++)
                        {
                            last if ($negfmt =~ /$negpatterns[$neg]/);
                        }
                        #printf STDERR "$name: unknown format '%s'\n", $negfmt if ($neg == @negpatterns);
                        $neg = 0 if ($neg == @negpatterns);
                    }
                    elsif ($pos == 0) { $neg = 1; }
                    elsif ($pos == 1) { $neg = 5; }
                    elsif ($pos == 2) { $neg = 9; }
                    elsif ($pos == 3) { $neg = 8; }

                    return ($pos, $neg);
                }

                # Map a percent pattern to a pair of indexes; the first is the position
                # of the matching entry in the list below, the second is the same
                # except that 3 becomes 7.
                sub parse_percent_format($)
                {
                    my $fmt = shift;
                    my @patterns = ( "0.+%",    # 1 %
                                     "0%",      # 1%
                                     "%#",      # %1
                                     "%.+#" );  # % 1
                    my $pos;
                    for ($pos = 0; $pos < @patterns; $pos++)
                    {
                        last if ($fmt =~ /$patterns[$pos]/);
                    }
                    printf STDERR "unknown format '%s'\n", $fmt if ($pos == @patterns);
                    return ($pos, ($pos == 3) ? 7 : $pos);
                }

                # Rewrite the date pattern letters G, L, E, c and single y; each
                # substitution is applied once.
                sub convert_date_format($)
                {
                    my $fmt = shift;
                    $fmt =~ s/G+/gg/;
                    $fmt =~ s/LLLL/MMMM/;
                    $fmt =~ s/LLL/MMM/;
                    $fmt =~ s/E+/dddd/;
                    $fmt =~ s/ccc+/dddd/;
                    $fmt =~ s/([^gy])y([^y])/$1yyyy$2/;
                    $fmt =~ s/^y([^y])/yyyy$1/;
                    $fmt =~ s/([^gy])y$/$1yyyy/;
                    return $fmt;
                }

                # Rewrite the time pattern letters a and B as "tt" and U+202F as a
                # plain space; each substitution is applied once.
                sub convert_time_format($)
                {
                    my $fmt = shift;
                    $fmt =~ s/a+/tt/;
                    $fmt =~ s/B+/tt/;
                    $fmt =~ s/\x{202f}/ /;
                    return $fmt;
                }

                # Read the ISO 639-3 table and return a hash mapping each two-letter
                # code (fourth column) to the three-letter code of the third column.
                sub load_iso639()
                {
                    my %iso639;
                    my $DATA = open_data_file( "iso639", "iso-639-3_Code_Tables_$ISO639VERSION/iso-639-3_$ISO639VERSION.tab" );
                    while (<$DATA>)
                    {
                        if (/^\s*[a-z]{3}\s+[a-z]{3}\s+([a-z]{3})\s+([a-z]{2})\s/) { $iso639{$2} = $1; }
                    }
                    close $DATA;
                    return %iso639;
                }


                ################################################################
                # build the locale table for locale.nls
                sub build_locale_data()
                {
                    my $base = "cldr-release-$CLDRVERSION";
                    my $suppl = load_xml_data_file( "cldr", "$base/common/supplemental/supplementalData.xml" );
                    my $subtags = load_xml_data_file( "cldr", "$base/common/supplemental/likelySubtags.xml" );
                    my $numbers = load_xml_data_file( "cldr", "$base/common/supplemental/numberingSystems.xml" );
                    # obsolete phone data from CLDR version 33
                    my $phone = load_xml_data_file( "cldr33", "common/supplemental/telephoneCodeData.xml" );
                    my %iso639 = load_iso639();
                    $string_data = pack "S2", 0, 0;  # offset 0 == empty string

                    %lcnames = map { $_->{name} => $_ } @locales;

                    my %lcids;
                    foreach my $loc (@locales) {
$lcids{$loc->{lcid}} = $loc if defined $loc->{lcid}; } 4468 4469 my %days = ( "sun" => 0, "mon" => 1, "tue" => 2, "wed" => 3, "thu" => 4, "fri" => 5, "sat" => 6 ); 4470 4471 # assign locale parents 4472 4473 foreach my $loc (@locales) 4474 { 4475 next if $loc->{name} eq ""; 4476 next if defined $loc->{parent}; 4477 (my $unix_name = $loc->{name}) =~ s/-/_/g; 50c5eb31c Alex*4478 my $parent = xml_query( $suppl, "/supplementalData/parentLocales[not(\@component)]/parentLocale[contains(concat(' ',\@locales,' '),' $unix_name ')]/\@parent" ); 8cdb593f5 Alex*4479 if ($parent) 4480 { 4481 $parent =~ s/_/-/g; 4482 $parent = "" if $parent eq "root"; 4483 } 4484 elsif ($loc->{name} =~ /(.*)-[0-9A-Za-z]+/) { $parent = $1; } 4485 $loc->{parent} = $parent || ""; 4486 } 4487 4488 # load per-locale XML files 4489 4490 foreach my $loc (@locales) 4491 { 4492 next if defined $loc->{alias}; 4493 (my $file = $loc->{file} || $loc->{name}) =~ s/-/_/g; 4494 $file = "$base/" . ($loc->{dir} || "common") . "/main/$file.xml"; cfaa28933 Alex*4495 my $xml = load_xml_data_file( "cldr", $file ); 8cdb593f5 Alex*4496 $loc->{xml} = $xml; 4497 $loc->{language} ||= xml_query( $xml, "/ldml/identity/language/\@type" ); 4498 $loc->{territory} ||= xml_query( $xml, "/ldml/identity/territory/\@type" ); 4499 $loc->{script} = xml_query( $xml, "/ldml/identity/script/\@type" ); 4500 if (!defined($loc->{territory}) && $loc->{name} =~ /-([A-Z]{2}|[0-9]{3})$/) { $loc->{territory} = $1; } 4501 if (!defined($loc->{script}) && $loc->{name} =~ /-([A-Z][a-z]{3})(-[A-Z]{2})?$/) { $loc->{script} = $1; } 4502 } 4503 4504 # assign a default territory and sort locale 4505 4506 foreach my $loc (@locales) 4507 { 4508 next if defined $loc->{alias}; 4509 next if defined $loc->{territory}; 4510 my $id = $loc->{sortlocale}; 4511 if (defined $id && ($id =~ /[-_]([A-Z0-9]+)$/)) 4512 { 4513 $loc->{territory} = $1; 4514 next; 4515 } 4516 my @children = grep /^$loc->{name}-[A-Z0-9]+$/ && !defined $lcnames{$_}->{alias}, keys %lcnames; 
4517 if (@children == 1) 4518 { 4519 $id = $children[0]; 4520 } 4521 else 4522 { 4523 my $name = $loc->{file} || $loc->{name}; b591b3167 Alex*4524 $name =~ s/-(Arab|Beng|Cyrl|Deva|Guru|Hans|Hant|Latn|Tfng|Vaii)$//; 8cdb593f5 Alex*4525 $name =~ s/-/_/g; 4526 $id = xml_query( $subtags, "/supplementalData/likelySubtags/likelySubtag[\@from='$name']/\@to" ); 4527 $id =~ s/_/-/g if $id; 4528 } 4529 if ($id =~ /[-_]([A-Z0-9]+)$/) 4530 { 4531 $loc->{territory} = $1; 4532 next if defined $loc->{sortlocale}; 4533 next unless $id =~ /^$loc->{name}/; 4534 while (defined $lcnames{$id} && defined $lcnames{$id}->{alias}) { $id = $lcnames{$id}->{alias}; } 4535 $loc->{sortlocale} = $id if defined $lcnames{$id}; 4536 next; 4537 } 4538 print STDERR "no territory found for $loc->{name}\n"; 4539 } 4540 ad02ef7be Alex*4541 # fill geoid table 4542 4543 my %geotable; 4544 foreach my $geo (@geoids) 4545 { 4546 my $name = $geo->{name}; 4547 next unless defined $name; 4548 $geo->{alias} = $geotable{$name} if defined $geotable{$name}; 4549 $geotable{$name} ||= $geo; 4550 } 4551 foreach my $loc (@locales) 4552 { 4553 next if defined $loc->{alias}; 4554 my $territory = $loc->{territory}; 4555 $geotable{$territory} ||= { name => $territory }; 4556 } 4557 foreach my $name (keys %geotable) 4558 { 4559 my $geo = $geotable{$name}; 4560 $geo->{dialcode} = xml_query( $phone, "(/supplementalData/telephoneCodeData/codesByTerritory[\@territory='$name']/telephoneCountryCode)[1]/\@code" ); 4561 if ($name =~ /\d+/) 4562 { 4563 $geo->{uncode} = $name; 4564 next; 4565 } 4566 $geo->{iso2} = $name; 4567 $geo->{iso3} = xml_query( $suppl, "/supplementalData/codeMappings/territoryCodes[\@type='$name']/\@alpha3"); 4568 $geo->{uncode} = xml_query( $suppl, "/supplementalData/codeMappings/territoryCodes[\@type='$name']/\@numeric"); 4569 $geo->{sintlsymbol} ||= xml_query( $suppl, "(/supplementalData/currencyData/region[\@iso3166='$name']/currency[not(\@to)])[1]/\@iso4217") || "XXX"; 4570 $geo->{sintlsymbol} =~ 
s/XXX/XDR/; 4571 } 4572 foreach my $geo (@geoids) 4573 { 4574 $geo->{parentid} = $geotable{$geo->{parent}}->{id} if defined $geo->{parent}; 4575 next if defined $geo->{iso2}; 4576 next if defined $geo->{alias}; 4577 next unless defined $geo->{uncode}; 4578 my @contains; 4579 my $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$geo->{uncode}' and not(\@status)]/\@contains"); 4580 push @contains, split /\s+/, $list if defined $list; 4581 $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$geo->{uncode}' and \@status='deprecated']/\@contains"); 4582 push @contains, split /\s+/, $list if defined $list; 4583 while (@contains) 4584 { 4585 my $territory = pop @contains; 4586 if (defined $geotable{$territory}) 4587 { 4588 $geotable{$territory}->{parentid} ||= $geo->{id}; 4589 } 4590 elsif ($territory =~ /\d+/) 4591 { 4592 # expand region recursively 4593 $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$territory' and not(\@status)]/\@contains" ); 4594 push @contains, split /\s+/, $list if defined $list; 4595 } 4596 } 4597 } 4598 65fc470df Alex*4599 # assign calendars to their locale 4600 4601 foreach my $cal (@calendars) 4602 { 4603 next unless defined $cal->{locale}; 4604 my $loc = $lcnames{$cal->{locale}}; 4605 $loc->{calendar} = [ ] unless defined $loc->{calendar}; 4606 push @{$loc->{calendar}}, $cal; 4607 } 4608 8cdb593f5 Alex*4609 # assign default lcid to aliases 4610 4611 foreach my $loc (@locales) 4612 { 4613 next unless defined $loc->{alias}; 4614 next if defined $loc->{lcid}; 4615 my $alias = $loc->{alias}; 4616 my $lcid = $lcnames{$alias}->{lcid} || 0x1000; 4617 $loc->{lcid} = $lcid | 0x80000000; 4618 } 4619 4620 # assign sort aliases to parent locale 4621 4622 foreach my $loc (@locales) 4623 { 4624 next unless $loc->{name} =~ /_/; 4625 next unless defined $loc->{alias}; 4626 my $alias = $loc->{alias}; 4627 my $parent = $lcnames{$alias}; 4628 my $basename = 
$parent->{name}; 4629 while (1) 4630 { 4631 @{$parent->{sortnames}}[($loc->{lcid} >> 16) - 1] = $loc->{name}; 4632 $alias = locale_parent( $alias ); 4633 last unless $alias && defined $lcnames{$alias}; 4634 $parent = $lcnames{$alias}; 4635 last if defined $parent->{sortbase} && $parent->{sortbase} ne $basename; 4636 $parent->{sortbase} = $basename; 4637 } 4638 } 4639 4640 # assign an array index to all locales 4641 4642 my $idx = 0; 4643 foreach my $loc (@locales) 4644 { 4645 next if defined $loc->{alias}; 4646 $loc->{idx} = $idx++; 4647 } 4648 foreach my $loc (@locales) 4649 { 4650 my $alias = $loc->{alias}; 4651 next unless defined $alias; 4652 while (defined $lcnames{$alias}->{alias}) { $alias = $lcnames{$alias}->{alias}; } 4653 $loc->{idx} = $lcnames{$alias}->{idx}; 4654 } 4655 4656 # output lcids table 4657 4658 my $lcid_data = ""; 4659 foreach my $id (sort { $a <=> $b } keys %lcids) 4660 { 4661 my $loc = $lcids{$id}; 4662 $lcid_data .= pack "L<S<2", $id, $loc->{idx}, add_string($loc->{name}); 4663 } 4664 4665 # output lcnames table 4666 4667 my $lcname_data = ""; 4668 foreach my $name (sort compare_locales keys %lcnames) 4669 { 4670 my $loc = $lcnames{$name}; 4671 $lcname_data .= pack "S<2L<", add_string($name), $loc->{idx}, $loc->{lcid} || 0x1000; 4672 } 4673 4674 # output locales array 4675 4676 my $locale_data = ""; 4677 my $default_lcid = 0x8001; 4678 foreach my $loc (@locales) 4679 { 4680 next if defined $loc->{alias}; 4681 my $sname = $loc->{name}; 4682 my $language = $loc->{language}; 4683 my $territory = $loc->{territory}; 4684 my $script = $loc->{script}; 4685 my $neutral = ($sname && $sname !~ /-$territory/); 4686 my $sparent = $loc->{sparent} || (($sname =~ /(.*)-[0-9A-Za-z]+/) ? 
$1 : $loc->{parent}); 4687 my $unique_lcid = $loc->{lcid}; 4688 unless (defined $unique_lcid) { $unique_lcid = $default_lcid++; } ad02ef7be Alex*4689 my $geo = $geotable{$territory}; 8cdb593f5 Alex*4690 my $territory_match = "contains(concat(' ',normalize-space(\@territories),' '),' $territory ')"; 4691 4692 # languages and scripts 4693 4694 my $ssortlocale = $loc->{sortlocale} || ($neutral ? "$sname-$territory" : $sname); 4695 my $idefaultlanguage = defined $lcnames{$ssortlocale} ? $lcnames{$ssortlocale}->{lcid} : undef; 4696 $idefaultlanguage = $lcnames{"en-US"}->{lcid} unless $ssortlocale; 4697 (my $siso639langname = $sname) =~ s/-.*$//; 4698 my $siso639langname2 = $iso639{$siso639langname} || $siso639langname; 4699 my $sopentypelang = sprintf "%-4s", locale_entry( $loc, "sopentypelang", uc $siso639langname2 ); 4700 my $sabbrevlangname = defined $loc->{lcid} ? locale_entry( $loc, "sabbrevlangname", uc $siso639langname2 ) : "ZZZ"; ad02ef7be Alex*4701 my $siso3166ctryname2 = $geo->{iso3} || $geo->{uncode}; 8cdb593f5 Alex*4702 my $senglanguage = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/languages/language[\@type='$language' and not(\@alt)]" ) || ""; 4703 my $sengcountry = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/territories/territory[\@type='$territory' and not(\@alt)]" ) || ""; 4704 my $snativelangname = loc_query( $loc, "/ldml/localeDisplayNames/languages/language[\@type='$language' and not(\@alt)]" ); 4705 my $snativectryname = loc_query( $loc, "/ldml/localeDisplayNames/territories/territory[\@type='$territory' and not(\@alt)]" ); 4706 $sengcountry =~ s/South Korea/Korea/; 8b442c29a Piot*4707 $sengcountry =~ s/T\xfcrkiye/Turkey/; 8cdb593f5 Alex*4708 $snativelangname ||= $senglanguage; 4709 $snativectryname ||= $sengcountry; 4710 if ($script) 4711 { 4712 my $engscript = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/scripts/script[\@type='$script' and not(\@alt)]" ); 4713 my $nativescript = loc_query( $loc, 
"/ldml/localeDisplayNames/scripts/script[\@type='$script' and not(\@alt)]" ); 4714 $senglanguage .= " ($engscript)" if $engscript; 4715 $snativelangname .= " ($nativescript)" if $nativescript; 4716 } 4717 my $sengdisplayname = $neutral ? $senglanguage : "$senglanguage ($sengcountry)"; 4718 my $snativedisplayname = $neutral ? $snativelangname : "$snativelangname ($snativectryname)"; 4719 $sengdisplayname =~ s/\) \(/, /; 4720 $snativedisplayname =~ s/\) \(/, /; 4721 my $sscripts = locale_entry( $loc, "sscripts", $script ) || xml_query( $suppl, "/supplementalData/languageData/language[\@type='$language' and not(\@alt)]/\@scripts" ); 4722 $sscripts = (join ";", (sort split / /, ($sscripts || "Latn"))) . ";"; 4723 my $ireadinglayout = locale_entry( $loc, "ireadinglayout", 0 ); 4724 my $charlayout = loc_query( $loc, "/ldml/layout/orientation/characterOrder" ); 4725 if ($charlayout eq "right-to-left") 4726 { 4727 $ireadinglayout = 1; 4728 } 4729 elsif ($charlayout eq "top-to-bottom") 4730 { 4731 my $linelayout = loc_query( $loc, "/ldml/layout/orientation/lineOrder" ); 4732 $ireadinglayout = $linelayout eq "right-to-left" ? 
2 : 3; 4733 } ad02ef7be Alex*4734 my $igeoid = $geo->{id} || 0; 8cdb593f5 Alex*4735 4736 # numbers 4737 4738 my $sdecimal = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/decimal" ); 4739 my $slist = locale_entry( $loc, "slist", ";" ); 4740 my $smondecimalsep = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/currencyDecimal" ) || $sdecimal; 4741 my $sthousand = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/group" ); 4742 $sthousand =~ s/\x{202f}/\x{00a0}/; 4743 my $smonthousandsep = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/currencyGroup" ) || $sthousand; 4744 my $spositivesign = ""; 1ad2cb51c Niko*4745 my $snegativesign = "-"; 8cdb593f5 Alex*4746 my $spercent = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/percentSign" ); 4747 my $snan = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/nan" ); 4748 my $sposinfinity = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/infinity" ); 4749 my $sneginfinity = $sposinfinity ? 
"-$sposinfinity" : ""; 4750 my $sgrouping = format_to_grouping( loc_query( $loc, "/ldml/numbers/decimalFormats[\@numberSystem='latn']/decimalFormatLength[not(\@type)]/decimalFormat/pattern" )); 4751 my $percentformat = loc_query( $loc, "/ldml/numbers/percentFormats[\@numberSystem='latn']/percentFormatLength[not(\@type)]/percentFormat/pattern" ); 198de0dcb Alex*4752 my $currencyformat = loc_query( $loc, "/ldml/numbers/currencyFormats[\@numberSystem='latn']/currencyFormatLength[not(\@type)]/currencyFormat[\@type='accounting']/pattern[not(\@alt)]" ) || 4753 loc_query( $loc, "/ldml/numbers/currencyFormats[\@numberSystem='latn']/currencyFormatLength[not(\@type)]/currencyFormat[\@type='standard']/pattern[not(\@alt)]" ); 8cdb593f5 Alex*4754 my $smongrouping = format_to_grouping( $currencyformat ); 4755 my ($icurrency, $inegcurr) = parse_currency_format( $sname, $currencyformat ); 4756 my ($ipospercent, $inegpercent) = parse_percent_format( $percentformat ); 4757 my $native_numbering = loc_query( $loc, "/ldml/numbers/otherNumberingSystems/native" ); 4172c0482 Alex*4758 my @snativedigits = split //, (locale_entry( $loc, "nativedigits", "" ) || xml_query( $numbers, "/supplementalData/numberingSystems/numberingSystem[\@id='$native_numbering']/\@digits" )); 8cdb593f5 Alex*4759 my $digitsubstitution = !(ord($snativedigits[0]) >= 0x600 && ord($snativedigits[0]) <= 0x6ff); 4760 my $measure = defined xml_query( $suppl, "/supplementalData/measurementData/measurementSystem[\@type='US' and $territory_match]" ); 4761 my $papersize = defined xml_query( $suppl, "/supplementalData/measurementData/paperSize[\@type='US-Letter' and $territory_match]" ); 4762 4763 # currencies 4764 ad02ef7be Alex*4765 my $sintlsymbol = $geo->{sintlsymbol} || "XDR"; 4766 my $scurrency = $geo->{scurrency} || loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/symbol[\@alt='narrow']" ); 8cdb593f5 Alex*4767 $scurrency ||= loc_query( $loc, 
"/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/symbol[not(\@alt)]" ); 6d046dd98 Alex*4768 $scurrency ||= $geo->{sintlsymbol}; ad02ef7be Alex*4769 $geo->{scurrency} = $scurrency if $scurrency; 8cdb593f5 Alex*4770 my $sengcurrname = $loc->{sengcurrname} || loc_query( $lcnames{en}, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/displayName[not(\@count)]" ); 4771 my $snativecurrname = $loc->{sengcurrname} || loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/displayName[not(\@count)]" ) || $sengcurrname; 4772 my $icurrdigits = xml_query( $suppl, "/supplementalData/currencyData/fractions/info[\@iso4217='$sintlsymbol']/\@digits" ); 4773 $icurrdigits = 2 unless defined $icurrdigits; 4774 4775 # calendars 4776 4777 my $firstday = xml_query( $suppl, "/supplementalData/weekData/firstDay[not(\@alt) and $territory_match]/\@day" ); 4778 my $ifirstdayofweek = $firstday ? $days{$firstday} : 1; 4779 my $firstweekofyear = (xml_query( $suppl, "/supplementalData/weekData/minDays[$territory_match]/\@count" ) || 0) == 4 ? 
2 : 0; 4780 my $serastring = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/eras/eraAbbr/era[\@type='1' and not(\@alt)]" ); 4781 my (@sdayname, @sabbrevdayname, @sshortestdayname); 4782 foreach my $d (sort { $days{$a} <=> $days{$b} } keys %days) 4783 { 4784 my $n = $days{$d}; 4785 my %name; 4786 foreach my $type (qw(wide abbreviated short)) 4787 { 4788 $name{$type} = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/days/dayContext[\@type='format']/dayWidth[\@type='$type']/day[\@type='$d' and not(\@alt)]" ); 4789 } 4790 push @sdayname, $name{wide}; 4791 push @sabbrevdayname, $name{abbreviated} || $name{wide}; 4792 push @sshortestdayname, $name{short} || $name{abbreviated} || $name{wide}; 4793 } 4794 my (@smonthname, @sabbrevmonthname, @sgenitivemonth, @sabbrevgenitivemonth); 4795 foreach my $n (1..13) 4796 { 4797 my $name = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='stand-alone']/monthWidth[\@type='wide']/month[\@type='$n']" ); 4798 my $abbrev = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='stand-alone']/monthWidth[\@type='abbreviated']/month[\@type='$n']" ); 4799 my $genitive = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='format']/monthWidth[\@type='wide']/month[\@type='$n']" ); 4800 my $abbrevgen = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='format']/monthWidth[\@type='abbreviated']/month[\@type='$n']" ); 4801 push @smonthname, $name || $genitive || ""; 4802 push @sabbrevmonthname, $abbrev || $abbrevgen || $name || $genitive || ""; 4803 push @sgenitivemonth, $genitive || ""; 4804 push @sabbrevgenitivemonth, $abbrevgen || $genitive || ""; 4805 } 4806 @sgenitivemonth = () if join("|",@smonthname) eq join("|",@sgenitivemonth); 4807 @sabbrevgenitivemonth = () if join("|",@sabbrevmonthname) eq 
join("|",@sabbrevgenitivemonth); 4808 my %caltypes = ( "gregorian" => 1, "japanese" => 3, "chinese" => 4, "dangi" => 5, "islamic" => 6, "buddhist" => 7, "hebrew" => 8, 4809 "persian" => 22, "islamic-civil" => 23, "islamic-umalqura" => 23 ); 4810 my $calpref = xml_query( $suppl, "/supplementalData/calendarPreferenceData/calendarPreference[$territory_match]/\@ordering" ) || "gregorian"; 4811 my $icalendartype; 4812 my @scalnames; 4813 foreach my $c (split /\s+/, $calpref) 4814 { 4815 next unless defined $caltypes{$c}; 4816 $icalendartype .= chr($caltypes{$c}); 4817 $scalnames[$caltypes{$c} - 1] = loc_query( $loc, "/ldml/localeDisplayNames/types/type[\@key='calendar' and \@type='$c']" ); 4818 } 4819 4820 # date/time formats 4821 4822 my $s1159 = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='abbreviated']/dayPeriod[\@type='am' and not(\@alt)]" ); 4823 my $s2359 = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='abbreviated']/dayPeriod[\@type='pm' and not (\@alt)]" ); 4824 my $sshortestam = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='narrow']/dayPeriod[\@type='am' and not(\@alt)]" ); 4825 my $sshortestpm = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='narrow']/dayPeriod[\@type='pm' and not (\@alt)]" ); 4826 my @stimeformat = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/timeFormats/timeFormatLength[\@type='medium']/timeFormat/pattern[not(\@alt)]" )); 4827 push @stimeformat, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hms' and not(\@alt)]" ); 4828 pop @stimeformat if $stimeformat[0] eq $stimeformat[1]; 4829 
@stimeformat = map convert_time_format($_), @stimeformat; 4830 my @sshorttime = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/timeFormats/timeFormatLength[\@type='short']/timeFormat/pattern[not(\@alt)]" )); 4831 push @sshorttime, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hm' and not(\@alt)]" ); 4832 pop @sshorttime if $sshorttime[0] eq $sshorttime[1]; 4833 @sshorttime = map convert_time_format($_), @sshorttime; 4834 my @sshortdate = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMd' and not(\@alt)]" ); 4835 push @sshortdate, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMd' and not(\@alt)]" ); 4836 @sshortdate = map convert_date_format($_), @sshortdate; 4837 my @slongdate = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateFormats/dateFormatLength[\@type='full']/dateFormat/pattern[not(\@alt)]" )); 4838 push @slongdate, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateFormats/dateFormatLength[\@type='long']/dateFormat/pattern[not(\@alt)]" ); 4839 @slongdate = map convert_date_format($_), @slongdate; 4840 my @smonthday = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMMd' and not(\@alt)]" )); 4841 push @smonthday, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Md' and not(\@alt)]" ); 4842 push @smonthday, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMd' and not(\@alt)]" ); 4843 @smonthday = map convert_date_format($_), @smonthday; 4844 my @syearmonth = map convert_date_format($_), loc_query( $loc, 
"/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMM' and not(\@alt)]" ); 4845 my @sduration = map convert_time_format( lc $_ ), loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hms' and not(\@alt)]" ); 4846 my $srelativelongdate = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMMEd' and not(\@alt)]" ) || 4847 loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMEd' and not(\@alt)]" ); 4848 $srelativelongdate = convert_date_format( $srelativelongdate ); 4849 65fc470df Alex*4850 if (defined $loc->{calendar}) 4851 { 4852 foreach my $cal (@{$loc->{calendar}}) 4853 { 4854 $cal->{sshortdate} = \@sshortdate; 4855 $cal->{syearmonth} = \@syearmonth; 4856 $cal->{slongdate} = \@slongdate; 4857 $cal->{serastring} = [ $serastring ]; 4858 $cal->{sdayname} = \@sdayname; 4859 $cal->{sabbrevdayname} = \@sabbrevdayname; 4860 $cal->{smonthname} = \@smonthname; 4861 $cal->{sabbrevmonthname} = \@sabbrevmonthname; 4862 $cal->{scalname} = $scalnames[$cal->{id}]; 4863 $cal->{smonthday} = \@smonthday; 4864 $cal->{sshortestdayname} = \@sshortestdayname; 4865 $cal->{sabbreverastring} = [ $serastring ]; 4866 $cal->{sshortestdayname} = \@sshortestdayname; 4867 $cal->{srelativelongdate} = $srelativelongdate; 4868 } 4869 } 4870 8cdb593f5 Alex*4871 # codepages 4872 4873 my %ansicpmap = ( 437 => 1252, 720 => 1256, 737 => 1253, 775 => 1257, 850 => 1252, 4874 852 => 1250, 855 => 1251, 866 => 1251, 857 => 1254, 862 => 1255 ); 4875 my %maccpmap = ( 437 => 10000, 720 => 10004, 737 => 10006, 775 => 10029, 850 => 10000, 4876 852 => 10029, 855 => 10007, 857 => 10081, 862 => 10005, 866 => 10007, 4877 874 => 10021, 932 => 10001, 936 => 10008, 949 => 10003, 950 => 10002, 4878 1258 => 10000 ); 4879 my %ebcdiccpmap = ( 437 => 37, 
720 => 20420, 737 => 20273, 866 => 20880, 932 => 20290 ); 4880 my %codepagemasks = ( 874 => [ 0x01000000, 0x00000000, 0x00000000, 0, 0x00010000, 0x00000000, 0x00010000, 0x00000000 ], 4881 932 => [ 0x00000000, 0x28c70000, 0x00000010, 0, 0x00020000, 0x00000000, 0x00020000, 0x00000000 ], 4882 936 => [ 0x00000000, 0x28010000, 0x00000002, 0, 0x00040000, 0x00000000, 0x00040000, 0x00000000 ], 4883 949 => [ 0x00000000, 0x00000000, 0x00000000, 0, 0x00080000, 0x00000000, 0x00080000, 0x00000000 ], 4884 950 => [ 0x00000000, 0x28c10000, 0x00000012, 0, 0x00100000, 0x00000000, 0x00100000, 0x00000000 ], 4885 1258 => [ 0x2000000f, 0x00000000, 0x00000000, 0, 0x00000100, 0x00008000, 0x00000100, 0x00008000 ], 4886 866 => [ 0x00000200, 0x00000000, 0x00000000, 0, 0x00000004, 0x00020000, 0x00000004, 0x02020000 ], 4887 862 => [ 0x00000800, 0x40000000, 0x00000000, 0, 0x00000020, 0x00200000, 0x00000020, 0x00200000 ], 4888 857 => [ 0x0000001f, 0x00000000, 0x00000000, 0, 0x00000010, 0x01000000, 0x00000010, 0x01000000 ], 4889 855 => [ 0x00000200, 0x00000000, 0x00000000, 0, 0x00000004, 0x02000000, 0x00000004, 0x02000000 ], 4890 852 => [ 0x00000027, 0x00000000, 0x00000000, 0, 0x00000002, 0x04000000, 0x00000002, 0x04000000 ], 4891 775 => [ 0x00000007, 0x00000000, 0x00000000, 0, 0x00000080, 0x08000000, 0x00000080, 0x08000000 ], 4892 737 => [ 0x00000080, 0x00000000, 0x00000000, 0, 0x00000008, 0x10000000, 0x00000008, 0x10010000 ], 4893 720 => [ 0x00002000, 0x00000000, 0x00000000, 0, 0x00000040, 0x20000000, 0x00000040, 0x20080000 ], 4894 850 => [ 0x00000003, 0x00000000, 0x00000000, 0, 0x00000001, 0x40000000, 0x0000019f, 0xdfd70000 ], 4895 437 => [ 0x00000003, 0x00000000, 0x00000000, 0, 0x00000001, 0x80000000, 0x0000019f, 0xdfd70000 ], 4896 65001 => [ 0x00000000, 0x00000000, 0x00000000, 0, 0x00000000, 0x00000000, 0x0000019f, 0xdfd70000 ] ); 4897 my $oemcp = locale_entry( $loc, "oemcp", 65001 ); 4898 my $maccp = locale_entry( $loc, "maccp", undef ) || $maccpmap{$oemcp} || 65001; 4899 my $ebcdiccp = 
locale_entry( $loc, "ebcdiccp", undef ) || $ebcdiccpmap{$oemcp} || 500; 4900 $ebcdiccp = 500 if (defined $loc->{oemcp} && $loc->{oemcp} == 65001) || (defined $loc->{maccp} && $loc->{maccp} == 65001); 4901 my $ansicp = $ansicpmap{$oemcp} || $oemcp; 4902 my @fontsig = (0) x 8; 4903 my $sig = locale_entry( $loc, "fontsig", [] ); 4904 foreach my $i (0..7) { $fontsig[$i] |= $codepagemasks{$oemcp}->[$i]; } 4905 foreach my $i (0..$#{$sig}) { $fontsig[$i] |= $sig->[$i]; } 4906 $fontsig[3] |= 1 << 31; 4907 $fontsig[3] |= 1 << 27 if $ireadinglayout == 1; 4908 $fontsig[3] |= 1 << 28 if $ireadinglayout == 3; 4909 4910 # special cases for invariant locale 4911 4912 unless ($loc->{name}) 4913 { 4914 $siso639langname = "iv"; 4915 $siso639langname2 = "ivl"; 4916 $senglanguage = $snativelangname = "Invariant Language"; 4917 $sengcountry = $snativectryname = "Invariant Country"; 4918 $sengdisplayname = "Invariant Language (Invariant Country)"; 4919 $snativedisplayname = "Invariant Language (Invariant Region)"; 4920 $sengcurrname = $snativecurrname = "International Monetary Fund"; 4921 $scurrency = "\x{00a4}"; 4922 $ifirstdayofweek = 0; ad02ef7be Alex*4923 $igeoid = $geotable{"US"}->{id}; 8cdb593f5 Alex*4924 @stimeformat = ("HH:mm:ss"); 4925 @sshortdate = ("MM/dd/yyyy", "yyyy-MM-dd"); 4926 @slongdate = ("dddd, dd MMMM yyyy"); 4927 @syearmonth = ("yyyy MMMM"); 4928 @smonthday = ("MMMM dd", "MMMM d", "M/d", "MMM d"); 4929 @sshorttime = ("HH:mm", "hh:mm tt", "H:mm", "h:mm tt"); 4930 $srelativelongdate = "dddd, MMMM dd"; 4931 $sposinfinity = "Infinity"; 4932 $sneginfinity = "-Infinity"; 4933 $spositivesign = "+"; 4934 $ipospercent = $inegpercent = 0; 4935 } 4936 4937 # output data 4938 4939 $locale_data .= pack "L<2", 4940 add_string( $sname ), # name 4941 add_string( $sopentypelang ); # LOCALE_SOPENTYPELANGUAGETAG 4942 4943 $locale_data .= pack "S<14", 4944 $loc->{lcid} || 0x1000, # LOCALE_ILANGUAGE 4945 $unique_lcid, # unique_lcid 4946 locale_entry( $loc, "idigits", 2 ), # 
LOCALE_IDIGITS 4947 locale_entry( $loc, "inegnumber", 1 ), # LOCALE_INEGNUMBER 4948 $icurrdigits, # LOCALE_ICURRDIGITS 4949 $icurrency, # LOCALE_ICURRENCY 4950 $inegcurr, # LOCALE_INEGCURR 4951 locale_entry( $loc, "ilzero", 1 ), # LOCALE_ILZERO 4952 !$neutral, # LOCALE_INEUTRAL 4953 $ifirstdayofweek, # LOCALE_IFIRSTDAYOFWEEK 4954 $firstweekofyear, # LOCALE_IFIRSTWEEKOFYEAR ad02ef7be Alex*4955 $geo->{dialcode} || 1 , # LOCALE_ICOUNTRY, 8cdb593f5 Alex*4956 $measure, # LOCALE_IMEASURE 4957 $digitsubstitution; # LOCALE_IDIGITSUBSTITUTION 4958 4959 $locale_data .= pack "L<18", 4960 add_string( $sgrouping ), # LOCALE_SGROUPING 4961 add_string( $smongrouping ), # LOCALE_SMONGROUPING 4962 add_string( $slist ), # LOCALE_SLIST 4963 add_string( $sdecimal ), # LOCALE_SDECIMAL 4964 add_string( $sthousand ), # LOCALE_STHOUSAND 4965 add_string( $scurrency ), # LOCALE_SCURRENCY 4966 add_string( $smondecimalsep ), # LOCALE_SMONDECIMALSEP 4967 add_string( $smonthousandsep ), # LOCALE_SMONTHOUSANDSEP 4968 add_string( $spositivesign ), # LOCALE_SPOSITIVESIGN 4969 add_string( $snegativesign ), # LOCALE_SNEGATIVESIGN 4970 add_string( $s1159 ), # LOCALE_S1159 4971 add_string( $s2359 ), # LOCALE_S2359 4972 add_strarray( @snativedigits ), # LOCALE_SNATIVEDIGITS 4973 add_strarray( @stimeformat ), # LOCALE_STIMEFORMAT 4974 add_strarray( @sshortdate ), # LOCALE_SSHORTDATE 4975 add_strarray( @slongdate ), # LOCALE_SLONGDATE 4976 add_strarray( @syearmonth ), # LOCALE_SYEARMONTH 4977 add_strarray( @sduration ); # LOCALE_SDURATION 4978 4979 $locale_data .= pack "S<8", 4980 $idefaultlanguage || 0x1000, # LOCALE_IDEFAULTLANGUAGE 4981 $ansicp, # LOCALE_IDEFAULTANSICODEPAGE 4982 $oemcp, # LOCALE_IDEFAULTCODEPAGE 4983 $maccp, # LOCALE_IDEFAULTMACCODEPAGE 4984 $ebcdiccp, # LOCALE_IDEFAULTEBCDICCODEPAGE ad02ef7be Alex*4985 $igeoid < 65536 ? $igeoid : 39070, # old_geoid 8cdb593f5 Alex*4986 $papersize ? 
1 : 9, # LOCALE_IPAPERSIZE 4987 0; # FIXME # islamic_cal 4988 4989 $locale_data .= pack "L<24", 4990 add_string( $icalendartype ), # LOCALE_ICALENDARTYPE 4991 add_string( $sabbrevlangname ), # LOCALE_SABBREVLANGNAME 4992 add_string( $siso639langname ), # LOCALE_SISO639LANGNAME 4993 add_string( $senglanguage ), # LOCALE_SENGLANGUAGE 4994 add_string( $snativelangname ), # LOCALE_SNATIVELANGNAME 4995 add_string( $sengcountry ), # LOCALE_SENGCOUNTRY 4996 add_string( $snativectryname ), # LOCALE_SNATIVECTRYNAME 4997 add_string( $siso3166ctryname2 ), # LOCALE_SABBREVCTRYNAME 4998 add_string( $territory ), # LOCALE_SISO3166CTRYNAME 4999 add_string( $sintlsymbol ), # LOCALE_SINTLSYMBOL 5000 add_string( $sengcurrname ), # LOCALE_SENGCURRNAME 5001 add_string( $snativecurrname ), # LOCALE_SNATIVECURRNAME 5002 add_fontsig( @fontsig ), # LOCALE_FONTSIGNATURE 5003 add_string( $siso639langname2 ), # LOCALE_SISO639LANGNAME2 5004 add_string( $siso3166ctryname2 ), # LOCALE_SISO3166CTRYNAME2 5005 add_string( $sparent ), # LOCALE_SPARENT 5006 add_strarray( @sdayname ), # LOCALE_SDAYNAME 5007 add_strarray( @sabbrevdayname ), # LOCALE_SABBREVDAYNAME 5008 add_strarray( @smonthname ), # LOCALE_SMONTHNAME 5009 add_strarray( @sabbrevmonthname ), # LOCALE_SABBREVMONTHNAME 5010 add_strarray( @sgenitivemonth ), # LOCALE_SGENITIVEMONTH 5011 add_strarray( @sabbrevgenitivemonth ), # LOCALE_SABBREVGENITIVEMONTH 5012 add_strarray( @scalnames ), # LOCALE_SCALNAMES 5013 add_strarray( @{$loc->{sortnames}} ); # LOCALE_SSORTNAMES 5014 5015 $locale_data .= pack "S<6", 5016 $inegpercent, # LOCALE_INEGATIVEPERCENT 5017 $ipospercent, # LOCALE_IPOSITIVEPERCENT 5018 0, # unknown 5019 $ireadinglayout, # LOCALE_IREADINGLAYOUT 5020 0x2a, # unknown 5021 0x2a; # unknown 5022 5023 $locale_data .= pack "L<24", 5024 0, # unknown 5025 add_string( $sengdisplayname ), # LOCALE_SENGLISHDISPLAYNAME 5026 add_string( $snativedisplayname ), # LOCALE_SNATIVEDISPLAYNAME 5027 add_string( $spercent ), # LOCALE_SPERCENT 5028 
add_string( $snan ), # LOCALE_SNAN 5029 add_string( $sposinfinity ), # LOCALE_SPOSINFINITY 5030 add_string( $sneginfinity ), # LOCALE_SNEGINFINITY 5031 0, # unknown 5032 add_string( $serastring ), # CAL_SERASTRING 5033 add_string( $serastring ), # CAL_SABBREVERASTRING 5034 0, # unknown 5035 add_string( $ssortlocale ), # LOCALE_SCONSOLEFALLBACKNAME 5036 add_strarray( @sshorttime ), # LOCALE_SSHORTTIME 5037 add_strarray( @sshortestdayname ), # CAL_SSHORTESTDAYNAME 5038 0, # unknown 5039 add_string( $ssortlocale ), # LOCALE_SSORTLOCALE 5040 add_string( "0409:00000409" ), # FIXME # LOCALE_SKEYBOARDSTOINSTALL 5041 add_string( $sscripts ), # LOCALE_SSCRIPTS 5042 add_string( $srelativelongdate ), # LOCALE_SRELATIVELONGDATE ad02ef7be Alex*5043 $igeoid, # LOCALE_IGEOID 8cdb593f5 Alex*5044 add_string( $sshortestam || "a" ), # LOCALE_SSHORTESTAM 5045 add_string( $sshortestpm || "p" ), # LOCALE_SSHORTESTPM 5046 add_strarray( @smonthday ), # LOCALE_SMONTHDAY 5047 add_string( "k0-windows-us" ) # FIXME # keyboard_layout 5048 } 5049 989f7d648 Alex*5050 # output language groups 5051 5052 my %groups; 630f605c2 Alex*5053 add_registry_key( $nlskey, "Locale", "00000409" ); 989f7d648 Alex*5054 foreach my $loc (@locales) 5055 { 5056 next unless defined $loc->{lcid}; 5057 next if ($loc->{lcid} & 0x80000000); 5058 next if !defined($loc->{alias}) && $loc->{name} !~ /-$loc->{territory}/; # skip neutral locales 5059 my $group = locale_entry( $loc, "group", 1 ); 5060 my $name = sprintf( "%08x", $loc->{lcid} ); 5061 my $val = sprintf( "%x", $group ); 630f605c2 Alex*5062 add_registry_string_value( $nlskey, "Locale", $name, $val ) unless ($loc->{lcid} & 0x000f0000); 5063 add_registry_string_value( $nlskey, "Locale\\Alternate Sorts", $name, $val ) if $loc->{name} =~ /_/; 989f7d648 Alex*5064 $groups{$val} = 1; 5065 } 630f605c2 Alex*5066 foreach my $group (keys %groups) { add_registry_string_value( $nlskey, "Language Groups", $group, "1" ); } 989f7d648 Alex*5067 65fc470df Alex*5068 # output calendar 
data 5069 5070 my $calendar_data = ""; 5071 foreach my $cal (@calendars) 5072 { 5073 my $scalname = $cal->{name}; 5074 my $iyearoffsetrange = 0; 5075 my $itwodigityearmax = $cal->{itwodigityearmax}; 5076 my @sshortdate; 5077 my @syearmonth; 5078 my @slongdate; 5079 my @serastring; 5080 my @sdayname; 5081 my @sabbrevdayname; 5082 my @smonthname; 5083 my @sabbrevmonthname; 5084 my @smonthday; 5085 my @sabbreverastring; 5086 my @sshortestdayname; 5087 5088 my $type = $cal->{type}; 5089 if (defined $cal->{locale} && defined $type) 5090 { 5091 my $loc = $lcnames{$cal->{locale}}; 5092 my $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMd' and not(\@alt)]" ); 5093 push @sshortdate, $fmt if $fmt; 5094 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yyyyMd' and not(\@alt)]" ); 5095 push @sshortdate, $fmt if $fmt; 5096 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMd' and not(\@alt)]" ); 5097 push @sshortdate, $fmt if $fmt; 5098 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yyyyMMMd' and not(\@alt)]" ); 5099 push @sshortdate, $fmt if $fmt; 5100 @sshortdate = map convert_date_format($_), @sshortdate; 5101 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateFormats/dateFormatLength[\@type='full']/dateFormat/pattern[not(\@alt)]" ); 5102 push @slongdate, $fmt if $fmt; 5103 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateFormats/dateFormatLength[\@type='long']/dateFormat/pattern[not(\@alt)]" ); 5104 push @slongdate, $fmt if $fmt; 5105 @slongdate = map convert_date_format($_), @slongdate; 5106 5107 foreach my $n (1..13) 5108 { 5109 my $name = loc_query( $loc, 
"/ldml/dates/calendars/calendar[\@type='$type']/months/monthContext[\@type='format']/monthWidth[\@type='wide']/month[\@type='$n' and not(\@yeartype)]" ); 5110 my $abbrev = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/months/monthContext[\@type='format']/monthWidth[\@type='abbreviated']/month[\@type='$n' and not(\@yeartype)]" ); 5111 push @smonthname, $name || ""; 5112 push @sabbrevmonthname, $abbrev || $name || ""; 5113 } 5114 5115 $scalname ||= loc_query( $loc, "/ldml/localeDisplayNames/types/type[\@key='calendar' and \@type='$type']" ); 5116 if (defined $cal->{eras}) 5117 { 5118 my @eras; 5119 my $idx = 1; 5120 foreach my $era (@{$cal->{eras}}) 5121 { 5122 my $start = xml_query( $suppl, "/supplementalData/calendarData/calendar[\@type='$type']/eras/era[\@type='$era']/\@start" ); 5123 next unless $start =~ /^(-?\d+)-(\d+)-(\d+)/; 5124 my ($year, $mon, $day, $zero, $first) = ($1, $2, $3, $1 - 1, 1); 5125 if ($zero < 0) 5126 { 5127 $first -= $zero; 5128 $year = 1; 5129 $itwodigityearmax = 2049 - $zero; 5130 } 5131 unshift @eras, pack( "S<8", 6, $idx++, $year, $mon, $day, $zero, $first, 0 ); 5132 push @serastring, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/eras/eraAbbr/era[\@type='$era']" ); 5133 push @sabbreverastring, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/eras/eraNarrow/era[\@type='$era']" ); 5134 } 5135 $iyearoffsetrange = add_str_data( pack "S<L<*", scalar @eras, map { add_str_data($_); } @eras ); 5136 } 5137 } 5138 5139 @sshortdate = @{$cal->{sshortdate}} if defined $cal->{sshortdate} && !@sshortdate; 5140 @syearmonth = @{$cal->{syearmonth}} if defined $cal->{syearmonth}; 5141 @slongdate = @{$cal->{slongdate}} if defined $cal->{slongdate} && !@slongdate; 5142 @serastring = @{$cal->{serastring}} if defined $cal->{serastring} && !@serastring; 5143 @sdayname = @{$cal->{sdayname}} if defined $cal->{sdayname}; 5144 @sabbrevdayname = @{$cal->{sabbrevdayname}} if defined $cal->{sabbrevdayname}; 5145 
@smonthname = @{$cal->{smonthname}} if defined $cal->{smonthname} && !join("",@smonthname); 5146 @sabbrevmonthname = @{$cal->{sabbrevmonthname}} if defined $cal->{sabbrevmonthname} && !join("",@sabbrevmonthname); 5147 @smonthday = @{$cal->{smonthday}} if defined $cal->{smonthday}; 5148 @sabbreverastring = @{$cal->{sabbreverastring}} if defined $cal->{sabbreverastring} && !@sabbreverastring; 5149 @sshortestdayname = @{$cal->{sshortestdayname}} if defined $cal->{sshortestdayname}; 5150 my $srelativelongdate = $cal->{srelativelongdate}; 5151 5152 @serastring = ("A.D.") unless @serastring; 5153 @sabbreverastring = ("AD") unless @sabbreverastring; 5154 5155 if ($cal->{id} != 1) # calendar 1 is a placeholder, information is fetched from locale instead 5156 { 5157 @sshortdate = ("") unless @sshortdate; 5158 @syearmonth = ("") unless @syearmonth; 5159 @slongdate = ("") unless @slongdate; 5160 @sdayname = ("") x 7 unless @sdayname; 5161 @sabbrevdayname = ("") x 7 unless @sabbrevdayname; 5162 @sshortestdayname = ("") x 7 unless @sshortestdayname; 5163 @smonthname = ("") x 13 unless @smonthname; 5164 @sabbrevmonthname = ("") x 13 unless @sabbrevmonthname; 5165 @smonthday = ("") unless @smonthday; 5166 } 5167 5168 $calendar_data .= pack "S<2L<17", 5169 $cal->{id}, # CAL_ICALINTVALUE 5170 $itwodigityearmax || 99, # CAL_ITWODIGITYEARMAX 5171 add_strarray( @sshortdate ), # CAL_SSHORTDATE 5172 add_strarray( @syearmonth ), # CAL_SYEARMONTH 5173 add_strarray( @slongdate ), # CAL_SLONGDATE 5174 add_strarray( @serastring ), # CAL_SERASTRING 5175 $iyearoffsetrange, # CAL_IYEAROFFSETRANGE 5176 add_strarray( @sdayname ), # CAL_SDAYNAME 5177 add_strarray( @sabbrevdayname ), # CAL_SABBREVDAYNAME 5178 add_strarray( @smonthname ), # CAL_SMONTHNAME 5179 add_strarray( @sabbrevmonthname ), # CAL_SABBREVMONTHNAME 5180 add_string( $scalname ), # CAL_SCALNAME 5181 add_strarray( @smonthday ), # CAL_SMONTHDAY 5182 add_strarray( @sabbreverastring ), # CAL_SABBREVERASTRING 5183 add_strarray( 
@sshortestdayname ),                      # CAL_SSHORTESTDAYNAME
            add_string( $srelativelongdate );     # CAL_SRELATIVELONGDATE
    }

    # output locale header

    my $nb_lcids = scalar keys %lcids;
    my $nb_locales = scalar grep { !defined $_->{alias} } @locales;
    my $nb_lcnames = scalar keys %lcnames;
    my $locale_size = length($locale_data) / $nb_locales;
    my $nb_calendars = scalar @calendars;
    my $calendar_size = length($calendar_data) / $nb_calendars;
    my $lcids_offset = 19 * 4;  # size of header
    my $lcnames_offset = $lcids_offset + length $lcid_data;
    my $locales_offset = $lcnames_offset + length $lcname_data;
    my $calendar_offset = $locales_offset + length $locale_data;
    my $strings_offset = $calendar_offset + length $calendar_data;

    my $locale_header = pack "L<7S<4L<S<2L<3S<2L<4",
        8,            # offset
        0,
        7,            # version
        0x5344534e,   # magic
        0, 0, 0,
        0,
        $nb_lcids,
        $nb_locales,
        $locale_size,
        $locales_offset,
        $nb_lcnames,
        0,
        $lcids_offset,
        $lcnames_offset,
        0,
        $nb_calendars,
        $calendar_size,
        $calendar_offset,
        $strings_offset,
        0, 0;

    return align_string( 4, $locale_header . $lcid_data . $lcname_data . $locale_data . $calendar_data . $string_data );
}


################################################################
# build the charmaps table for locale.nls
#
# Returns the concatenation of the binary case tables, in this order:
# digit folding, CJK compatibility, hiragana, katakana, halfwidth,
# fullwidth, traditional Chinese, simplified Chinese.
# Note: the file-level @digitmap_table, @halfwidth_table and
# @fullwidth_table are patched in place before being dumped.
sub build_charmaps_data()
{
    my $data = "";

    # MAP_FOLDDIGITS
    my @digits = (ord('0') .. ord('9'));
    $digitmap_table[0x3007] = $digits[0];               # Ideographic Zero
    @digitmap_table[0x0c78..0x0c7b] = @digits[0..3];    # Telugu Fraction Digits
    @digitmap_table[0x0c7c..0x0c7e] = @digits[1..3];    # Telugu Fraction Digits
    @digitmap_table[0x3021..0x3029] = @digits[1..9];    # Hangzhou Numerals
    @digitmap_table[0xa8e0..0xa8e9] = @digits;          # Combining Devanagari Digits
    @digitmap_table[0x10107..0x1010f] = @digits[1..9];  # Aegean Numbers
    $digitmap_table[0x10320] = $digits[1];              # Old Italic Numerals
    $digitmap_table[0x10321] = $digits[5];              # Old Italic Numerals
    $data .= dump_binary_case_table( @digitmap_table );

    # CJK compatibility map
    $data .= dump_binary_case_table( @cjk_compat_table );

    # LCMAP_HIRAGANA/KATAKANA
    # the two tables are mirror images: each code point maps to the
    # one 0x60 above (katakana) or below (hiragana)
    my (@hiragana_table, @katakana_table);
    foreach my $ch (0x3041..0x3096, 0x309d..0x309e)
    {
        $hiragana_table[$ch + 0x60] = $ch;
        $katakana_table[$ch] = $ch + 0x60;
    }
    $data .= dump_binary_case_table( @hiragana_table ) . dump_binary_case_table( @katakana_table );

    # LCMAP_HALFWIDTH/FULLWIDTH
    $halfwidth_table[0x2018] = 0x0027;
    $halfwidth_table[0x2019] = 0x0027;
    $halfwidth_table[0x201c] = 0x0022;
    $halfwidth_table[0x201d] = 0x0022;
    $halfwidth_table[0x309b] = 0xff9e;
    $halfwidth_table[0x309c] = 0xff9f;
    $fullwidth_table[0x309b] = 0x3099;
    $fullwidth_table[0x309c] = 0x309a;
    $data .= dump_binary_case_table( @halfwidth_table ) . dump_binary_case_table( @fullwidth_table );

    # LCMAP_TRADITIONAL/SIMPLIFIED_CHINESE
    $data .= dump_binary_case_table( @chinese_traditional_table ) . dump_binary_case_table( @chinese_simplified_table );

    # FIXME: some more unknown tables here

    return $data;
}


################################################################
# build the geoids table for locale.nls
#
# Returns a 7-dword header followed by one fixed-size record per
# entry of @geoids and then a name index sorted by name.
sub build_geoids_data()
{
    my $data = "";
    my %index;
    my $idx = 0;
    my @geo_header = (0x00650067, 0x0000006f, 0, 4 * 7, scalar @geoids, 0, 0);

    foreach my $entry (@geoids)
    {
        # The record keeps the id of the entry itself; everything else
        # comes from the alias target when there is one.  Use a separate
        # variable for that: assigning to the loop variable would
        # overwrite the corresponding element of @geoids.
        my $id = $entry->{id};
        my $geo = defined $entry->{alias} ? $entry->{alias} : $entry;
        my $lat = "0.000";
        my $long = "0.000";
        my $iso2 = $geo->{iso2} || "XX";
        my $iso3 = $geo->{iso3} || "XX";
        my $isregion = $geo->{region} || (defined $geo->{uncode} && !defined $geo->{iso2});
        my $sintlsymbol = $geo->{sintlsymbol} || "XDR";
        my $scurrency = $geo->{scurrency} || "\x{00a4}";

        $data .= pack( "L<", $id );
        $data .= pad_string( 24, encode( "UTF16LE", $lat ));
        $data .= pad_string( 24, encode( "UTF16LE", $long ));
        $data .= pack( "L<2", $isregion ? 14 : 16, $geo->{parentid} || 39070 );
        $data .= pad_string( 8, encode( "UTF16LE", $iso2 ));
        $data .= pad_string( 8, encode( "UTF16LE", $iso3 ));
        $data .= pack( "S<2", $geo->{uncode} || 0, $geo->{dialcode} || 0 );
        $data .= pad_string( 8, encode( "UTF16LE", $sintlsymbol ));
        $data .= pad_string( 16, encode( "UTF16LE", $scurrency ));
        $index{$geo->{name}} = $idx if $geo->{name};
        $idx++;
    }
    $index{"XX"} = $index{"001"};

    # header[3] is the header size, so this is the position right
    # after the records, where the name index starts
    $geo_header[5] = $geo_header[3] + length $data;
    $geo_header[6] = scalar keys %index;

    foreach my $name (sort keys %index)
    {
        $data .= pad_string( 8, encode( "UTF16LE", $name ));
        $data .= pack "L<", $index{$name};
    }

    # header size plus everything that follows it
    $geo_header[2] = $geo_header[3] + length $data;
    return pack( "L<7", @geo_header ) . $data;
}


################################################################
# build a binary locale table
#
# $filename:  path of the file to generate
# $chartypes: already-built binary data, written first after the header
#
# The file starts with 8 dwords; entries 0, 4, 5, 6 and 7 hold the
# offsets of the chartypes, locales, charmaps, geoids and scripts data.
sub dump_locales($$)
{
    my ($filename, $chartypes) = @_;

    # the name is passed as an argument so that it is never
    # interpreted as a format string
    printf "Building %s\n", $filename;

    my $locale_data = build_locale_data();
    my $charmaps_data = build_charmaps_data();
    my $geoids_data = build_geoids_data();
    my $scripts_data = "";  # FIXME

    my @header = ( 0 ) x 8;
    $header[0] = 4 * scalar @header;                  # chartypes offset
    $header[4] = $header[0] + length $chartypes;      # locales offset
    $header[5] = $header[4] + length $locale_data;    # charmaps offset
    $header[6] = $header[5] + length $charmaps_data;  # geoids offset
    $header[7] = $header[6] + length $geoids_data;    # scripts offset

    open OUTPUT, ">", "$filename.new" or die "Cannot create $filename.new: $!";
    binmode OUTPUT;  # packed binary data, must not go through newline translation
    print OUTPUT pack "L<*", @header;
    print OUTPUT $chartypes, $locale_data, $charmaps_data, $geoids_data, $scripts_data;
    # buffered write errors only show up at close time; do not let
    # save_file() install a truncated file
    close OUTPUT or die "Cannot write $filename.new: $!";
    save_file($filename);
}


################################################################
# return the day of week of the first of the month
#
# $month is 1-based; the result is the wday field of gmtime
# (0 is Sunday)
sub month_first_dow($$)
{
    my ($year, $month) = @_;
    my @time = gmtime( timegm_modern( 0, 0, 0, 1, $month - 1, $year ));
    return $time[6];
}


################################################################
# compare system time values
#
# Both arguments are references to 7-element arrays, compared
# numerically element by element; the result follows the <=> convention.
sub compare_systime($$)
{
    my ($a, $b) = @_;
    return $a->[0] <=> $b->[0] ||
           $a->[1] <=> $b->[1] ||
           $a->[2] <=> $b->[2] ||
           $a->[3] <=> $b->[3] ||
           $a->[4] <=> $b->[4] ||
           $a->[5] <=> $b->[5] ||
           $a->[6] <=> $b->[6];
}


################################################################
# compare the zone transition date with the rule date
sub
compare_transition_date($$$$)
{
    # $stdoff, $isdst: passed through to parse_transition_date
    # $zone:           reference to the date fields of a zone line (may be empty)
    # $rule:           reference to an already parsed transition date
    my ($stdoff, $isdst, $zone, $rule) = @_;

    # only a year (or nothing at all): compare the years, and never
    # report the two dates as equal
    if (scalar @{$zone} <= 1)
    {
        return (!defined($zone->[0]) || $zone->[0] > $rule->[0]) ? 1 : -1;
    }

    my @date = parse_transition_date( $stdoff, $isdst, $zone->[0], $zone->[1], $zone->[2], $zone->[3] || 0 );
    return compare_systime( \@date, $rule );
}


################################################################
# get the Windows zone names from the CLDR data
#
# Returns a hash mapping the Windows zone name to a
# [ display name, tzdata zone name ] pair.
sub load_windows_zones()
{
    my $current_name;
    my %names;
    my $base = "cldr-release-$CLDRVERSION";
    my $INPUT = open_data_file( "cldr", "$base/common/supplemental/windowsZones.xml" );
    while (<$INPUT>)
    {
        # the display name comes from the "(UTC...)" comment that
        # precedes the mapZone entries
        if (/<!-- +(\(UTC[^<]*) -->.*/)
        {
            $current_name = $1;
        }
        # only the territory="001" mapping of each zone is used
        if (/<mapZone other="(.*)" territory="001" type="(.*)"\/>/)
        {
            $names{$1} = [ $current_name, $2 ];
        }
    }
    close $INPUT;
    return %names;
}


################################################################
# parse a transition date specification from the tzdata files
#
# $stdoff: zone offset in minutes, as returned by parse_tz_offset
# $isdst:  false if the time is expressed in standard time
# $year, $in, $on, $at: year, month name, day specification ("lastSun",
#          "Sun>=8", "Sun<=25" or a day number) and time of day
#
# Returns (year, month, week, day of week, hour, minute, second);
# a week of 5 stands for the last week of the month.
sub parse_transition_date($$@)
{
    use integer;
    my ($stdoff, $isdst, $year, $in, $on, $at) = @_;

    $on = "1" unless defined $on;
    $at = "0" unless defined $at;

    my %months = ( Jan => 1, Feb => 2, Mar => 3, Apr => 4, May => 5, Jun => 6,
                   Jul => 7, Aug => 8, Sep => 9, Oct => 10, Nov => 11, Dec => 12 );
    my %days = ( Sun => 0, Mon => 1, Tue => 2, Wed => 3, Thu => 4, Fri => 5, Sat => 6 );

    my $mon = $in ? $months{$in} : 1;
    my ($week, $dow, $flag, $time, $sec);
    my $first = month_first_dow( $year, $mon );

    if ($on =~ /^last(.*)$/)
    {
        $week = 5;
        $dow = $days{$1};
    }
    elsif ($on =~ /^(.*)>=(\d+)$/)
    {
        $dow = $days{$1};
        my $diff = ($first + 6 - $dow) % 7;
        $week = $2 >= 25 ? 5 : ($2 + 6 + $diff) / 7;
    }
    elsif ($on =~ /^(.*)<=(\d+)$/)
    {
        $dow = $days{$1};
        my $diff = ($first + $2 + 6 - $dow) % 7;
        $week = ($2 + 6 - $diff) / 7;
        if (!$week)  # falls into the last week of the previous month
        {
            $week = 5;
            if (!--$mon) { $mon = 12; $year--; }
        }
    }
    elsif ($on =~ /^\d+$/)
    {
        $dow = ($first + $on - 1) % 7;
        $week = $on >= 25 ? 5 : ($on + 6) / 7;
    }
    else
    {
        die "unsupported date specification $year $in $on $at";
    }

    # $time is in minutes; the optional suffix is kept in $flag
    if ($at =~ /^(\d+):(\d+):(\d+)([uws]?)$/)
    {
        $time = $1 * 60 + $2;
        $sec = $3;
        $flag = $4;
    }
    elsif ($at =~ /^(\d+):(\d+)([uws]?)$/)
    {
        $time = $1 * 60 + $2;
        $flag = $3;
    }
    elsif ($at =~ /^(\d+)([uws]?)$/)
    {
        $time = $1 * 60;
        $flag = $2;
    }
    else
    {
        die "unsupported time specification $year $in $on $at";
    }

    $flag ||= "w";
    $time -= $stdoff if $flag eq "u";
    $time += 60 if !$isdst && $flag ne "w";

    if ($time < 0)  # previous day
    {
        $week-- if $week < 5 && $dow == month_first_dow( $year, $mon );
        $week-- if $week == 5 && $dow == month_first_dow( $year + ($mon == 12), $mon % 12 + 1 );
        if (!$week)
        {
            $week = 5;
            if (!--$mon) { $mon = 12; $year--; }
        }
        $dow = ($dow + 6) % 7;
        $time += 24 * 60;
    }

    return ($year, $mon, $week, $dow, $time / 60, $time % 60, $sec || 0);
}


################################################################
# pack a system time value as a SYSTEMTIME structure
#
# Takes the list returned by parse_transition_date.  The year is
# always stored as 0, and an hour of 24 or more is stored as
# 23:59:59.999.
sub pack_systime(@)
{
    my ($year, $mon, $week, $dow, $hour, $min, $sec) = @_;
    return pack "S<8", 0, $mon, $dow, $week, $hour < 24 ? ($hour, $min, $sec, 0) : (23, 59, 59, 999);
}


################################################################
# parse a timezone offset from the tzdata files
#
# Takes "[-]h[:mm[:ss]]" and returns the offset in minutes with the
# sign inverted; seconds are ignored.
sub parse_tz_offset($)
{
    my ($hour, $min) = split /:/, shift;
    $min ||= 0;
    # The sign has to be taken from the text: for an offset such as
    # "-0:30" the hour field is "-0", which is not numerically negative.
    my $negative = $hour =~ /^-/;
    return $negative ? -$hour * 60 + $min : -$hour * 60 - $min;  # invert sign
}


################################################################
# build the timezone data
#
# $filename: resource file to generate
# @_:        names of the tzdata files to load
#
# Writes a string table with the names of every Windows zone and adds
# the matching values (including the per-year "Dynamic DST" data for
# $FIRST_YEAR..$LAST_YEAR) under $zonekey in the registry.
sub dump_timezones($@)
{
    my $filename = shift;
    my $FIRST_YEAR = 2000;
    my $LAST_YEAR = 2030;

    my %names = load_windows_zones();
    my %zones;
    my %rules;
    my %links;
    my %res_indices;

    # the name is passed as an argument so that it is never
    # interpreted as a format string
    printf "Building %s\n", $filename;

    open OUTPUT, ">", "$filename.new" or die "Cannot create $filename.new: $!";
    print OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"winresrc.h\"\n\n";
    print OUTPUT "#pragma makedep po\n\n";
    print OUTPUT "LANGUAGE LANG_ENGLISH, SUBLANG_DEFAULT\n\n";
    print OUTPUT "STRINGTABLE\n{\n";

    # load tzdata files

    foreach my $filename (@_)
    {
        my $FILE = open_data_file( "tzdata", $filename );
        my $zonename;
        while (<$FILE>)
        {
            chomp;
            s/\#.*$//;
            next if /^\s*$/;
            my @fields = split /\s+/;
            # continuation lines of a zone start with whitespace, which
            # yields an empty first field
            if ($fields[0] eq "Zone" || ($zonename && $fields[0] eq ""))
            {
                shift @fields;
                $zonename = shift @fields unless $zonename;
                my ($stdoff, $rules, $dummy, @date) = @fields;
                $zones{$zonename} ||= [ ];
                push @{$zones{$zonename}}, [ parse_tz_offset( $stdoff ), $rules, @date ];
                $zonename = undef unless @date;  # last entry doesn't have an until date
                next;
            }
            if ($fields[0] eq "Rule")
            {
                shift @fields;
                my ($rulename, $from, $to, $dummy, $in, $on, $at, $save) = @fields;
                $to = $from if $to eq "only";
                $to = $LAST_YEAR if $to eq "max";
                push @{$rules{$rulename}}, [ parse_tz_offset( $save ), $from, $to, $in, $on, $at ];
                next;
            }
            if ($fields[0] eq "Link")
            {
                $links{$fields[2]} = $fields[1];
                next;
            }
            die "unrecognized line $_";
        }
        close $FILE;
    }

    foreach my $name (sort { uc($a) cmp uc($b) } keys %names)
    {
        my ($display, $zone) = @{$names{$name}};
        $zone = $links{$zone} if defined $links{$zone};

        # build list of transitions
        # each one is [ stdoff, dstoff, systime ], with a dstoff of -1
        # for the transitions that come from the zone lines themselves

        my @transitions;
        my @from_date = ( 1 );
        my $last_stdoff = 0;
        for (my $i = 0; $i < scalar @{$zones{$zone}}; $i++)
        {
            my ($stdoff, $rule, @until_date) = @{$zones{$zone}->[$i]};
            my $isdst = ($last_stdoff != $stdoff);
            $from_date[0] ||= $LAST_YEAR;
            my @systime = parse_transition_date( $stdoff, $isdst, @from_date );
            push @transitions, [ $stdoff, -1, \@systime ];

            if (defined $rules{$rule})
            {
                foreach my $r (@{$rules{$rule}})
                {
                    my ($offset, $from, $to, $in, $on, $at) = @{$r};
                    foreach my $year ($from..$to)
                    {
                        next if $year < $from_date[0];
                        next if $until_date[0] && $year > $until_date[0];
                        my @systime = parse_transition_date( $stdoff, !!$offset, $year, $in, $on, $at );
                        next if compare_transition_date( $stdoff, $isdst, \@until_date, \@systime ) <= 0;
                        my $ret = compare_transition_date( $stdoff, $isdst, \@from_date, \@systime );
                        next if $ret > 0;
                        pop @transitions if !$ret;  # remove transition if there's a dst change at the same time
                        push @transitions, [ $stdoff, $offset, \@systime ];
                    }
                }
            }
            @from_date = @until_date;
            $last_stdoff = $stdoff;
        }
        @transitions = sort { compare_systime( $a->[2], $b->[2] ) } @transitions;

        # build per-year dynamic info

        my @info;
        my $last_dstoff = 0;
        my $last_dst = 0;
        my $year = $FIRST_YEAR;
        while ($year <= $LAST_YEAR)
        {
            # consume the transitions of earlier years, remembering
            # only the standard offset they leave behind
            if (@transitions && $transitions[0]->[2]->[0] < $year)
            {
                $last_stdoff = $transitions[0]->[0];
                shift @transitions;
                next;
            }
            my ($std, $dst, @trans);
            my $cur_stdoff = $last_stdoff;
            my $cur_dstoff = ($name =~ /^UTC/) ? 0 : -60;
            while (@transitions && $transitions[0]->[2]->[0] == $year)
            {
                my $t = shift @transitions;
                my ($stdoff, $dstoff, $systime) = @{$t};
                $systime = pack_systime( @{$systime} );
                if (!$dstoff)  # std
                {
                    $cur_stdoff = $stdoff unless $std;
                    $std = $systime;
                }
                elsif ($dstoff != -1)  # dst
                {
                    $cur_dstoff = $dstoff unless $dst;
                    $dst ||= $systime;
                }
                elsif ($stdoff != $last_stdoff)  # rule transition
                {
                    # Handle a special case: Samoa moved to the other side of
                    # the date line between 2011-12-03 and 2012-01-01,
                    # entirely skipping the day 2011-12-31.  We ignore this
                    # change because it happens on a year boundary and more
                    # importantly it would generate an offset of -25 hours,
                    # which some programs (e.g., Mono) do not like.  See
                    # https://bugs.winehq.org/show_bug.cgi?id=51758

                    if ($last_stdoff - $stdoff < 24 * 60)
                    {
                        @trans = ($last_stdoff, $stdoff, $systime);
                        $cur_stdoff = $stdoff;
                    }
                }
                elsif ($dst)  # rule transition with no stdoff change
                {
                    $std = $systime;
                }
                $last_dstoff = ($dstoff == -1) ? 0 : $dstoff;
            }
            $last_stdoff = $cur_stdoff;

            if ($cur_dstoff > 0)  # swap std and dst to ensure that offset is negative
            {
                ($std, $dst) = ($dst, $std);
                $cur_stdoff += $cur_dstoff;
                $cur_dstoff = -$cur_dstoff;
            }

            if (@trans)
            {
                # heuristic to prefer switching dst
                if ($last_dst == $year - 1 || (!$last_dst && $trans[0] > $trans[1]))
                {
                    $dst ||= $trans[2];
                    $cur_stdoff = $trans[0];
                    $cur_dstoff = $trans[1] - $trans[0];
                }
                else
                {
                    $std ||= $trans[2];
                    $cur_stdoff = $trans[1];
                    $cur_dstoff = $trans[0] - $trans[1];
                }
            }

            if ($std || $dst)
            {
                # a year with only one of the two dates gets January 1st
                # for the other one
                $std ||= pack_systime( parse_transition_date( 0, 0, $year, "Jan", 1 ));
                $dst ||= pack_systime( parse_transition_date( 0, 0, $year, "Jan", 1 ));
                $last_dst = $year;
            }
            else
            {
                $std = pack "S<8", 0;
                $dst = pack "S<8", 0;
                $cur_stdoff += $last_dstoff;
            }
            $info[$year++] = pack( "l<3", $cur_stdoff, 0, $cur_dstoff ) . $std . $dst;
        }

        # output registry keys

        my $std_name = $name eq "UTC" ? "Coordinated Universal Time" : $name;
        my $dlt_name = $std_name =~ s/Standard Time/Daylight Time/r;
        # the resource id is derived from the last three hex digits of
        # the SHA-1 of the name, moving on by 16 until an unused one is found
        my $res_idx = hex( substr( Digest::SHA::sha1_hex($name), -3, 3 )) << 4;
        $res_idx += 16 while exists $res_indices{$res_idx};
        $res_indices{$res_idx} = 1;

        add_registry_string_value( $zonekey, $name, "Display", $display );
        add_registry_string_value( $zonekey, $name, "Std", $std_name );
        add_registry_string_value( $zonekey, $name, "Dlt", $dlt_name );
        add_registry_string_value( $zonekey, $name, "MUI_Std", sprintf( "\@tzres.dll,-%u", $res_idx ));
        add_registry_string_value( $zonekey, $name, "MUI_Dlt", sprintf( "\@tzres.dll,-%u", $res_idx + 1 ));
        add_registry_string_value( $zonekey, $name, "MUI_Display", sprintf( "\@tzres.dll,-%u", $res_idx + 2 ));
        add_registry_binary_value( $zonekey, $name, "TZI", $info[$LAST_YEAR] );

        printf OUTPUT "%7d \"#msgctxt#maximum 31 characters#%s\"\n", $res_idx, $std_name;
        printf OUTPUT "%7d \"#msgctxt#maximum 31 characters#%s\"\n", $res_idx + 1, $dlt_name;
        printf OUTPUT "%7d \"%s\"\n", $res_idx + 2, $display;

        # trim the years at both ends whose info is identical to the
        # one of the neighbouring entry that is kept
        my $first_year = $FIRST_YEAR;
        my $last_year = $LAST_YEAR;
        $last_year-- while $last_year > $FIRST_YEAR && $info[$last_year] eq $info[$last_year - 1];
        $first_year++ while $first_year < $last_year && $info[$first_year] eq $info[$last_year];

        next if $last_year <= $first_year;

        foreach my $i ($first_year..$last_year)
        {
            add_registry_binary_value( $zonekey, "$name\\Dynamic DST", $i, $info[$i] );
        }
        add_registry_dword_value( $zonekey, "$name\\Dynamic DST", "FirstEntry", $first_year );
        add_registry_dword_value( $zonekey, "$name\\Dynamic DST", "LastEntry", $last_year );
    }

    print OUTPUT "}\n";
    # buffered write errors only show up at close time; do not let
    # save_file() install a truncated file
    close OUTPUT or die "Cannot write $filename.new: $!";
    save_file($filename);
}


################################################################
# build the script to create registry keys
sub
dump_registry_script($%)
{
    # $filename: script file to generate
    # %keys:     maps a backslash-separated key path to an array reference
    #            holding the default value followed by the value lines
    my ($filename, %keys) = @_;
    my $indent = 1;
    my @prev;  # path components of the previous key that are still open

    printf "Building %s\n", $filename;
    open OUTPUT, ">", "$filename.new" or die "Cannot create $filename.new: $!";
    print OUTPUT "HKLM\n{\n";
    # compare case-insensitively, with the path separator sorting
    # before any other character
    foreach my $k (sort { ($a =~ tr/a-z\\/A-Z\001/r) cmp ($b =~ tr/a-z\\/A-Z\001/r) } keys %keys)
    {
        my @subkeys = split /\\/, $k;
        # skip the components shared with the previous key, then close
        # the ones that are left
        while (@prev && @subkeys && $prev[0] eq $subkeys[0]) { shift @prev; shift @subkeys; }
        while (@prev) { printf OUTPUT "%*s}\n", 4 * --$indent, ""; shift @prev; }
        my ($def, @vals) = @{$keys{$k}};
        for (my $i = 0; $i < @subkeys; $i++)
        {
            my $name = $subkeys[$i];
            my $prefix = "";
            if ($name =~ /^-/)  # a leading dash marks a NoRemove key
            {
                $name =~ s/^-//;
                $prefix = "NoRemove ";
            }
            if ($name =~ /\s/)
            {
                $name = "'$name'";
            }
            # the default value is only output with the last component
            printf OUTPUT "%*s%s%s%s\n%*s{\n", 4 * $indent, "", $prefix, $name,
                $i == $#subkeys && $def ? " = s '$def'" : "", 4 * $indent, "";
            $indent++;
        }
        # the value text is data: pass it as an argument instead of
        # making it part of the format string
        foreach my $v (sort @vals) { printf OUTPUT "%*sval %s\n", 4 * $indent, "", $v; }
        @prev = split /\\/, $k;
    }
    while (@prev) { printf OUTPUT "%*s}\n", 4 * --$indent, ""; shift @prev; }
    printf OUTPUT "}\n";
    # buffered write errors only show up at close time; do not let
    # save_file() install a truncated file
    close OUTPUT or die "Cannot write $filename.new: $!";
    save_file($filename);
}


################################################################
# save a file if modified
#
# Expects the new contents in "$file.new": that file replaces $file
# unless $file already exists with identical contents, in which case
# it is simply removed.
sub save_file($)
{
    my $file = shift;
    # list form: the names are never interpreted by a shell;
    # cmp -s only reports through its exit status
    if (-f $file && !system( "cmp", "-s", $file, "$file.new" ))
    {
        unlink "$file.new" or warn "Cannot remove $file.new: $!";
    }
    else
    {
        rename "$file.new", "$file" or die "Cannot rename $file.new to $file: $!";
    }
}


################################################################
# main routine

chdir ".." if -f "./make_unicode";
load_data();
dump_bidi_dir_table( "dlls/gdi32/uniscribe/direction.c" );
dump_bidi_dir_table( "dlls/dwrite/direction.c" );
dump_bidi_dir_table( "dlls/wineps.drv/direction.c" );
dump_mirroring( "dlls/gdi32/uniscribe/mirror.c" );
dump_mirroring( "dlls/dwrite/mirror.c" );
dump_bracket( "dlls/gdi32/uniscribe/bracket.c" );
dump_bracket( "dlls/dwrite/bracket.c" );
dump_shaping( "dlls/gdi32/uniscribe/shaping.c" );
dump_arabic_shaping( "dlls/dwrite/shapers/arabic_table.c" );
dump_linebreak( "dlls/gdi32/uniscribe/linebreak.c" );
dump_linebreak( "dlls/dwrite/linebreak.c" );
dump_scripts( "dlls/dwrite/scripts" );
dump_indic( "dlls/gdi32/uniscribe/indicsyllable.c" );
dump_vertical( "dlls/win32u/vertical.c", 1 );
dump_vertical( "dlls/wineps.drv/vertical.c", 0 );
dump_intl_nls("nls/l_intl.nls");
dump_norm_table( "nls/normnfc.nls" );
dump_norm_table( "nls/normnfd.nls" );
dump_norm_table( "nls/normnfkc.nls" );
dump_norm_table( "nls/normnfkd.nls" );
dump_norm_table( "nls/normidna.nls" );
# the chartypes data built along with the sort keys is stored in locale.nls
my $chartypes = dump_sortkey_table( "nls/sortdefault.nls" );
dump_locales( "nls/locale.nls", $chartypes );
foreach my $file (@allfiles) { dump_msdata_codepage( $file ); }
dump_eucjp_codepage();
# the registry script comes last: dump_timezones adds registry values too
dump_timezones( "dlls/tzres/tzres.rc", @timezone_files );
dump_registry_script( "dlls/kernelbase/kernelbase.rgs", %registry_keys );

exit 0;

# Local Variables:
# compile-command: "./make_unicode"
# End: