Jump to content

User:Drmccreedy/roadmap

From Wikipedia, the free encyclopedia

This is the Perl script I use to generate various Wikimedia Unicode character roadmaps.
It creates multilingual images for these Unicode planes:

As well as the legacy English-only images:

The script was based on Saric's script for the BMP Roadmap.
Feel free to use and adapt it or ping me if you'd like to add or correct a translation.

The following languages are supported: English, Belarusian, Chinese using simplified characters, Chinese using traditional characters, Czech, Dutch, French, German, Hungarian, Korean, Persian, Portuguese, Russian, Spanish, Turkish and Ukrainian.

I also have a test page so I can review all of the languages at once.

#!/usr/bin/perl
#                                                       last updated 2024-09-11
#
# This script uses Unicode data to create roadmaps of character allocation by plane.
# The groupings more or less match up to the chapters of the Unicode Standard.
#
# Various types of SVG images are created for each named Unicode plane with
# allocated code points using the naming convention new_svgs/map_XXX_variation.svg
# where
#    XXX is BMP, SIP, SMP, SSP, or TIP
#    variation is "multilingual" (850 by 500px),
#                 "en" (English legend only, 750px by 750px), or 
#                 "no_legend" (500px by 500px).
#
# SVG images can also be created for the other planes but aren't very interesting
# and shouldn't be added to Wikimedia/Wikipedia.
# If the -all option is specified, the script will generate output 
# for the other planes using the naming convention new_svgs/map_Znn_variation.svg
# where nn is the plane number (for example, new_svgs/map_Z03_multilingual.svg).
#
# Note that the multilingual SVG files created by this script use the <switch>
# element and the 'systemLanguage' attribute to provide a legend in multiple
# languages within one SVG file.  Wikipedia also allows the language to be 
# specified explicitly with the lang parameter: [[File:filename.svg|lang=code]]
# Keep in mind that if the SVG display program doesn't support the allowReorder
# attribute you'll see the FIRST language match not the BEST language match.
# For example, if the web browser language preferences are Dutch (nl) then German (de),
# and it doesn't support allowReorder, you'll see German because "de" comes before "nl"
# in the SVG file.

use utf8;
use feature 'unicode_strings';
use warnings;
use strict;
use Getopt::Long qw(GetOptions);
use POSIX qw(ceil floor);
use SVG;

# Parse the command line.  GetOptions is called without destination
# references, so Getopt::Long stores each value in the package variable
# $main::opt_<name>.  The "our" declarations make those variables usable
# under "use strict" and give every unset option a defined default.
my $result = GetOptions(qw(all|a test|t=s help|h|?));
our $opt_all = 0 unless $main::opt_all;    # print all 17 planes if specified
our $opt_test = "" unless $main::opt_test; # replace English text with this language
our $opt_help = 0 unless $main::opt_help;  # show help display and exit
if($opt_help || !$result) {
   my $usage =
      "Syntax: $0 [-all] [-test lang] [-?]\n" .
      "   where \"-all\" prints all 17 Unicode planes\n" .
      "         \"-test lang\" replaces English text with language code \"lang\",\n" .
      "                        draws dashed lines around the legend,\n" .
      "                        and prefaces the output file names with test_lang_\n" .
      "         \"-?\" shows this helpful information and exits\n";
   if($result) {
      # Help was requested explicitly: regular output, successful exit.
      print $usage;
      exit 0;
   }
   # GetOptions returned false, i.e. the command line was invalid.
   # Send the usage text to STDERR and exit non-zero so that calling
   # scripts can detect the failure (same exit code as a bad -test language).
   print STDERR $usage;
   exit 1;
}

# Directory that receives the generated SVG files (relative to the current
# working directory).  Create it on first use and stop right away if that
# fails, instead of failing later when the first image is written.
my $outputDirectory = "new_svgs";
unless(-d $outputDirectory) {
   mkdir($outputDirectory)
      or die "Fatal error: Cannot create output directory \"$outputDirectory\": $!\n";
}

# Font stack used for the legend text unless a language overrides it.
our $defaultFontFamily = q{'DejaVu Sans', sans-serif};

# Per-language font stacks, keyed by language code.
# Only add a font here if the language cannot be represented with the
# default fonts above.
our %fontFamilyOverride;
$fontFamilyOverride{ko} = q{'UnJamoDotum', sans-serif};

# Right-to-left languages (fa = Persian).  Membership is tested with
# exists(), so only the keys matter and every value stays undefined.
our %isRTL = map { $_ => undef } qw(fa);

# Legend text for every supported language.
#
# Outer key: a language code, or a comma-separated list of codes where several
#            codes share one translation (see the two Chinese entries).
# Inner key: a group name as used in %legendByPlane and %legendColorByGroup,
#            plus "asOfVersion", a format string whose %s presumably receives
#            $version (the formatting call is outside this section -- confirm).
#
# A "\n" inside a value presumably marks a line break in the drawn legend
# (confirm against the legend drawing code).
# "\%s" and "%s" are the same string inside double quotes; both spellings occur.
# Every language should define the same set of inner keys as the English entry.
our %legendTextByLanguageAndGroup = (
   # English
   en => {
      Africa      => "African scripts",
      Americas    => "American scripts",
      AsiaEast    => "East Asian scripts",
      AsiaSC      => "South and\nCentral Asian scripts",
      AsiaSE      => "Southeast Asian scripts",
      asOfVersion => "As of Unicode \%s",
      cuneiform   => "Cuneiform",
      Europe      => "Non-Latin European scripts",
      Han         => "CJK characters",
      hieroglyphs => "Hieroglyphs",
      IndOcean    => "Indonesian and\nOceanic scripts",
      Latin       => "Latin script",
      ME          => "Middle Eastern and\nSouthwest Asian scripts",
      misc        => "Miscellaneous characters",
      notation    => "Notational systems",
      private     => "Private use",
      surrogates  => "UTF-16 surrogates",
      symbols     => "Symbols",
      tags        => "Tags",
      unallocated => "Unallocated code points",
      variation   => "Variation Selectors",
   },
   # Belarusian
   be => {
      Africa      => "Пісьменства Афрыкі",
      Americas    => "Пісьменства Амерыкі",
      AsiaEast    => "Пісьменства Усходняй Азіі",
      AsiaSC      => "Пісьменства Паўднёвай і\nЦэнтральнай Азіі",
      AsiaSE      => "Пісьменства Паўднёва-Усходняй Азіі",
      asOfVersion => "Па стане на версію Унікода %s",
      cuneiform   => "Клінапіс",
      Europe      => "Нелацінскія еўрапейскія пісьменства",
      Han         => "Ідэаграмы ККЯ",
      hieroglyphs => "Іерогліфы",
      IndOcean    => "Пісьменства Інданезіі і Акіяніі",
      Latin       => "Лацінская пісьменнасць",
      ME          => "Пісьменства Сярэдняга Усходу і\nПаўднёва-Заходняй Азіі",
      misc        => "Розныя сімвалы",
      notation    => "Сістэмы нотапісу",
      private     => "Вобласць для прыватнага выкарыстання",
      surrogates  => "Сурагатныя пары UTF-16",
      symbols     => "Знакі",
      tags        => "Тэгі",
      unallocated => "Свабодныя кодавыя пазіцыі",
      variation   => "Варыянтныя селектары",
   },
   # Chinese using simplified characters
   "zh-cmn-Hans,zh-Hans,zh-CN" => {
      Africa      => "非洲文字",
      Americas    => "美洲文字",
      AsiaEast    => "东亚文字",
      AsiaSC      => "南亚及中亚文字",
      AsiaSE      => "东南亚文字",
      asOfVersion => "目前版本为Unicode %s",
      cuneiform   => "楔形文字",
      Europe      => "非拉丁欧洲文字",
      Han         => "中日韩汉字",
      hieroglyphs => "象形文字",
      IndOcean    => "印度尼西亚及大洋洲文字",
      Latin       => "拉丁文字",
      ME          => "中东及西南亚文字",
      misc        => "杂项字符",
      notation    => "符号系统",
      private     => "私人使用区",
      surrogates  => "UTF-16代理区",
      symbols     => "符号",
      tags        => "标签",
      unallocated => "未分配代码点",
      variation   => "变体选择符",
   },
   # Chinese using traditional characters
   "zh-cmn-Hant,zh-Hant,zh-TW" => {
      Africa      => "非洲文字",
      Americas    => "美洲文字",
      AsiaEast    => "東亞文字",
      AsiaSC      => "南亞及中亞文字",
      AsiaSE      => "東南亞文字",
      asOfVersion => "目前版本為Unicode %s",
      cuneiform   => "楔形文字",
      Europe      => "非拉丁歐洲文字",
      Han         => "中日韓漢字",
      hieroglyphs => "象形文字",
      IndOcean    => "印度尼西亞及大洋洲文字",
      Latin       => "拉丁文字",
      ME          => "中東及西南亞文字",
      misc        => "雜項字符",
      notation    => "符號系統",
      private     => "私人使用區",
      surrogates  => "UTF-16代理區",
      symbols     => "符號",
      tags        => "標籤",
      unallocated => "未分配代碼點",
      variation   => "變體選擇符",
   },
   # Czech
   cs => {
      Africa      => "Africká písma",
      Americas    => "Americká písma",
      AsiaEast    => "Východoasijská písma",
      AsiaSC      => "Písma jižní a střední Asie",
      AsiaSE      => "Písma jihovýchodní Asie",
      asOfVersion => "V Unicode %s",
      cuneiform   => "Klínové písmo",
      Europe      => "Nelatinková evropská písma",
      Han         => "Čínština, japonština a korejština",
      hieroglyphs => "Hieroglyfy",
      IndOcean    => "Písma Indonésie a Oceánie",
      Latin       => "Latinka",
      ME          => "Písma Blízkého a Středního východu",
      misc        => "Různé znaky",
      notation    => "Notační systémy",
      private     => "Pro soukromé použití",
      surrogates  => "Náhradní páry UTF-16 (surrogate pairs)",
      symbols     => "Symboly",
      tags        => "Jmenovky (tags)",
      unallocated => "Nepřidělené kódové body",
      variation   => "Selektory variant",
   },
   # Dutch
   nl => {
      Africa      => "Afrikaanse schriften",
      Americas    => "Noord- en Zuid-Amerikaanse schriften",
      AsiaEast    => "Oost-Aziatische schriften",
      AsiaSC      => "Zuid- en Centraal-Aziatische schriften",
      AsiaSE      => "Zuidoost-Aziatische schriften",
      asOfVersion => "Geldig voor Unicode \%s",
      cuneiform   => "Spijkerschrift",
      Europe      => "Niet-Latijnse Europese schriften",
      Han         => "CJK-karakters",
      hieroglyphs => "Hiërogliefen",
      IndOcean    => "Indonesische en Oceanische schriften",
      Latin       => "Latijnse schriften",
      ME          => "Midden-Oosterse en\nZuidwest-Aziatische schriften",
      misc        => "Diverse karakters",
      notation    => "Notatiesystemen",
      private     => "Privégebruik",
      surrogates  => "UTF-16-plaatsvervangers",
      symbols     => "Symbolen",
      tags        => "Tags",
      unallocated => "Niet toegekend",
      variation   => "Variantkeuzes",
   },
   # French
   fr => {
      Africa      => "Écritures africaines",
      Americas    => "Écriture américaine",
      AsiaEast    => "Écriture de l'Asie de l'Est",
      AsiaSC      => "Écriture de l'Asie centrale et du Sud",
      AsiaSE      => "Écriture de l'Asie du Sud-Est",
      asOfVersion => "À partir d'Unicode %s",
      cuneiform   => "Cunéiforme",
      Europe      => "Écriture européenne non latine",
      Han         => "Caractères CJK",
      hieroglyphs => "Hiéroglyphes",
      IndOcean    => "Écritures indonésiennes\net océaniennes",
      Latin       => "Écriture latine",
      ME          => "Écriture du Moyen-Orient\net de l'Asie du Sud-Ouest",
      misc        => "Caractères divers",
      notation    => "Systèmes de notation",
      private     => "Usage privé",
      surrogates  => "Seizet d'indirection de l'UTF-16",
      symbols     => "Symboles",
      tags        => "Étiquettes",
      unallocated => "Points de code non alloués",
      variation   => "Sélecteurs de variations",
   },
   # German
   de => {
      Africa      => "Afrikanische Schriften",
      Americas    => "Amerikanische Schriften",
      AsiaEast    => "Ostasiatische Schriften",
      AsiaSC      => "Süd- und Mittelasiatische\nSchriften",
      AsiaSE      => "Südostasiatische Schriften",
      asOfVersion => "Stand: Unicode \%s",
      cuneiform   => "Keilschrift",
      Europe      => "Andere europäische Schriften",
      Han         => "CJK-Ideogramme",
      hieroglyphs => "Hieroglyphen",
      IndOcean    => "Indonesische und ozeanische\nSchriften",
      Latin       => "Lateinische Schriften und Symbole",
      ME          => "Nahost- und Südwestasiatische\nSchriften",
      misc        => "Verschiedene Zeichen",
      notation    => "Notationssysteme",
      private     => "Privater Nutzungsbereich",
      surrogates  => "UTF-16-Surrogates",
      symbols     => "Symbole",
      tags        => "Tags",
      unallocated => "Nicht belegte Codebereiche",
      variation   => "Variantenselektoren",
   },
   # Hungarian
   hu => {
      Africa      => "Afrikai írásrendszerek",
      Americas    => "Amerikai írásrendszerek",
      AsiaEast    => "Kelet-ázsiai írásrendszerek",
      AsiaSC      => "Dél- és közép-ázsiai írásrendszerek",
      AsiaSE      => "Délkelet-ázsiai írásrendszerek",
      asOfVersion => "A Unicode \%s szerint",
      cuneiform   => "Ékírás",
      Europe      => "Nem latin betűs\neurópai írásrendszerek",
      Han         => "CJK (kínai, japán, koreai) karakterek",
      hieroglyphs => "Hieroglifák",
      IndOcean    => "Indonéziai és óceániai\nírásrendszerek",
      Latin       => "Latin betűs írás",
      ME          => "Közel-keleti és délnyugat-\názsiai írásrendszerek",
      misc        => "Egyéb karakterek",
      notation    => "Jelölésrendszerek",
      private     => "Saját használatú terület",
      surrogates  => "UTF-16-helyettesítők",
      symbols     => "Szimbólumok",
      tags        => "Címkék",
      unallocated => "Nem használt kódpontok",
      variation   => "Variációválasztók",
   },
   # Korean
   # NOTE: notation, tags and variation still carry the English text here.
   ko => {
      Africa      => "아프리카 문자",
      Americas    => "북미 및 남미 문자",
      AsiaEast    => "동아시아 문자",
      AsiaSC      => "남부와 중앙 아시아 문자",
      AsiaSE      => "동남아시아 문자",
      asOfVersion => "유니 코드 버전  \%s",
      cuneiform   => "쐐기 문자",
      Europe      => "기타 유럽 문자",
      Han         => "CJK 문자",
      hieroglyphs => "상형 문자",
      IndOcean    => "인도네시아, 오세아니아 문자",
      Latin       => "로마자, 로마자권 기호",
      ME          => "중동·서남아시아 문자",
      misc        => "기타 문자",
      notation    => "Notational systems",
      private     => "사용자 정의 영역",
      surrogates  => "UTF-16 상·하위 대체 영역",
      symbols     => "기호",
      tags        => "Tags",
      unallocated => "쓰이지 않음",
      variation   => "Variation Selectors",
   },
   # Persian (right-to-left, see %isRTL)
   fa => {
      Africa      => "خط‌های آفریقایی",
      Americas    => "خط‌های آمریکایی",
      AsiaEast    => "خط‌های آسیای شرقی",
      AsiaSC      => "خط‌های جنوب آسیا و آسیای میانه",
      AsiaSE      => "خط‌های جنوب شرق آسیا",
      asOfVersion => "تا یونی‌کد \%s",
      cuneiform   => "خط میخی",
      Europe      => "خط‌های اروپایی غیر لاتین",
      Han         => "اندیشه‌نگاری‌های CJK",
      hieroglyphs => "هیروگلیف‌ها",
      IndOcean    => "خط‌های اندونزی و اقیانوسیه",
      Latin       => "خط لاتین",
      ME          => "خط‌های خاورمیانه و جنوب آسیا",
      misc        => "نویسه‌های متفرقه",
      notation    => "نمادگان‌ها",
      private     => "کاربرد شخصی",
      surrogates  => "جایگزین‌های UTF-16",
      symbols     => "نمادها",
      tags        => "برچسب‌ها",
      unallocated => "موقعیت‌کدهای منتسب‌نشده",
      variation   => "انتخابگرهای گلیف",
   },
   # Portuguese
   pt => {
      Africa      => "Escrita africana",
      Americas    => "Escrita americana",
      AsiaEast    => "Escrita da Ásia Oriental",
      AsiaSC      => "Escrita da Ásia Central\ne do Sul",
      AsiaSE      => "Escrita do Sudeste Asiático",
      asOfVersion => "A partir do Unicode %s",
      cuneiform   => "Cuneiforme",
      Europe      => "Escrita europeia não latina",
      Han         => "Caracteres CJK",
      hieroglyphs => "Hieróglifos",
      IndOcean    => "Escrita indonésia e oceânica",
      Latin       => "Escrita latina",
      ME          => "Escrita do Oriente Médio\ne do Sudoeste Asiático",
      misc        => "Caracteres diversos",
      notation    => "Sistemas de notação",
      private     => "Uso privado",
      surrogates  => "Substitutos do UTF-16",
      symbols     => "Símbolos",
      tags        => "Etiquetas",
      unallocated => "Pontos de código não atribuídos",
      variation   => "Seletores de variação",
   },
   # Russian
   ru => {
      Africa      => "Письменности Африки",
      Americas    => "Письменности Америки",
      AsiaEast    => "Письменности Восточной Азии",
      AsiaSC      => "Письменности Южной и\nЦентральной Азии",
      AsiaSE      => "Письменности Юго-Восточной Азии",
      asOfVersion => "По состоянию на версию Юникода %s",
      cuneiform   => "Клинопись",
      Europe      => "Нелатинские европейские письменности",
      Han         => "Идеограммы ККЯ",
      hieroglyphs => "Иероглифы",
      IndOcean    => "Письменности Индонезии и Океании",
      Latin       => "Латинская письменность",
      ME          => "Письменности Среднего Востока и\nЮго-Западной Азии",
      misc        => "Разные символы",
      notation    => "Системы нотописи",
      private     => "Область для частного использования",
      surrogates  => "Суррогатные пары UTF-16",
      symbols     => "Знаки",
      tags        => "Тэги",
      unallocated => "Свободные кодовые позиции",
      variation   => "Вариантные селекторы",
   },
   # Spanish
   es => {
      Africa      => "Escrituras africanas",
      Americas    => "Escrituras americanas",
      AsiaEast    => "Escrituras de Asia Oriental",
      AsiaSC      => "Escrituras de Asia Meridional\ny Asia Central",
      AsiaSE      => "Escrituras del Sudeste Asiático",
      asOfVersion => "A partir de Unicode %s",
      cuneiform   => "Cuneiforme",
      Europe      => "Escrituras europeas no latinas",
      Han         => "Caracteres CJK",
      hieroglyphs => "Jeroglíficos",
      IndOcean    => "Escrituras indonesias y oceánicas",
      Latin       => "Escritura latina",
      ME          => "Escrituras del Oriente Medio\ny del Asia sudoccidental",
      misc        => "Caracteres varios",
      notation    => "Sistemas notacionales",
      private     => "Uso privado",
      surrogates  => "Sustitutos de UTF-16",
      symbols     => "Símbolos",
      tags        => "Etiquetas",
      unallocated => "Puntos de código no asignados",
      variation   => "Selectores de variación",
   },
   # Turkish
   tr => {
      Africa      => "Afrika yazıları",
      Americas    => "Amerikan yazını",
      AsiaEast    => "Doğu Asya yazıları",
      AsiaSC      => "Güney ve Orta Asya yazıları",
      AsiaSE      => "Güneydoğu Asya yazını",
      asOfVersion => "Unicode %s'den itibaren",
      cuneiform   => "Çivi Yazısı",
      Europe      => "Latin olmayan Avrupa yazıları",
      Han         => "Çince ve Japonca karakterler",
      hieroglyphs => "Hiyeroglifler",
      IndOcean    => "Endonezya ve Okyanusya yazıları",
      Latin       => "Latince yazı",
      ME          => "Orta Doğu ve Güneybatı Asya yazını",
      misc        => "Çeşitli karakterler",
      notation    => "Notasyonel sistemler",
      private     => "Özel kullanım",
      surrogates  => "UTF-16 vekilleri",
      symbols     => "Semboller",
      tags        => "Etiketler",
      unallocated => "Ayrılmamış kod noktaları",
      variation   => "Varyasyon Seçiciler",
   },
   # Ukrainian
   uk => {
      Africa      => "Писемності Африки",
      Americas    => "Писемності Америки",
      AsiaEast    => "Писемності Східної Азії",
      AsiaSC      => "Писемності Південної і\nЦентральної Азії",
      AsiaSE      => "Писемності Південно-Східної Азії",
      asOfVersion => "Станом на версію Юнікоду %s",
      cuneiform   => "Клинопис",
      Europe      => "Нелатинські європейські писемності",
      Han         => "Ідеограми ККЯ",
      hieroglyphs => "Ієрогліфи",
      IndOcean    => "Писемності Індонезії та Океанії",
      Latin       => "Латинська писемність",
      ME          => "Писемності Середнього Сходу і\nПівденно-Західної Азії",
      misc        => "Різні символи",
      notation    => "Системи нотописі",
      private     => "Область для приватного використання",
      surrogates  => "Сурогатні пари UTF-16",
      symbols     => "Знаки",
      tags        => "Теги",
      unallocated => "Вільні кодові позиції",
      variation   => "Варіантні селектори",
   },
);


# -test support (testing aid only): show the requested language wherever the
# English text would normally be used.
if($opt_test) {
   unless(exists($legendTextByLanguageAndGroup{$opt_test})) {
      # Unknown language: list every available code on one line and give up.
      my @supportedCodes = sort customLanguageSort keys %legendTextByLanguageAndGroup;
      print STDERR "Unsupported language code $opt_test specified on -test option\nSupported language codes:"
                   . join(' ', '', @supportedCodes)
                   . "\n";
      exit 1;
   }

   warn "\nWarning: Overwriting systemLanguage=en values with $opt_test text for testing purposes\n";
   # The 'en' slot now shares the hash of the language under test.
   $legendTextByLanguageAndGroup{'en'} = $legendTextByLanguageAndGroup{$opt_test};
   # Keep the text direction of the language under test.
   $isRTL{'en'} = undef if(exists($isRTL{$opt_test}));
}

# The 'default' entry shares the hash of the default language.
my $defaultLanguage = 'en';
$legendTextByLanguageAndGroup{'default'} = $legendTextByLanguageAndGroup{$defaultLanguage};

# Per-plane tables keyed by plane number; they are filled in by the
# "Plane N Data" sections that follow.
our (%legendByPlane, %dataByPlane);

our $version = '16.0'; # Unicode version to add to the legend

# Acronym of each named plane, keyed by plane number.
our %planeAcronymByNumber = (
    0 => 'BMP',
    1 => 'SMP',
    2 => 'SIP',
    3 => 'TIP',
   14 => 'SSP',
);

# Full name of each named plane, keyed by plane number.
our %planeNameByNumber = (
    0 => 'Basic Multilingual Plane',
    1 => 'Supplementary Multilingual Plane',
    2 => 'Supplementary Ideographic Plane',
    3 => 'Tertiary Ideographic Plane',
   14 => 'Supplementary Special-purpose Plane',
);
# Note: Unicode has not published identifying names for planes 15 and 16.
# Chapter 2.8 of the Unicode Standard says "The two Private Use Planes (Planes 15 and 16)",
# while the PUA block names used are Supplementary PUA-A and Supplementary PUA-B.

# --------------- Plane 0 Data (BMP)  --------------- 
# $legendByPlane{0}: the groups listed in the legend of this plane, followed
# by the "asOfVersion" line.
$legendByPlane{0} = [qw(Latin Europe Africa ME AsiaSC AsiaSE AsiaEast Han IndOcean Americas notation symbols private surrogates unallocated asOfVersion)];
#                    FYI: BMP legend omits "misc" and "variation" groups because they're too small to see.
# $dataByPlane{0}: one entry per line in the form "abc d group", where
#    a     = plane (a single hex digit)
#    bc, d = the next three hex digits of the code point
#    group = a key of %legendColorByGroup (the parser dies on an unknown group)
# The entry marks code point U+abcd0.  Presumably the group applies from
# there up to the next entry (confirm against the drawing code).
# The parser removes everything from a "#" to the end of the line, so trailing
# comments may be added inside the string.
$dataByPlane{0} =
#   abc d group # comment (for a starting code point of U+abcd0)
 qq[000 0 Latin
    003 0 Europe
    005 9 ME
    007 8 AsiaSC
    007 C Africa
    008 0 ME
    009 0 AsiaSC
    00E 0 AsiaSE
    00F 0 AsiaSC
    010 0 AsiaSE
    010 A Europe
    011 0 AsiaEast
    012 0 Africa
    013 A Americas
    016 8 Europe
    017 0 IndOcean
    017 8 AsiaSE
    018 0 AsiaSC
    018 B Americas
    019 0 AsiaSC
    019 5 AsiaSE
    01A 0 IndOcean
    01A 2 AsiaSE
    01A B Europe
    01B 0 IndOcean
    01C 0 AsiaSC
    01C 8 Europe
    01C C IndOcean
    01C D AsiaSC
    01D 0 Latin
    01D C Europe
    01E 0 Latin
    01F 0 Europe
    020 7 symbols
    028 0 notation
    029 0 symbols
    02C 0 Europe
    02C 6 Latin
    02C 8 Europe
    02D 3 Africa
    02D E Europe
    02E 0 symbols
    02E 8 Han
    02F E unallocated
    02F F Han
    030 0 AsiaEast
    031 C Han
    031 F AsiaEast
    032 0 symbols
    034 0 Han
    04D C symbols
    04E 0 Han
    0A0 0 AsiaEast
    0A5 0 Africa
    0A6 4 Europe
    0A6 A Africa
    0A7 0 AsiaEast
    0A7 2 Latin
    0A8 0 AsiaSC
    0A8 3 symbols
    0A8 4 AsiaSC
    0A9 0 AsiaSE
    0A9 3 IndOcean
    0A9 6 AsiaEast
    0A9 8 IndOcean
    0A9 E AsiaSE
    0AA E AsiaSC
    0AB 0 Africa
    0AB 3 Latin
    0AB 7 Americas
    0AB C AsiaSC
    0AC 0 AsiaEast
    0D8 0 surrogates
    0E0 0 private
    0F9 0 Han
    0FB 0 Latin
    0FB 1 Europe
    0FB 2 ME
    0FE 0 variation
    0FE 1 AsiaEast
    0FE 2 Europe
    0FE 3 AsiaEast
    0FE 7 ME
    0FF 0 AsiaEast
    0FF F misc];

# --------------- Plane 1 Data (SMP)  --------------- 
# $legendByPlane{1}: the groups listed in the legend of this plane, followed
# by the "asOfVersion" line.
$legendByPlane{1} = [qw(Latin Europe Africa ME AsiaSC AsiaSE AsiaEast IndOcean Americas cuneiform hieroglyphs notation symbols unallocated asOfVersion)];
# $dataByPlane{1}: one "abc d group" entry per line; "a" is the plane digit,
# "bc" and "d" are the following hex digits, and "group" must be a key of
# %legendColorByGroup.  Unlike the BMP table, gaps are listed explicitly as
# "unallocated" entries.
$dataByPlane{1} =
#   abc d group # comment (for a starting code point of U+abcd0)
 qq[100 0 Europe
    102 0 unallocated
    102 8 Europe
    103 8 cuneiform
    103 E unallocated
    104 0 Americas
    104 5 Europe
    104 8 Africa
    104 B Americas
    105 0 Europe
    107 8 Latin
    107 C unallocated
    108 0 Europe
    108 4 ME
    108 B unallocated
    108 E ME
    109 2 Europe
    109 4 unallocated
    109 8 hieroglyphs
    10A 0 AsiaSC
    10A 6 ME
    10A A unallocated
    10A C ME
    10B B unallocated
    10C 0 AsiaSC
    10C 5 unallocated
    10C 8 Europe
    10D 0 AsiaSE
    10D 4 Africa
    10D 9 unallocated
    10E 6 symbols
    10E 8 ME
    10F 0 AsiaSC
    10F E ME
    110 0 AsiaSC
    112 5 unallocated
    112 8 AsiaSC
    114 E unallocated
    115 8 AsiaSC
    116 D AsiaSE
    117 0 AsiaSC
    117 5 unallocated
    118 0 AsiaSC
    118 5 unallocated
    118 A AsiaSC
    119 6 unallocated
    119 A AsiaSC
    11A B Americas
    11A C AsiaSE
    11B 0 AsiaSC
    11B 6 unallocated
    11B C AsiaSC
    11C C unallocated
    11D 0 AsiaSC
    11D B unallocated
    11E E IndOcean
    11F 6 unallocated
    11F B AsiaEast
    11F C AsiaSC
    120 0 cuneiform
    125 5 unallocated
    12F 9 Europe
    130 0 hieroglyphs
    146 8 unallocated
    161 0 AsiaSC
    161 4 unallocated
    168 0 Africa
    16A 4 AsiaSC
    16A D Africa
    16B 0 AsiaSE
    16B 9 unallocated
    16D 4 AsiaSC
    16D 8 unallocated
    16E 4 Africa
    16E A unallocated
    16F 0 AsiaEast
    16F A unallocated
    16F E AsiaEast
    18D 8 unallocated
    1AF F AsiaEast
    1B3 0 unallocated
    1BC 0 notation
    1BC B unallocated
    1CC 0 symbols
    1CE C unallocated
    1CF 0 notation
    1CF D unallocated
    1D0 0 notation
    1D2 5 unallocated
    1D2 C symbols
    1D3 8 unallocated
    1D4 0 symbols
    1D8 0 notation
    1DA B unallocated
    1DF 0 Latin
    1E0 0 Europe
    1E0 9 unallocated
    1E1 0 AsiaSE
    1E1 5 unallocated
    1E2 9 AsiaSC
    1E3 0 unallocated
    1E4 D AsiaSC
    1E5 0 unallocated
    1E5 D AsiaSC
    1E6 0 unallocated
    1E7 E Africa
    1E8 E unallocated
    1E9 0 Africa
    1E9 6 unallocated
    1EC 7 symbols
    1EC C unallocated
    1ED 0 symbols
    1ED 5 unallocated
    1EE 0 symbols
    1EF 0 unallocated
    1F0 0 symbols
    1FC 0 unallocated];

# --------------- Plane 2 Data (SIP)  --------------- 
# Legend groups and "abc d group" entries for plane 2 (a = plane digit,
# bcd = following hex digits, group = key of %legendColorByGroup).
$legendByPlane{2} = [qw(Han unallocated asOfVersion)];
$dataByPlane{2} =
#   abc d group # comment (for a starting code point of U+abcd0)
 qq[200 0 Han
    2A6 E unallocated
    2A7 0 Han
    2EE 6 unallocated
    2F8 0 Han
    2FA 2 unallocated];

# --------------- Plane 3 Data (TIP)  --------------- 
# Same layout as above: Han from U+30000, unallocated from U+323B0.
$legendByPlane{3} = [qw(Han unallocated asOfVersion)];
$dataByPlane{3} =
#   abc d group # comment (for a starting code point of U+abcd0)
 qq[300 0 Han
    323 B unallocated];

# --------------- Planes 4 to 13 Data  ---------------
# Each of these ten planes has the same legend and a single data entry that
# marks the plane as unallocated from its first code point on, so the tables
# are generated in a loop instead of being spelled out ten times.
foreach my $plane (4 .. 13) {
   # a fresh array reference per plane, exactly as separate literals would give
   $legendByPlane{$plane} = [qw(unallocated asOfVersion)];
   #   abc d group (for a starting code point of U+abcd0)
   # The plane digit "a" is hexadecimal, so plane 10 yields "A00 0 unallocated"
   # and plane 13 yields "D00 0 unallocated".
   $dataByPlane{$plane} = sprintf('%X00 0 unallocated', $plane);
}

# --------------- Plane 14 Data (SSP)  --------------- 
# Legend groups and "abc d group" entries for plane 14 (a = plane digit in
# hex, bcd = following hex digits, group = key of %legendColorByGroup).
$legendByPlane{14} = [qw(tags variation unallocated asOfVersion)];
$dataByPlane{14} =
#   abc d group # comment (for a starting code point of U+abcd0)
 qq[E00 0 tags
    E00 8 unallocated
    E01 0 variation
    E01 F unallocated];

# --------------- Plane 15 Data  --------------- 
# The whole plane is private use.
$legendByPlane{15} = [qw(private asOfVersion)];
$dataByPlane{15} =
#   abc d group # comment (for a starting code point of U+abcd0)
 qq[F00 0 private];

# --------------- Plane 16 Data  --------------- 
# The whole plane is private use.
# The data parser reads the plane as a single hex digit, so plane 16 (hex 10)
# cannot be written in the "abc d" notation; "0" is used in its place.
# NOTE(review): presumably the plane digit does not affect the drawing -- confirm.
$legendByPlane{16} = [qw(private asOfVersion)];
$dataByPlane{16} =
#   abc d group # comment (for a starting code point of U+abcd0)
 qq[000 0 private]; # "000 0" looks wrong but works

# ---------------------------------------------------

# Fill color of each group, as an SVG/web color name.
# See https://wiki.riteme.site/wiki/Web_colors when selecting new colors
our %legendColorByGroup = (
   Africa      => 'lightgreen',
   Americas    => 'yellow',
   AsiaEast    => 'crimson',
   AsiaSC      => 'darkgreen',
   AsiaSE      => 'purple',
   cuneiform   => 'rosybrown',
   Europe      => 'blue',
   Han         => 'salmon',
   hieroglyphs => 'cornflowerblue',
   IndOcean    => 'sienna',
   Latin       => 'black',
   ME          => 'orange',
   misc        => 'pink',
   notation    => 'cyan',
   private     => 'darkgray',
   surrogates  => 'lightgray',
   symbols     => 'orchid',
   tags        => 'slategray',
   unallocated => 'white',
   variation   => 'darkkhaki',
);

# Text color to use on top of each background color:
# black text on the light backgrounds, white text on the dark ones.
our %textColors;
$textColors{$_} = 'black'
   foreach qw(cornflowerblue crimson cyan darkgray darkkhaki lightblue lightgray lightgreen
              orange orchid pink rosybrown salmon slategray white yellow);
$textColors{$_} = 'white'
   foreach qw(black blue darkgreen purple sienna);

# make sure every group color has a contrasting text color:
# (groups are visited in sorted order so the first error reported is always the same)
foreach my $group (sort keys %legendColorByGroup) {
   my $color = $legendColorByGroup{$group};
   die "Fatal data error: Need to define contrasting text color for \"$color\" in \%textColors"
      unless exists($textColors{$color});
}

# warn of duplicate colors
# %legendGroupByColor maps each color to a comma-terminated list of the groups
# using it ("groupA,groupB,"); the groups are appended in sorted order so the
# warning text does not depend on hash ordering.
my %legendGroupByColor = ();
foreach my $group (sort keys %legendColorByGroup) {
   $legendGroupByColor{$legendColorByGroup{$group}} .= "$group,";
}
foreach my $color (sort keys %legendGroupByColor) {
   # a lexical list is used here rather than overwriting the global @_
   my @groups = split /,/, $legendGroupByColor{$color};
   if(scalar @groups > 1) {
      warn "Warning: Color \"$color\" is used by multiple groups: @groups\n";
   }
}

our $gradientDef; # object whose gradient() method creates the gradient elements
our $gradientID;  # number of the most recently created gradient ("grad<N>")

sub stripes {
   # Creates a "gradient" of distinct vertical stripes.
   # Arguments: the starting x-coordinate of the gradient, the ending
   # x-coordinate, the first SVG color, and then any number of stops.
   # Each stop is an array reference holding a stop location (a number
   # between 0 and 1) and the color that begins at that location.
   # Returns a string that a stroke or fill attribute can be set to
   # in order to use the gradient.
   my ($xStart, $xEnd, $currentColor, @colorChanges) = @_;

   my $id = 'grad' . ++$gradientID;
   my $gradient = $gradientDef->gradient(-type => 'linear',
                                         gradientUnits => "userSpaceOnUse",
                                         id => $id,
                                         x1 => $xStart, x2 => $xEnd);

   $gradient->stop(offset => '0%', 'stop-color' => $currentColor);
   for my $change (@colorChanges) {
      my ($location, $nextColor) = @$change;
      my $offset = 100*$location . '%';
      # Two stops at the same offset: the old color ends and the new one
      # begins at exactly this point.
      $gradient->stop(offset => $offset, 'stop-color' => $currentColor);
      $gradient->stop(offset => $offset, 'stop-color' => $nextColor);
      $currentColor = $nextColor;
   }
   $gradient->stop(offset => '100%', 'stop-color' => $currentColor);

   return "url(#$id)";
}
 
sub determineTextColor {
   # Given the same arguments as &stripes (start x, end x, first background
   # color, then [location, background color] stops), returns a value to use
   # for the "fill" of text overlaying the given colors.  Each background
   # color is mapped to its contrasting text color through %textColors, and
   # stops that do not change the text color are dropped.  The result is a
   # solid color when no stop remains, otherwise a gradient from &stripes.
   #
   # The caller's stop arrays are never modified: the text-color stops are
   # built as new arrays, so the same stops can still be passed to &stripes
   # for the background afterwards.
   my ($x1, $x2, $firstColor, @stops) = @_;
   my $firstTextColor = $textColors{$firstColor};
   my $lastTextColor = $firstTextColor;
   my @textStops;
   foreach my $stop (@stops) {
      my $thisTextColor = $textColors{$stop->[1]};
      # This stop is redundant when the text color stays the same.
      next if $thisTextColor eq $lastTextColor;
      push(@textStops, [$stop->[0], $thisTextColor]);
      $lastTextColor = $thisTextColor;
   }
   return (@textStops
           ? stripes($x1, $x2, $firstTextColor, @textStops)
           : # We can just return a solid color.
           $firstTextColor);
}
 
# create images for each desired plane:
# with -all every plane that has data, otherwise only the planes that have an
# acronym; either way in ascending plane-number order
my @desiredPlanes = sort { $a <=> $b }
                    keys %{ $opt_all ? \%dataByPlane : \%planeAcronymByNumber };
foreach my $thisPlane (@desiredPlanes) {
   # For one plane: parse its allocation data, draw the 16 by 16 roadmap
   # square, and write the SVG variations ("no legend", "multilingual", "en").
   # In test mode only the multilingual variation is written.
   my @scripts = @{$legendByPlane{$thisPlane}};
   my $dataString = $dataByPlane{$thisPlane};
   if(($dataString =~ tr/\n//) == 0) {
      # We need at least two lines so just duplicate the single line
      $dataString .= "\n$dataString";
   }
   $gradientID = -1; # &stripes pre-increments this, so gradient ids restart for every plane

   # Process $dataString: strip the comments, then turn every remaining line
   # into a record of
   #    [first square digit, second square digit, offset within the square
   #     (in sixteenths), legend color, leading character]
   $dataString =~ s {\#.+} {}gm;
   our @d = ();
   {
      foreach (split /\s*\n\s*/, $dataString) {
         /\S/ or next;
         # Check the match explicitly.  Reading $1..$5 after a failed match
         # would report a misleading "not found" error for the wrong text.
         my ($lead, $row, $column, $offset, $group) = /(.)(.)(.)\s+(.)\s+(.+)/
            or die "Fatal data error: malformed line \"$_\" in the data for plane $thisPlane\n";
         die "Fatal data error: \"$group\" not found in \%legendColorByGroup\n"
            unless defined($legendColorByGroup{$group});
         # $lead is stored last; the drawing code below never reads it.
         push(@d, [hex($row), hex($column), hex($offset), $legendColorByGroup{$group}, hex($lead)]);
      }
   }
 
   # Set up the SVG
   my $sideLength = 550;
   # Height and width of the roadmap square in pixels.
   # The following sizes are expressed as fractions of
   # $sideLength.
   my $lineWidth = 1/250; # The width of the divider lines.
   my $legendWidth = 7/10; # Width of the margin used for the legend (was 1/2 but was made bigger for various languages)
   my $legendBoxSpace = 1/30; # Space between the rectangle for each legend and the right edge of the roadmap square.
   my $legendTopMargin = 1/30; # Space between the first box of the legend and the top of the image.
   my $legendBoxWidth = 1/20;
   my $legendBoxHeightDefault = 2/77; # NOTE: This affects the legend text size too.
   #  $legendBoxHeightDefault was 1/40 (24/960) but was changed to this ridiculous fraction so that most languages will use 13.00px for font-size.
   my $legendTextSpace = 1/50; # Space between the left edge of each legend box and its descriptive text.
   my $legendLineBreak = 1/50;
   # Convert the fractions above into pixels.
   $$_ *= $sideLength
      foreach (\$lineWidth, \$legendWidth, \$legendBoxSpace,
               \$legendBoxHeightDefault, \$legendTopMargin,
               \$legendBoxWidth, \$legendTextSpace,
               \$legendLineBreak);
 
   our $svg = SVG->new(-encoding => 'UTF-8',
                       -printerror => 1, # print processing errors to STDERR
                       width => $sideLength, # width without legend
                       height => $sideLength);

   # create <title>
   my $title;
   if(defined($planeAcronymByNumber{$thisPlane})) {
      $title = "Roadmap to the Unicode $planeAcronymByNumber{$thisPlane}";
   } else {
      $title = "Roadmap to Unicode Plane $thisPlane";
   }
   $svg->title->cdata($title);

   # create <desc>
   # Only the day, month and year are needed; a list slice avoids assigning
   # the unwanted fields to the global $_.
   my ($day, $month, $year) = (localtime(time))[3, 4, 5];
   my $desc = "This chart is a roadmap of character allocation for Unicode plane $thisPlane";
   if(exists($planeNameByNumber{$thisPlane})) {
      $desc .= ": $planeNameByNumber{$thisPlane}";
   }
   if(exists($planeAcronymByNumber{$thisPlane})) {
      $desc .= " ($planeAcronymByNumber{$thisPlane})";
   }
   $desc .= sprintf ".  It was created on %04d-%02d-%02d using Unicode $version data.", $year+1900, $month+1, $day;
   if(scalar @scripts > 3) {
      $desc .= "  Characters are categorized more-or-less by the chapters in The Unicode Standard.";
   }
   $svg->desc->cdata($desc);

   $gradientDef = $svg->defs;
   # Declare this here to ensure that the gradient definitions appear in the file before anything else,
   # especially the rectangles that reference them.
   $svg->rectangle(x => 0,
                   y => 0,
                   width => $sideLength, # width without legend
                   height => $sideLength, 
                   fill => 'white');
   our $rectangleGroup = $svg->group('stroke-width' => ($lineWidth . 'px'),
                                     'stroke' => 'gainsboro');

   our $squareSideLength = ($sideLength - $lineWidth) / 16;

   # determine font size for code point numbers
   my $hexFontSize;
   if($thisPlane == 16) {
      # small font size to accommodate four digits
      $hexFontSize = $squareSideLength/2.8;
   } elsif($thisPlane == 0) {
      # large font size for two digits
      $hexFontSize = $squareSideLength/2.2;
   } else {
      # medium font size for three digits
      $hexFontSize = $squareSideLength/2.4;
   }
   $hexFontSize = 0 + sprintf "%.1f", $hexFontSize; # round to one decimal place

   # this defines each code point box
   our $roadmapTextGroup = $svg->group('text-anchor' => 'middle',
                                       'font-family' => $defaultFontFamily,
                                       'font-size' => ($hexFontSize . 'px'));
 
   # Draw the roadmap square
   {
      @d or die "Fatal data error: no data lines found for plane $thisPlane\n";
      my $lastColorUsed = shift(@d)->[3]; # The last color we used.
      # The next stop (equivalent to one line of the $dataString); empty when
      # the data held only a single record.
      my @nextStop = (@d ? @{shift @d} : ());
      foreach my $y (0 .. 15) {
         foreach my $x (0 .. 15) {
            # $y and $x correspond to the first and second digits, respectively, of each character's code point
            my $xp = $lineWidth/2 + $x*$squareSideLength;
            my $yp = $lineWidth/2 + $y*$squareSideLength;
            my ($squareFill, $textFill);
            my @stopsHere = ();
            # Stops that occur in this square.
            while (@nextStop and $nextStop[0] == $y and $nextStop[1] == $x) {
               push(@stopsHere, [@nextStop]);
               @nextStop = (@d ? @{shift @d} : ());
            }
            if (@stopsHere) {
               # A first stop at offset 0 replaces the color the square starts with.
               $stopsHere[0][2] or $lastColorUsed = shift(@stopsHere)->[3];
               # Remaining offsets are converted from sixteenths to 0..1 locations.
               my @args = ($xp,
                           $xp + $squareSideLength,
                           $lastColorUsed,
                           map {[ $_->[2]/16, $_->[3] ]} @stopsHere);
               $squareFill = stripes(@args);
               $textFill = determineTextColor(@args);
               @stopsHere and $lastColorUsed = $stopsHere[-1][3];
            } else {
               $squareFill = $lastColorUsed;
               $textFill = $textColors{$squareFill};
            }
            $rectangleGroup->rectangle(x => $xp, y => $yp,
                                       width => $squareSideLength,
                                       height => $squareSideLength,
                                       fill => $squareFill);
            # '%.0X' prints nothing for plane 0, so the label has no leading plane digit there.
            $roadmapTextGroup->text(x => ($xp + $squareSideLength/2),
                                    y => ($yp + (2/3)*$squareSideLength),
                                    fill => $textFill
                                   )->cdata(sprintf('%.0X%X%X', $thisPlane, $y, $x));
         }
      }
   }
 
   # Create output without legend
   my $xmlOutput = $svg->xmlify;
   if(!$opt_test) {
      writeFile($xmlOutput, $thisPlane, "no legend");
   }

   # Draw the legend
   {
      my $legendRectanglesGroup = $svg->group('stroke-width' => $lineWidth . 'px',
                                              stroke => 'black');
      my $legendRectanglesSwitch = $legendRectanglesGroup->switch('allowReorder' => 'yes');

      $legendBoxHeightDefault = 0 + sprintf "%.1f", $legendBoxHeightDefault; # round to one decimal digit
      my $legendWordsGroup = $svg->group('text-anchor' => 'start',
                                         'font-family' => $defaultFontFamily,
                                         'font-size' => $legendBoxHeightDefault . 'px');
      my $legendWordsSwitch = $legendWordsGroup->switch('allowReorder' => 'yes');

      # $opt_test is compared as a string below; fall back to '' so that the
      # comparisons stay well-defined should it be undefined.
      my $testLanguage = defined($opt_test) ? $opt_test : '';

      foreach my $thisLanguage (sort customLanguageSort keys %legendTextByLanguageAndGroup) {
         # $x is the left edge of the color boxes: near the right edge of the
         # image for RTL languages, just right of the roadmap square otherwise.
         my $x;
         if(exists($isRTL{$thisLanguage})) {
            $x = ceil($sideLength + $legendWidth - $legendBoxSpace - $legendBoxWidth - $lineWidth / 2);
         } else {
            $x = ceil($sideLength + $legendBoxSpace + $lineWidth / 2);
         }
         my $y = floor($legendTopMargin + $lineWidth / 2);

         # Create the group for the little color boxes in the legend for this specific language
         my $legendRectanglesThisLanguage = $legendRectanglesSwitch->group();

         # Create the group for the words in the legend for this specific language
         my $legendWordsThisLanguage = $legendWordsSwitch->group();

         # Override the default font if necessary
         if(defined($fontFamilyOverride{$thisLanguage})) {
            $legendWordsThisLanguage->setAttribute('font-family' => $fontFamilyOverride{$thisLanguage});
         }

         # Make text larger for some languages because they have more space:
         my $legendBoxHeight = $legendBoxHeightDefault;
         if($thisLanguage eq 'ko' || $testLanguage eq 'ko') {
            # Korean
            $legendBoxHeight = 0 + sprintf "%.1f", $legendBoxHeightDefault * 1.1;
            $legendWordsThisLanguage->setAttribute('font-size' => $legendBoxHeight);
         } elsif($thisLanguage eq 'fa' || $testLanguage eq 'fa') {
            # Persian
            $legendBoxHeight = 0 + sprintf "%.1f", $legendBoxHeightDefault * 1.2;
            $legendWordsThisLanguage->setAttribute('font-size' => $legendBoxHeight);
         }

         if($thisLanguage eq 'default') {
            # Add default xml:lang language
            $legendWordsThisLanguage->setAttribute('xml:lang' => $defaultLanguage);
         } else {
            # Add systemLanguage to both legend groups (used to choose which language to show)
            $legendRectanglesThisLanguage->setAttribute(systemLanguage => $thisLanguage);
            $legendWordsThisLanguage->setAttribute(systemLanguage => $thisLanguage);
            # Add xml:lang language (not used to choose language but can affect font choice)
            my $xmlLang = $thisLanguage;
            $xmlLang =~ s/,.*$//; # xml:lang only handles one language so use the first in the list
            $legendWordsThisLanguage->setAttribute('xml:lang' => $xmlLang);
            if(exists($isRTL{$thisLanguage})) {
               # Set anchor for words (reversed for RTL)
               $legendWordsThisLanguage->setAttribute('text-anchor' => 'end');
            }
         }

         foreach my $thisGroup (@scripts) {
            # Every group gets a color box except the "asOfVersion" entry.
            if($thisGroup ne "asOfVersion") {
               $legendRectanglesThisLanguage->rectangle(x => $x,
                                                        y => $y,
                                                        width => $legendBoxWidth,
                                                        height => $legendBoxHeight,
                                                        fill => $legendColorByGroup{$thisGroup});
            }
            $y += $legendLineBreak;
            # A legend entry may span several lines of text.
            my @txt = split /\n/, $legendTextByLanguageAndGroup{$thisLanguage}{$thisGroup};
            foreach my $thisLine (@txt) {
               if($thisGroup eq "asOfVersion") {
                  # No box for this entry: shift the text by a box width, add
                  # extra vertical space, and fill in the Unicode version.
                  $x -= $legendBoxWidth;
                  $y += $legendLineBreak;
                  $thisLine = sprintf $legendTextByLanguageAndGroup{$thisLanguage}{$thisGroup}, $version;
                  if($thisLanguage eq 'fa' || $testLanguage eq 'fa') {
                     $thisLine =~ tr/0-9./۰-۹٫/; # switch to Persian numerals
                  }
               }
               if(exists($isRTL{$thisLanguage})) {
                  $legendWordsThisLanguage->text(x => ($x - int($legendBoxWidth / 3)),
                                                 y => $y
                                                )->cdata($thisLine);
               } else {
                  $legendWordsThisLanguage->text(x => ($x + $legendBoxWidth + $legendTextSpace),
                                                 y => $y
                                                )->cdata($thisLine);
               }
               $y += (5/4)* sprintf "%.2f", $legendBoxHeight;
            }
         }
      }
   }
 

   # Create output with legend
   $xmlOutput = $svg->xmlify;
   my $newWidth = $sideLength + $legendWidth; # adjust width to include the legend
   $xmlOutput =~ s/width="$sideLength"/width="$newWidth"/g;
   if($opt_test) {
      # Add dashed lines to the legend to make it easier to determine if the text is going to be truncated
      $xmlOutput =~ s/(<\/svg>)/<line x1=\"$newWidth\" y1=\"0\" x2=\"$newWidth\" y2=\"$sideLength\" style=\"stroke:rgb(0,0,0);stroke-width:2;stroke-dasharray:5,5\" \/><line x1=\"$sideLength\" y1=\"$sideLength\" x2=\"$newWidth\" y2=\"$sideLength\" style=\"stroke:rgb(0,0,0);stroke-width:2;stroke-dasharray:5,5\" \/>$1/;
   }
   writeFile($xmlOutput, $thisPlane, "multilingual");
 

   # Create monolingual English version
   $xmlOutput = $svg->xmlify;
   $newWidth = $sideLength * 1.5; # adjust width to include the smaller legend
   $xmlOutput =~ s/width="$sideLength"/width="$newWidth"/g;
   # this is a hack... ideally it should alter the groups themselves using the SVG package but regex is easier to get working (and easier to break)
   $xmlOutput =~ s/\s*<g[^>]+?systemLanguage.*?<\/g>//gms; # delete all groups with systemLanguage
   if($xmlOutput =~ m/systemLanguage/) {
      warn "Warning: systemLanguage still present, monolingual versions probably wrong\n";
   } else {
      $xmlOutput =~ s/\s*<switch\s*[^>]*?>\s*//gms;   # delete all <switch>s
      $xmlOutput =~ s/\s*<\/switch\s*>\s*//gms; # delete all </switch>s
      if($xmlOutput =~ m/<switch/) {
         warn "Warning: switch still present, monolingual versions probably wrong\n";
      } else {
         # condense empty groups
         $xmlOutput =~ s/(<g [^>]+?>)<g\s*>/$1/gms;
         $xmlOutput =~ s/(<g [^>]+?)><g( xml:lang="..+?")>/$1$2>/gms;
         $xmlOutput =~ s/(<\/g>)<\/g>/$1/gms;
         # tidy up
         $xmlOutput =~ s/\t\t\t\t/\t\t/g;
         $xmlOutput =~ s/\t\t\t/\t\t/g;
         $xmlOutput =~ s/\t(\t<\/g>)/$1/g;
         $xmlOutput =~ s/(<\/g>)(<g)/$1\n\t$2/g;
         $xmlOutput =~ s/(<\/g>)(<\/svg)/$1\n$2/g;
      }
   }
   if(!$opt_test) {
      writeFile($xmlOutput, $thisPlane, "en");
   }

} # End of plane loop

sub writeFile {
   # Cleans up the XML text of one image and writes it to $outputDirectory.
   #
   # Arguments:
   #    the XML text,
   #    the plane number,
   #    a suffix naming the variation (for example "no legend").
   #
   # The file is named "map <acronym> <suffix>.svg" when the plane has an entry
   # in %planeAcronymByNumber and "map Znn <suffix>.svg" (nn = two-digit plane
   # number) otherwise, with every space replaced by an underscore.
   #
   # Dies if the file cannot be opened or if closing it reports an error.
   my ($txt, $planeNumber, $suffix) = @_;

   # Fix-ups:
   # Remove extra space in <text> elements. Inkscape ignores it, but
   # librsvg treats it like a normal character, thus messing up text alignment.
   $txt =~ s{\s+</text>\s} {</text>\n}g;
   # Do the same for other elements as well
   # NOTE(review): these three substitutions have no /g, so only the first
   # occurrence of each closing tag is changed.  Left as is because adding /g
   # would change the layout of the generated files - confirm the intent.
   $txt =~ s{\s+</g>\s} {</g>\n};
   $txt =~ s{\s+</title>\s} {</title>\n};
   $txt =~ s{\s+</switch>\s} {</switch>\n};
   # Get rid of space after elements with no attributes
   $txt =~ s{<([A-Za-z]+)\s+>} {<$1>}g;
   # Change to Unix-style newlines if necessary.
   $txt =~ s{\015\012?} {\012}g;
   # Remove comment containing non-existent www.roitsystems.com URL
   $txt =~ s/(<!--[^>]*?www\.roitsystems\.com.*?-->)//gsm;
   # Remove blank lines
   $txt =~ s/^\s*\n+//mg;
   # Fix odd problem with 0x80-0xff
   # not sure why but 0x80-0xff get turned into " &    #nnn;" where nnn is the decimal character value
   $txt =~ s/ &    #([0-9]+);/chr($1)/ge;

   # Determine file name
   my $filename;
   if(defined($planeAcronymByNumber{$planeNumber})) {
      $filename = "map $planeAcronymByNumber{$planeNumber} $suffix.svg";
   } else {
      # The suffix is passed as an argument so that a '%' in it can never be
      # taken for a format directive.
      $filename = sprintf "map Z%.2i %s.svg", $planeNumber, $suffix;
   }
   $filename =~ s/ /_/g; # replace spaces with underscores

   # write the file
   # Three-argument open with a lexical handle: the path is never parsed for
   # mode characters and the handle cannot clash with another OUT.
   my $path = "$outputDirectory/$filename";
   open(my $out, '>', $path) ||
      die("Can't open $path: $!\n");
   binmode($out, ":utf8");
   print {$out} $txt;
   # Buffered write errors only show up when the handle is closed.
   close($out) ||
      die("Can't close $path: $!\n");
}

sub customLanguageSort
{
   # Sort comparator (reads the usual $a and $b): the "default" entry always
   # goes last, every other language code is ordered alphabetically without
   # regard to case.
   my ($left, $right) = ($a, $b);
   return $left  eq "default" ?  1
        : $right eq "default" ? -1
        :                       lc($left) cmp lc($right);
}

# end of script