#!/usr/bin/perl -w

# Locale diagnostic script.
#
# Prints, in order:
#   * the LANG environment variable and whether the 'locale' pragma was
#     enabled because of it;
#   * the LC_CTYPE locale before and after applying LANG via setlocale();
#   * the characters in 0..255 that match \w, in sort order (collation test);
#   * every upper/lower case pair found among the characters 0..255.
#
# Example locale names that can be supplied through LANG:
#   Unix/Linux: de_AT.ISO8859-15
#   Windows:    Russian_Russia.866
#               English_United Kingdom.1252

use strict;
use warnings;
use POSIX qw(locale_h);

# Package variables because they are assigned at compile time (in BEGIN)
# and read at run time.
our ( $lang, $compile_hints );

BEGIN {
    $lang = $ENV{LANG} || '';
    print "LANG is $lang\n";

    # Conditional equivalent of "use locale": the pragma is only switched on
    # when LANG is set.  It must happen in BEGIN so that it affects how the
    # rest of this file is compiled.
    if ($lang) {
        require locale;
        locale->import();
    }

    # $^H holds compile-time hints, so it is sampled here, while this file
    # is still being compiled, instead of at run time.
    $compile_hints = $^H;
}

print "\$^H is $compile_hints \n";

# The bit that marks "use locale" differs between perl versions (0x800 on
# old releases, 0x4 on later ones), so take the mask from locale.pm itself
# when it is loaded and fall back to 0x4 otherwise.
my $locale_mask = defined $locale::hint_bits ? $locale::hint_bits : 0x4;
my $locale_bit  = ( $compile_hints & $locale_mask ) ? 1 : 0;
print "Locale bit is $locale_bit \n";

print exists $INC{'locale.pm'} ? "Locale loaded\n" : "Locale not loaded\n";

# Report the LC_CTYPE locale, apply LANG to LC_CTYPE and LC_COLLATE, then
# report again.  setlocale() returns undef on failure, so guard the output.
my $cur_locale = setlocale( LC_CTYPE() );
print "Locale is ", ( defined $cur_locale ? $cur_locale : '(unknown)' ), "\n";

for my $category ( [ 'LC_CTYPE', LC_CTYPE() ], [ 'LC_COLLATE', LC_COLLATE() ] ) {
    my ( $name, $id ) = @{$category};

    # An empty $lang asks the C library to pick the locale from the
    # environment, exactly as the unconditional calls always did.
    defined setlocale( $id, $lang )
        or warn "Could not set $name to '$lang'\n";
}

$cur_locale = setlocale( LC_CTYPE() );
print "Locale now is ", ( defined $cur_locale ? $cur_locale : '(unknown)' ), "\n";

# To force the plain C locale instead:
# setlocale(LC_CTYPE, "C");

# Test collation sequence for sorting: print every character in 0..255
# that matches \w, in sort order.
print +( sort grep { /\w/ } map { chr($_) } 0 .. 255 ), "\n";

# NOTE: ActivePerl's locale support is very broken for the UK locale above!
# Weird non-alpha characters appear as if they were the lower case or upper
# case of completely different characters...
# NOTE: fr_FR.ISO8859-1 on Debian includes '_' and '-' in the \w character
# class - en_US includes only '_'... The same locale on 5.8 includes 'mu'.
# Sorting of UTF8 characters doesn't seem to work...

# List every character that has a distinct case counterpart.
for my $charno ( 0 .. 255 ) {
    my $char  = chr $charno;
    my $upper = uc $char;
    my $lower = lc $char;

    # Upper-case character: unchanged by uc, changed by lc.
    if ( $upper eq $char and $lower ne $char ) {
        printf "upper: %d %c\t", $charno, $charno;
        printf "lc: %d %c\n", ord($lower), ord($lower);
    }

    # Lower-case character: unchanged by lc, changed by uc.
    if ( $lower eq $char and $upper ne $char ) {
        printf "lower: %d %c\t", $charno, $charno;
        printf "uc: %d %c\n", ord($upper), ord($upper);
    }
}