Skip to content

Commit

Permalink
Item13328: Issues with Site Locale & Charset checkers
Browse files Browse the repository at this point in the history
 - Original checkers were "bootstrapping" these settings.  Removed that
   code from the checkers and into Load::bootstrap

 - Need to consistency check Locale and CharSet, so the checkers also
   need to cross-specify each other.  Any change checks both.

 - If the CharSet checker detects an invalid CharSet, it needs to
   override the config temporarily, or the checker will be unable to
   generate any output.

 - Attempt to bootstrap a Locale and CharSet.  Seems to do a better job
   from the cli shell  tools/configure, where it can detect the default
   charset of the shell.  Under Apache it comes up with Locale C.
  • Loading branch information
gac410 committed Mar 27, 2015
1 parent ef1811b commit b7c10da
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 50 deletions.
11 changes: 5 additions & 6 deletions core/lib/Foswiki.spec
Expand Up @@ -1282,12 +1282,11 @@ $Foswiki::cfg{LanguageFileCompression} = $FALSE;
# **BOOLEAN LABEL="Use Locale" EXPERT**
# Enable the use of {Site}{Locale}. WARNING: Perl locales are badly broken
# in some versions of perl. For this reason locales are disabled in Foswiki.
# If you enable them they can be made to work, but you will have to disable
# taint checks, and collation will only work with single-byte character
# sets.
# If you enable them they can be made to work, but collation will only work
# with single-byte character sets.
$Foswiki::cfg{UseLocale} = $FALSE;

# **STRING 50 LABEL="Site Locale" DISPLAY_IF="{UseLocale}" CHECK="iff:'{UseLocale}'"**
# **STRING 50 LABEL="Site Locale" DISPLAY_IF="{UseLocale}" CHECK="iff:'{UseLocale}'" CHECK="also:{Site}{CharSet}" **
# Site-wide locale - used by Foswiki and external programs such as grep, and to
# specify the character set and language in which content must be presented
# for the user's web browser.
Expand All @@ -1313,9 +1312,9 @@ $Foswiki::cfg{UseLocale} = $FALSE;
# * =ja_JP.eucjp= - Japan
# * =C= - English only; no I18N features regarding character encodings
# and external programs.
$Foswiki::cfg{Site}{Locale} = 'en.utf8';
#$Foswiki::cfg{Site}{Locale} = 'en.utf8';

# **STRING 50 LABEL="Site Character Set" **
# **STRING 50 LABEL="Site Character Set" CHECK="also:{Site}{Locale}" **
# Set this to match your site locale (from 'locale -a')
# whose character set is not supported by your available perl conversion module
# (Encode for Perl 5.8 or higher, or Unicode::MapUTF8 for other Perl
Expand Down
68 changes: 58 additions & 10 deletions core/lib/Foswiki/Configure/Checkers/Site/CharSet.pm
Expand Up @@ -8,17 +8,65 @@ use Foswiki::Configure::Checker ();
our @ISA = ('Foswiki::Configure::Checker');

# Checker for {Site}{CharSet}.
#
# Reports problems with the configured site character set:
#   1. ERROR if Encode cannot encode with it (the setting is then overridden
#      in memory so that output can still be produced),
#   2. ERROR if it is one of the multi-byte encodings matched below,
#   3. ERROR if it disagrees with the character set named in {Site}{Locale},
#   4. WARN if a bootstrap guess differs from the Foswiki 1.1 default.
#
# Parameters:
#   $this     - the checker object (not used by this method)
#   $reporter - object whose ERROR / WARN methods receive the messages
#
# Returns nothing when the encoding is unknown, otherwise ''.
sub check_current_value {
    my ( $this, $reporter ) = @_;

    # Test if this is actually an available encoding:
    eval {
        require Encode;
        Encode::encode( $Foswiki::cfg{Site}{CharSet}, 'test', 0 );
    };
    if ($@) {
        $reporter->ERROR(
"Unknown Charset requested. Foswiki will not function correctly with this setting."
        );
        print STDERR "encode failed $@ \n";

        #SMELL Need to override so Foswiki can produce output.
        # 'C' is a locale name, not an encoding known to Encode, so fall
        # back to the Foswiki 1.1 default character set instead.
        $Foswiki::cfg{Site}{CharSet} = 'iso-8859-1';
        return;
    }

    if ( $Foswiki::cfg{Site}{CharSet} =~
m/^(?:iso-?2022-?|hz-?|gb2312|gbk|gb18030|.*big5|.*shift_?jis|ms.kanji|johab|uhc)/i
      )
    {

        $reporter->ERROR(<<HERE);
Cannot use this multi-byte encoding ('$Foswiki::cfg{Site}{CharSet}') as site character
encoding. Please set a different character encoding setting.
HERE
    }

    # Extract the character set from locale for consistency check.
    # The capture is taken in list context so that a failed match yields
    # nothing instead of a stale $1 left over from an earlier match, and an
    # undefined {Site}{Locale} is tolerated.
    my $locale = $Foswiki::cfg{Site}{Locale};
    my ($charset) =
      defined $locale ? ( $locale =~ m/\.([a-z0-9_-]+)$/i ) : ();
    $charset = '' unless $charset;    # no guess?
    $charset =~ s/^utf8$/utf-8/i;
    $charset =~ s/^eucjp$/euc-jp/i;
    $charset = lc($charset);

    if ( $charset && ( lc( $Foswiki::cfg{Site}{CharSet} ) ne $charset ) ) {
        $reporter->ERROR(<<HERE);
The Character set determined by the configured Locale, and this character set,
are inconsistent. Recommended setting: =$charset=
HERE
    }

    # Compare case-insensitively: lc() must wrap the setting, not the
    # result of the comparison.
    if ( $Foswiki::cfg{isBOOTSTRAPPING}
        && lc( $Foswiki::cfg{Site}{CharSet} ) ne 'iso-8859-1' )
    {
        $reporter->WARN( <<HERE );
The BOOTSTRAP process has guessed a site Character Set of =$Foswiki::cfg{Site}{CharSet}=.
This is different from the Foswiki 1.1 default of =iso-8859-1=. If you intend to migrate
data from prior releases of Foswiki or TWiki, you should either match the previously used Character Set.
or migrate data using <a href="http://foswiki.org/Extensions/CharsetConverterContrib" target="_blank">CharsetConverterContrib</a>
HERE
    }

    return '';
}

Expand Down
25 changes: 0 additions & 25 deletions core/lib/Foswiki/Configure/Checkers/Site/Locale.pm
Expand Up @@ -38,31 +38,6 @@ HERE
}
}

# Set the default site charset
unless ( defined( $Foswiki::cfg{Site}{CharSet} ) ) {
$Foswiki::cfg{Site}{CharSet} = 'iso-8859-1';
}

# Check for unusable multi-byte encodings as site character set
# - anything that enables a single ASCII character such as '[' to be
# matched within a multi-byte character cannot be used for Foswiki.
# Refuse to work with character sets that allow Foswiki syntax
# to be recognised within multi-byte characters.
# FIXME: match other problematic multi-byte character sets
if ( $Foswiki::cfg{UseLocale}
&& $Foswiki::cfg{Site}{CharSet} =~
m/^(?:iso-?2022-?|hz-?|gb2312|gbk|gb18030|.*big5|.*shift_?jis|ms.kanji|johab|uhc)/i
)
{

$reporter->ERROR(
<<HERE
Cannot use this multi-byte encoding ('$Foswiki::cfg{Site}{CharSet}') as site character
encoding. Please set a different character encoding in the {Site}{Locale}
setting.
HERE
);
}
}

1;
Expand Down
7 changes: 0 additions & 7 deletions core/lib/Foswiki/Configure/Checkers/UseLocale.pm
Expand Up @@ -44,13 +44,6 @@ sub check_current_value {
}
}

$reporter->WARN(<<HERE);
Perl Locales are known to have issues. Perl taint checking (the -T flag)
must be disables if UseLocale is enabled. Even with taint checking disabled
there are a number of know issues with Foswiki Locales. For known issues
see: <a href="http://foswiki.org/Tasks/I18N">Foswiki I18N tasks</a>
HERE

if ( $Foswiki::cfg{OS} eq 'WINDOWS' ) {

# Warn re known broken locale setup
Expand Down
54 changes: 52 additions & 2 deletions core/lib/Foswiki/Configure/Load.pm
Expand Up @@ -20,6 +20,7 @@ use Cwd qw( abs_path );
use Assert;
use File::Basename;
use File::Spec;
use POSIX qw(locale_h);

use Foswiki::Configure::FileUtil;

Expand Down Expand Up @@ -147,7 +148,7 @@ sub readConfig {
unless ($nospec) {
push @files, 'Foswiki.spec';
}
if (!$nospec && $config_spec) {
if ( !$nospec && $config_spec ) {
foreach my $dir (@INC) {
foreach my $subdir ( 'Foswiki/Plugins', 'Foswiki/Contrib' ) {
my $d;
Expand Down Expand Up @@ -351,7 +352,7 @@ sub setBootstrap {
qw( {DataDir} {DefaultUrlHost} {DetailedOS} {OS} {PubUrlPath} {ToolsDir} {WorkingDir}
{PubDir} {TemplateDir} {ScriptDir} {ScriptUrlPath} {ScriptUrlPaths}{view}
{ScriptSuffix} {LocalesDir} {Store}{Implementation}
{Store}{SearchAlgorithm} );
{Store}{SearchAlgorithm} {Site}{CharSet} {Site}{Locale} );

$Foswiki::cfg{isBOOTSTRAPPING} = 1;
push( @{ $Foswiki::cfg{BOOTSTRAP} }, @BOOTSTRAP );
Expand Down Expand Up @@ -466,6 +467,9 @@ sub bootstrapConfig {
}
}

# Bootstrap the Site Locale and CharSet
_bootstrapSiteSettings();

# Bootstrap the store related settings.
_bootstrapStoreSettings();

Expand Down Expand Up @@ -509,6 +513,52 @@ BOOTS

=begin TML
---++ StaticMethod _bootstrapSiteSettings()
Called by bootstrapConfig. This handles the Site Locale & CharSet settings.
=cut

# Guess initial values for {Site}{Locale} and {Site}{CharSet}.
#
# The locale is taken from the process's current LC_CTYPE setting; the
# character set is derived from the locale's ".charset" suffix, defaulting
# to 'utf-8' when the locale names none. If Encode cannot use the derived
# character set, CGI's default character set is used instead, and the
# multi-byte encodings matched below are replaced by 'utf-8'.
#
# Takes no arguments; writes $Foswiki::cfg{Site}{Locale} and
# $Foswiki::cfg{Site}{CharSet} and logs its decisions to STDERR.
sub _bootstrapSiteSettings {

# Guess a locale first. This isn't necessarily used, but helps guess a CharSet, which is always used.
# (No "require locale" here: the pragma only acts through a compile-time
# "use", and POSIX::setlocale does not depend on it.)
    my $locale = setlocale(LC_CTYPE);
    $locale = 'C' unless defined $locale && length $locale;
    $Foswiki::cfg{Site}{Locale} = $locale;

    # Capture in list context: when the locale has no ".charset" suffix the
    # match fails and $charset stays undefined, rather than picking up a
    # stale $1 from whatever matched last in the caller.
    my ($charset) = $locale =~ m/\.([a-z0-9_-]+)$/i;
    $charset = 'utf-8' unless $charset;
    $charset =~ s/^utf8$/utf-8/i;
    $charset =~ s/^eucjp$/euc-jp/i;
    $Foswiki::cfg{Site}{CharSet} = lc($charset);

    eval {
        require Encode;
        Encode::encode( $Foswiki::cfg{Site}{CharSet}, 'test', 0 );
    };
    if ($@) {
        print STDERR
"AUTOCONFIG: Derived $Foswiki::cfg{Site}{CharSet} fails to encode, trying CGI default character set\n";
        require CGI;

        # Lower-cased for consistency with the locale-derived value above.
        $Foswiki::cfg{Site}{CharSet} = lc( CGI::charset() );
    }

    if ( $Foswiki::cfg{Site}{CharSet} =~
m/^(?:iso-?2022-?|hz-?|gb2312|gbk|gb18030|.*big5|.*shift_?jis|ms.kanji|johab|uhc)/i
      )
    {
        print STDERR
"AUTOCONFIG: Double-byte character set guessed not usable by Foswiki. Defaulting to 'utf-8'\n";
        $Foswiki::cfg{Site}{CharSet} = 'utf-8';
    }

    print STDERR
"AUTOCONFIG: Set initial {Site}{Locale} to $Foswiki::cfg{Site}{Locale} using {Site}{CharSet} $Foswiki::cfg{Site}{CharSet} \n";
}

=begin TML
---++ StaticMethod _bootstrapStoreSettings()
Called by bootstrapConfig. This handles the store specific settings. This in turn
Expand Down

0 comments on commit b7c10da

Please sign in to comment.