Skip to content

Commit

Permalink
Item14420: filter out non-breaking zero-width space as well as the br…
Browse files Browse the repository at this point in the history
…eaking version. TinyMCE 4 has started using it, and still doesn't filter it from output. Also rename a couple of functions in Node to reflect modern HTML usage.
  • Loading branch information
cdot committed Aug 26, 2017
1 parent 2d1a826 commit cda2014
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 39 deletions.
19 changes: 12 additions & 7 deletions WysiwygPlugin/lib/Foswiki/Plugins/WysiwygPlugin/HTML2TML.pm
Expand Up @@ -196,13 +196,18 @@ DEFAULT
$text =~ s/\&\#x22;/\"/goi;
$text =~ s/\&\#160;/\ /goi;

# SMELL: Item11912 These are left behind by TMCE as zero width characters
# surrounding italics and bold inserted by Ctrl-i and Ctrl-b
# We really ought to have a better solution. TMCE is supposed
# to handle this in the cleanup routine, but it doesn't happen,
# and the cleanup routine has been deprecated.
$text =~ s///g; # TMCE 3.5.x
$text =~ s/​//g; # TMCE pre 3.5
# Item11912 Item14420 Zero-width space (x200B) and Non-breaking
# zero-width space (xFEFF) are left behind by TinyMCE with
# italics and bold inserted by Ctrl-i and Ctrl-b. This is probably
# a TinyMCE bug, but in general these characters are useless in TML
# so we strip them in all their forms anyway.
# Remove every spelling of each code point from $text: decimal entity,
# the literal character, then hex entity (same order as before).
foreach my $d ( 0xFEFF, 0x200B ) {

    # Decimal entity. Numeric references may be zero-padded
    # (e.g. &#08203;), so tolerate leading zeros.
    $text =~ s/&#0*$d;//g;

    # The character itself. \Q..\E keeps the interpolated character
    # literal whatever code point is added to the list later.
    # NOTE(review): this only matches if $text holds decoded characters
    # rather than UTF-8 encoded bytes - confirm against the caller.
    my $c = chr($d);
    $text =~ s/\Q$c\E//g;

    # Hex entity. Both the 'x' marker and the hex digits are
    # case-insensitive in HTML, so &#xFEFF; and &#XfeFF; have to be
    # removed as well as &#xfeff;. Leading zeros are tolerated here too.
    my $h = sprintf( '%x', $d );
    $text =~ s/&#x0*$h;//gi;
}

HTML::Entities::_decode_entities( $text, safeEntities() );

Expand Down
65 changes: 33 additions & 32 deletions WysiwygPlugin/lib/Foswiki/Plugins/WysiwygPlugin/HTML2TML/Node.pm
Expand Up @@ -1535,36 +1535,7 @@ sub _handleABBR { return _flatten(@_); }
# ACRONYM handler (per the _handle<TAG> naming used in this file):
# forwards its arguments unchanged to _flatten and returns its result.
sub _handleACRONYM { return _flatten(@_); }
# ADDRESS handler: same delegation to _flatten as ACRONYM.
sub _handleADDRESS { return _flatten(@_); }

# Convert a bold element to TML emphasis using '*'.
#
# Arguments (read from @_, which is also forwarded intact to _emphasis):
#   $this    - the node being converted; its parent, prev and next links
#              are consulted
#   $options - bit flags; only IN_TABLE is tested here
#
# Returns the result of _emphasis( @_, '*' ), except for one case: when
# the node is the only non-whitespace content of a td/th cell and the
# generated text is not one of the ==...== / __...__ look-alikes, it
# returns ( 0, undef ) so that the bold is not turned into stars that
# TML would read as a table heading (Item9651).
sub _handleB {
    my ( $this, $options ) = @_;
    if ( $options & IN_TABLE ) {
        if (
            $this->{parent}
            && (   $this->{parent}->{tag} eq 'td'
                || $this->{parent}->{tag} eq 'th' )
          )
        {
            # Item9651: Don't convert bold in a table cell into stars
            # if the TML would be interpreted as a heading
            my $left  = $this->{prev} ? $this->{prev}->stringify() : '';
            my $right = $this->{next} ? $this->{next}->stringify() : '';
            if ( "$left$right" =~ /^\s*$/ ) {

                # Have to exclude ==this case== and __that case__ as
                # they look awfully similar but don't generate table
                # headings
                my ( $foo, $t ) = _emphasis( @_, '*' );

                # Only test the pattern when _emphasis produced some
                # text; an undef/empty result is handed back as-is,
                # matching _handleSTRONG.
                if ( $t && $t !~ /.*?$CHECK1(==|__).*\1$CHECK2/ ) {
                    return ( 0, undef );
                }
                else {
                    return ( $foo, $t );
                }
            }
        }
    }
    return _emphasis( @_, '*' );
}
# B is handled exactly like STRONG: all arguments are forwarded to
# _handleSTRONG and its result is returned.
sub _handleB { return _handleSTRONG(@_); }
# BASE: ignores its arguments and always returns the pair ( 0, '' ).
# NOTE(review): presumably this drops the element from the output - confirm.
sub _handleBASE { return ( 0, '' ); }
# BASEFONT: same fixed ( 0, '' ) result as BASE.
sub _handleBASEFONT { return ( 0, '' ); }

Expand Down Expand Up @@ -1723,7 +1694,7 @@ sub _handleH3 { return _H( @_, 3 ); }
# H4..H6 handlers: each forwards its arguments to _H with one extra
# trailing number (4, 5 or 6).
# NOTE(review): that number is presumably the heading level - confirm in _H.
sub _handleH4 { return _H( @_, 4 ); }
sub _handleH5 { return _H( @_, 5 ); }
sub _handleH6 { return _H( @_, 6 ); }
# I handler, direct form: calls _emphasis with '_' appended to the
# caller's arguments.
sub _handleI { return _emphasis( @_, '_' ); }
# I handler, delegating form: forwards all arguments to _handleEM.
# NOTE(review): both forms appear here because this view shows both sides
# of a diff; only one of them should exist in the real module.
sub _handleI { return _handleEM(@_); }

sub _handleIMG {
my ( $this, $options ) = @_;
Expand Down Expand Up @@ -1964,7 +1935,37 @@ sub _handleSPAN {

# STRIKE

# STRONG handler, short form: calls _emphasis with '*' appended to the
# caller's arguments and applies no table-cell check.
sub _handleSTRONG { return _emphasis( @_, '*' ); }
# Convert a strong element (and, through _handleB, a bold one) to TML
# emphasis using '*'.
#
# Arguments (read from @_, which is forwarded intact to _emphasis):
#   $this    - the node being converted
#   $options - bit flags; only IN_TABLE is examined
#
# Every path returns the result of _emphasis( @_, '*' ) except the
# Item9651 case at the bottom, which returns ( 0, undef ).
sub _handleSTRONG {
    my ( $this, $options ) = @_;

    # Outside a table there is nothing special to check.
    return _emphasis( @_, '*' ) unless ( $options & IN_TABLE );

    # Only a direct child of a td/th cell can be mistaken for a heading.
    my $cell = $this->{parent};
    return _emphasis( @_, '*' )
      unless $cell && ( $cell->{tag} eq 'td' || $cell->{tag} eq 'th' );

    # The risk exists only when the siblings contribute nothing but
    # whitespace, i.e. this node is effectively the whole cell.
    my $before = $this->{prev} ? $this->{prev}->stringify() : '';
    my $after  = $this->{next} ? $this->{next}->stringify() : '';
    return _emphasis( @_, '*' ) unless "$before$after" =~ /^\s*$/;

    my ( $status, $tml ) = _emphasis( @_, '*' );

    # An empty result, or one of the ==this== / __that__ look-alikes
    # (which do not generate table headings), is passed through.
    if ( !$tml || $tml =~ /.*?$CHECK1(==|__).*\1$CHECK2/ ) {
        return ( $status, $tml );
    }

    # Item9651: don't convert bold/strong in a table cell into stars if
    # the TML would be interpreted as a heading.
    return ( 0, undef );
}

# STYLE: ignores its arguments and always returns the pair ( 0, '' ).
# NOTE(review): presumably this drops the element from the output - confirm.
sub _handleSTYLE { return ( 0, '' ); }

Expand Down

0 comments on commit cda2014

Please sign in to comment.