Skip to content

Commit

Permalink
Browse files (browse the repository at this point in the history)
a few more improvements for invalid tag handling in Mojo::DOM::HTML
  • Loading branch information
kraih committed Jul 21, 2013
1 parent db47fc4 commit 8f17208
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 8 deletions.
20 changes: 14 additions & 6 deletions lib/Mojo/DOM/HTML.pm
Expand Up @@ -40,10 +40,12 @@ my $TOKEN_RE = qr/
|
<(
\s*
[^>\s]* # Tag
[^<>\s]+ # Tag
\s*
(?:$ATTR_RE)* # Attributes
)>
|
(<) # Runaway "<"
)??
/xis;

Expand Down Expand Up @@ -79,11 +81,17 @@ sub parse {
my $tree = ['root'];
my $current = $tree;
while ($html =~ m/\G$TOKEN_RE/gcs) {
my ($text, $pi, $comment, $cdata, $doctype, $tag)
= ($1, $2, $3, $4, $5, $6);

# Text
if (length $text) { push @$current, ['text', html_unescape($text)] }
my ($text, $pi, $comment, $cdata, $doctype, $tag, $runaway)
= ($1, $2, $3, $4, $5, $6, $11);

# Text (and runaway "<")
$text .= '<' if defined $runaway;
if (length $text) {
$text = html_unescape $text;
my $sibling = $current->[-1];
if (ref $sibling && $sibling->[0] eq 'text') { $sibling->[1] .= $text }
else { push @$current, ['text', $text] }
}

# DOCTYPE
if ($doctype) { push @$current, ['doctype', $doctype] }
Expand Down
4 changes: 2 additions & 2 deletions t/mojo/dom.t
Expand Up @@ -1666,11 +1666,11 @@ $dom = Mojo::DOM->new->parse(<<'EOF');
</div>
</mt:If>
</div>
<b>la<>la<>la</b>
<b>>la<>la<>la<</b>
</body>
</html>
EOF
is $dom->at('#screw-up > b')->text, 'la la la', 'right text';
is $dom->at('#screw-up > b')->text, '>la<>la<>la<', 'right text';
is $dom->at('#screw-up .ewww > a > img')->attrs('src'), '/test.png',
'right attribute';
is $dom->find('#screw-up .ewww > a > img')->[1]->attrs('src'), '/test2.png',
Expand Down

0 comments on commit 8f17208

Please sign in to comment.