Skip to content

Commit

Permalink
improved Mojo::DOM::HTML performance a little more
Browse files (browse the repository at this point in the history)
  • Loading branch information
kraih committed Jan 4, 2014
1 parent f7121fa commit be98957
Showing 1 changed file with 37 additions and 38 deletions.
75 changes: 37 additions & 38 deletions lib/Mojo/DOM/HTML.pm
Expand Up
@@ -98,56 +98,55 @@ sub parse {
$text .= '<' if defined $runaway;
push @$current, ['text', html_unescape $text] if length $text;

# DOCTYPE
if (defined $doctype) { push @$current, ['doctype', $doctype] }
# Tag
if ($tag) {

# Comment
elsif (defined $comment) { push @$current, ['comment', $comment] }
# End
my $xml = $self->xml;
if ($tag =~ $END_RE) { _end($xml ? $1 : lc($1), $xml, \$current) }

# CDATA
elsif (defined $cdata) { push @$current, ['cdata', $cdata] }
# Start
elsif ($tag =~ m!([^\s/]+)([\s\S]*)!) {
my ($start, $attr) = ($xml ? $1 : lc($1), $2);

# Processing instruction (try to detect XML)
elsif (defined $pi) {
$self->xml(1) if !defined $self->xml && $pi =~ /xml/i;
push @$current, ['pi', $pi];
}
# Attributes
my %attrs;
while ($attr =~ /$ATTR_RE/g) {
my ($key, $value) = ($xml ? $1 : lc($1), $2 // $3 // $4);

# End
next unless $tag;
my $xml = $self->xml;
if ($tag =~ $END_RE) { _end($xml ? $1 : lc($1), $xml, \$current) }
# Empty tag
next if $key eq '/';

# Start
elsif ($tag =~ m!([^\s/]+)([\s\S]*)!) {
my ($start, $attr) = ($xml ? $1 : lc($1), $2);
$attrs{$key} = defined $value ? html_unescape($value) : $value;
}

# Attributes
my %attrs;
while ($attr =~ /$ATTR_RE/g) {
my $key = $xml ? $1 : lc($1);
my $value = $2 // $3 // $4;
_start($start, \%attrs, $xml, \$current);

# Empty tag
next if $key eq '/';
# Element without end tag
_end($start, $xml, \$current)
if (!$xml && $VOID{$start}) || $attr =~ m!/\s*$!;

$attrs{$key} = defined $value ? html_unescape($value) : $value;
# Relaxed "script" or "style"
next unless $start eq 'script' || $start eq 'style';
next unless $html =~ m!\G(.*?)<\s*/\s*$start\s*>!gcsi;
push @$current, ['raw', $1];
_end($start, $xml, \$current);
}
}

# Tag
_start($start, \%attrs, $xml, \$current);
# DOCTYPE
elsif (defined $doctype) { push @$current, ['doctype', $doctype] }

# Element without end tag
_end($start, $xml, \$current)
if (!$self->xml && $VOID{$start}) || $attr =~ m!/\s*$!;
# Comment
elsif (defined $comment) { push @$current, ['comment', $comment] }

# Relaxed "script" or "style"
if ($start eq 'script' || $start eq 'style') {
if ($html =~ m!\G(.*?)<\s*/\s*$start\s*>!gcsi) {
push @$current, ['raw', $1];
_end($start, $xml, \$current);
}
}
# CDATA
elsif (defined $cdata) { push @$current, ['cdata', $cdata] }

# Processing instruction (try to detect XML)
elsif (defined $pi) {
$self->xml(1) if !defined $self->xml && $pi =~ /xml/i;
push @$current, ['pi', $pi];
}
}

Expand Down

0 comments on commit be98957

Please sign in to comment.