Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
fixed list parsing bug in Mojo::DOM::HTML and added next_sibling and previous_sibling methods to Mojo::DOM and Mojo::DOM::Node
  • Loading branch information
kraih committed Feb 11, 2014
1 parent 55c78d7 commit abf8c07
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 28 deletions.
3 changes: 3 additions & 0 deletions Changes
@@ -1,5 +1,8 @@

4.80 2014-02-12
- Added next_sibling and previous_sibling methods to Mojo::DOM.
- Added next_sibling and previous_sibling methods to Mojo::DOM::Node.
- Fixed list parsing bug in Mojo::DOM::HTML.

4.79 2014-02-11
- Improved not found page to show request information and the exact path
Expand Down
48 changes: 36 additions & 12 deletions lib/Mojo/DOM.pm
Expand Up @@ -117,7 +117,8 @@ sub new {
return @_ ? $self->parse(@_) : $self;
}

# "next" appears twice because this page shows both sides of the diff: a
# method-call form and a plain function-call form of the same lookup. Both take
# the first entry of the second list returned by _siblings.
sub next { shift->_siblings->[1][0] }
sub next { _siblings(shift)->[1][0] }
# Same lookup, but with a true third argument ($all) so _siblings walks the
# parent's "contents" instead of only its "children"
sub next_sibling { _siblings(shift, 0, 1)->[1][0] }

# Returns the first element of the tree array (the value documented as the
# node type)
sub node { shift->tree->[0] }

Expand All @@ -133,7 +134,8 @@ sub prepend { shift->_add(0, @_) }

# Delegates to _content with two leading 0 arguments followed by the caller's
# arguments (the meaning of the two flags is not visible here -- TODO confirm)
sub prepend_content { shift->_content(0, 0, @_) }

# "previous" appears twice because this page shows both sides of the diff: a
# method-call form and a plain function-call form of the same lookup. Both take
# the last entry of the first list returned by _siblings.
sub previous { shift->_siblings->[0][-1] }
sub previous { _siblings(shift)->[0][-1] }
# Same lookup, but with a true third argument ($all) so _siblings walks the
# parent's "contents" instead of only its "children"
sub previous_sibling { _siblings(shift, 0, 1)->[0][-1] }

# Removal is implemented as replacing this element with an empty string; the
# return value is whatever "replace" returns
sub remove { shift->replace('') }

Expand Down Expand Up @@ -328,13 +330,13 @@ sub _select {
}

sub _siblings {
my ($self, $merge) = @_;
my ($start, $merge, $all) = @_;

return $merge ? [] : [[], []] unless my $parent = $self->parent;
return $merge ? [] : [[], []] unless my $parent = $start->parent;

my $tree = $self->tree;
my $tree = $start->tree;
my (@before, @after, $match);
for my $child ($parent->children->each) {
for my $child ($all ? $parent->contents->each : $parent->children->each) {
++$match and next if $child->tree eq $tree;
$match ? push @after, $child : push @before, $child;
}
Expand Down Expand Up @@ -501,7 +503,7 @@ L<Mojo::DOM> implements the following methods.
Return a L<Mojo::Collection> object containing all nodes in DOM structure as
L<Mojo::DOM> and L<Mojo::DOM::Node> objects.
"<p><b>123</b></p>"
# "<p><b>123</b></p>"
$dom->parse('<p><!-- test --><b>123<!-- 456 --></b></p>')
->all_contents->grep(sub { $_->node eq 'comment' })->remove->first;
Expand Down Expand Up @@ -608,7 +610,7 @@ fragment.
Return a L<Mojo::Collection> object containing the child nodes of this element
as L<Mojo::DOM> and L<Mojo::DOM::Node> objects.
"<p><b>123</b></p>"
# "<p><b>123</b></p>"
$dom->parse('<p>test<b>123</b></p>')->at('p')->contents->first->remove;
=head2 find
Expand Down Expand Up @@ -658,12 +660,23 @@ fragment if necessary.
my $sibling = $dom->next;
Return L<Mojo::DOM> object for next sibling of this element or C<undef> if
there are no more siblings.
Return L<Mojo::DOM> object for next sibling element or C<undef> if there are
no more siblings.
# "<h2>B</h2>"
$dom->parse('<div><h1>A</h1><h2>B</h2></div>')->at('h1')->next;
=head2 next_sibling
my $sibling = $dom->next_sibling;
Return L<Mojo::DOM> or L<Mojo::DOM::Node> object for next sibling node or
C<undef> if there are no more siblings.
# "456"
$dom->parse('<p><b>123</b><!-- test -->456</p>')->at('b')
->next_sibling->next_sibling;
=head2 node
my $type = $dom->node;
Expand Down Expand Up @@ -708,12 +721,23 @@ Prepend HTML/XML fragment to this element's content.
my $sibling = $dom->previous;
Return L<Mojo::DOM> object for previous sibling of this element or C<undef> if
there are no more siblings.
Return L<Mojo::DOM> object for previous sibling element or C<undef> if there
are no more siblings.
# "<h1>A</h1>"
$dom->parse('<div><h1>A</h1><h2>B</h2></div>')->at('h2')->previous;
=head2 previous_sibling
my $sibling = $dom->previous_sibling;
Return L<Mojo::DOM> or L<Mojo::DOM::Node> object for previous sibling node or
C<undef> if there are no more siblings.
# "123"
$dom->parse('<p>123<!-- test --><b>456</b></p>')->at('b')
->previous_sibling->previous_sibling;
=head2 remove
my $parent = $dom->remove;
Expand Down
19 changes: 8 additions & 11 deletions lib/Mojo/DOM/HTML.pm
Expand Up @@ -156,12 +156,12 @@ sub parse {
# Renders this object's tree by handing it to _render together with the
# object's "xml" attribute
sub render { _render($_[0]->tree, $_[0]->xml) }

# Starting at the node $current points to, end every enclosing element whose
# tag is a key of $allowed, stopping at the root node or at a scope boundary.
# This page shows both sides of the diff, so the argument list and the loop
# header/body each appear in two variants: one takes ($xml, $current, $allowed,
# $scope) and compares $scope as a string, the other takes ($current, $allowed,
# $scope) and looks the tag up in $scope as a hash reference.
sub _close {
my ($xml, $current, $allowed, $scope) = @_;
my ($current, $allowed, $scope) = @_;

# Close allowed parent elements in scope
my $parent = $$current;
# Variant with a single scope tag name; passes $xml through to _end
while ($parent->[0] ne 'root' && $parent->[1] ne $scope) {
_end($parent->[1], $xml, $current) if $allowed->{$parent->[1]};
# Variant with a set of scope tag names; always passes 0 as second argument
while ($parent->[0] ne 'root' && !$scope->{$parent->[1]}) {
_end($parent->[1], 0, $current) if $allowed->{$parent->[1]};
# Index 3 presumably holds the enclosing node, so this moves one level up
# (TODO confirm against the tree layout)
$parent = $parent->[3];
}
}
Expand Down Expand Up @@ -192,10 +192,7 @@ sub _end {
$next = $$current->[3];

# Match
if ($end eq $$current->[1]) { return $$current = $$current->[3] }

# Table
elsif ($end eq 'table') { _close($xml, $current, \%TABLE, $end) }
return $$current = $$current->[3] if $end eq $$current->[1];

# Missing end tag
_end($$current->[1], $xml, $current);
Expand Down Expand Up @@ -270,19 +267,19 @@ sub _start {
if (my $end = $END{$start}) { _end($_, 0, $current) for @$end }

# "li"
elsif ($start eq 'li') { _close(0, $current, {li => 1}, 'ul') }
elsif ($start eq 'li') { _close($current, {li => 1}, {ul => 1, ol => 1}) }

# "colgroup", "thead", "tbody" and "tfoot"
elsif ($start eq 'colgroup' || $start =~ /^t(?:head|body|foot)$/) {
_close(0, $current, \%TABLE, 'table');
_close($current, \%TABLE, {table => 1});
}

# "tr"
elsif ($start eq 'tr') { _close(0, $current, {tr => 1}, 'table') }
elsif ($start eq 'tr') { _close($current, {tr => 1}, {table => 1}) }

# "th" and "td"
elsif ($start eq 'th' || $start eq 'td') {
_close(0, $current, {$_ => 1}, 'table') for qw(th td);
_close($current, {$_ => 1}, {table => 1}) for qw(th td);
}
}

Expand Down
20 changes: 20 additions & 0 deletions lib/Mojo/DOM/Node.pm
Expand Up @@ -2,6 +2,8 @@ package Mojo::DOM::Node;
use Mojo::Base -base;
use overload bool => sub {1}, '""' => sub { shift->content }, fallback => 1;

use Mojo::DOM;

has [qw(parent tree)];

sub content {
Expand All @@ -11,8 +13,12 @@ sub content {
return $self;
}

# Calls the private _siblings helper of Mojo::DOM as a plain function with a
# true third argument and takes the first entry of the second list it returns
sub next_sibling { Mojo::DOM::_siblings(shift, 0, 1)->[1][0] }

# Returns the first element of the tree array, documented below as this
# node's type
sub node { shift->tree->[0] }

# Calls the private _siblings helper of Mojo::DOM as a plain function with a
# true third argument and takes the last entry of the first list it returns
sub previous_sibling { Mojo::DOM::_siblings(shift, 0, 1)->[0][-1] }

sub remove {
my $self = shift;

Expand Down Expand Up @@ -75,13 +81,27 @@ following new ones.
Return or replace this node's content.
=head2 next_sibling
my $sibling = $node->next_sibling;
Return L<Mojo::DOM> or L<Mojo::DOM::Node> object for next sibling node or
C<undef> if there are no more siblings.
=head2 node
my $type = $node->node;
This node's type, usually C<cdata>, C<comment>, C<doctype>, C<pi>, C<raw> or
C<text>.
=head2 previous_sibling
my $sibling = $node->previous_sibling;
Return L<Mojo::DOM> or L<Mojo::DOM::Node> object for previous sibling node or
C<undef> if there are no more siblings.
=head2 remove
my $parent = $node->remove;
Expand Down
26 changes: 21 additions & 5 deletions t/mojo/dom.t
Expand Up @@ -145,6 +145,15 @@ ok !$dom->at('simple')->ancestors->first->xml, 'XML mode not active';
# Nodes
$dom = Mojo::DOM->new(
'<!DOCTYPE before><p>test<![CDATA[123]]><!-- 456 --></p><?after?>');
is $dom->at('p')->previous_sibling->content, ' before', 'right content';
is $dom->at('p')->previous_sibling->previous_sibling, undef,
'no more siblings';
is $dom->at('p')->next_sibling->content, 'after', 'right content';
is $dom->at('p')->next_sibling->next_sibling, undef, 'no more siblings';
is $dom->at('p')->contents->[-1]->previous_sibling->previous_sibling->content,
'test', 'right content';
is $dom->at('p')->contents->first->next_sibling->next_sibling->content,
' 456 ', 'right content';
is $dom->all_contents->[0]->node, 'doctype', 'right node';
is $dom->all_contents->[0]->content, ' before', 'right content';
is $dom->all_contents->[1]->type, 'p', 'right type';
Expand Down Expand Up @@ -1185,18 +1194,25 @@ is $dom->at('html > body')->text, 'bar', 'right text';
# Optional "li" tag
$dom = Mojo::DOM->new->parse(<<EOF);
<ul>
<li>
<ol>
<li>F
<li>G
</ol>
<li>A</li>
<LI>B
<li>C</li>
<li>D
<li>E
</ul>
EOF
is $dom->find('ul > li')->[0]->text, 'A', 'right text';
is $dom->find('ul > li')->[1]->text, 'B', 'right text';
is $dom->find('ul > li')->[2]->text, 'C', 'right text';
is $dom->find('ul > li')->[3]->text, 'D', 'right text';
is $dom->find('ul > li')->[4]->text, 'E', 'right text';
is $dom->find('ul > li > ol > li')->[0]->text, 'F', 'right text';
is $dom->find('ul > li > ol > li')->[1]->text, 'G', 'right text';
is $dom->find('ul > li')->[1]->text, 'A', 'right text';
is $dom->find('ul > li')->[2]->text, 'B', 'right text';
is $dom->find('ul > li')->[3]->text, 'C', 'right text';
is $dom->find('ul > li')->[4]->text, 'D', 'right text';
is $dom->find('ul > li')->[5]->text, 'E', 'right text';

# Optional "p" tag
$dom = Mojo::DOM->new->parse(<<EOF);
Expand Down

0 comments on commit abf8c07

Please sign in to comment.