Skip to content

Commit

Permalink
removed charset support from Mojo::DOM
Browse files: browse the repository at this point in the history
  • Loading branch information
kraih committed May 11, 2013
1 parent b287b49 commit 2dc883a
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 119 deletions.
2 changes: 2 additions & 0 deletions Changes
Expand Up @@ -13,6 +13,8 @@
- Removed Mojolicious::Plugin::PoweredBy and
Mojolicious::Plugin::RequestTimer.
- Removed data attribute from Mojo::URL.
- Removed charset attribute from Mojo::DOM::HTML.
- Removed charset method from Mojo::DOM.
- Removed is_chunked, is_dynamic, is_multipart, has_leftovers, leftovers,
write and write_chunk methods from Mojo::Message.
- Removed hmac_md5_sum method from Mojo::ByteStream.
Expand Down
77 changes: 25 additions & 52 deletions lib/Mojo/DOM.pm
Expand Up @@ -69,20 +69,17 @@ sub attrs {
return $self;
}

sub charset { shift->_html(charset => @_) }

sub children {
my ($self, $type) = @_;

my @children;
my $charset = $self->charset;
my $xml = $self->xml;
my $tree = $self->tree;
my $xml = $self->xml;
my $tree = $self->tree;
for my $e (@$tree[($tree->[0] eq 'root' ? 1 : 4) .. $#$tree]) {

# Make sure child is the right type
next if $e->[0] ne 'tag' || (defined $type && $e->[1] ne $type);
push @children, $self->new->charset($charset)->tree($e)->xml($xml);
push @children, $self->new->tree($e)->xml($xml);
}

return Mojo::Collection->new(@children);
Expand All @@ -92,22 +89,19 @@ sub content_xml {
my $self = shift;

# Render children individually
my $tree = $self->tree;
my $charset = $self->charset;
my $xml = $self->xml;
return join '', map {
Mojo::DOM::HTML->new(charset => $charset, tree => $_, xml => $xml)->render
} @$tree[($tree->[0] eq 'root' ? 1 : 4) .. $#$tree];
my $tree = $self->tree;
my $xml = $self->xml;
return join '',
map { Mojo::DOM::HTML->new(tree => $_, xml => $xml)->render }
@$tree[($tree->[0] eq 'root' ? 1 : 4) .. $#$tree];
}

sub find {
my ($self, $selector) = @_;

my $charset = $self->charset;
my $xml = $self->xml;
my $xml = $self->xml;
my $results = Mojo::DOM::CSS->new(tree => $self->tree)->select($selector);
return Mojo::Collection->new(
map { $self->new->charset($charset)->tree($_)->xml($xml) } @$results);
return Mojo::Collection->new(map { $self->new->tree($_)->xml($xml) }
@$results);
}

sub namespace {
Expand Down Expand Up @@ -137,8 +131,7 @@ sub next { shift->_sibling(1) }
sub parent {
my $self = shift;
return undef if (my $tree = $self->tree)->[0] eq 'root';
return $self->new->charset($self->charset)->tree($tree->[3])
->xml($self->xml);
return $self->new->tree($tree->[3])->xml($self->xml);
}

sub parse {
Expand Down Expand Up @@ -196,7 +189,7 @@ sub root {
$root = $parent;
}

return $self->new->charset($self->charset)->tree($root)->xml($self->xml);
return $self->new->tree($root)->xml($self->xml);
}

sub text {
Expand Down Expand Up @@ -302,11 +295,7 @@ sub _parent {
return \@new;
}

sub _parse {
my $self = shift;
Mojo::DOM::HTML->new(charset => $self->charset, xml => $self->xml)
->parse(shift)->tree;
}
sub _parse { Mojo::DOM::HTML->new(xml => shift->xml)->parse(shift)->tree }

sub _sibling {
my ($self, $next) = @_;
Expand Down Expand Up @@ -466,8 +455,7 @@ enabled by default.
$dom = $dom->append('<p>Hi!</p>');
Append HTML/XML to element. Note that the HTML/XML will be decoded if a
C<charset> has been defined.
Append HTML/XML to element.
# "<div><h1>A</h1><h2>B</h2></div>"
$dom->parse('<div><h1>A</h1></div>')->at('h1')->append('<h2>B</h2>')->root;
Expand All @@ -476,8 +464,7 @@ C<charset> has been defined.
$dom = $dom->append_content('<p>Hi!</p>');
Append HTML/XML to element content. Note that the HTML/XML will be decoded if
a C<charset> has been defined.
Append HTML/XML to element content.
# "<div><h1>AB</h1></div>"
$dom->parse('<div><h1>A</h1></div>')->at('h1')->append_content('B')->root;
Expand All @@ -502,13 +489,6 @@ L<Mojo::DOM::CSS> are supported.
Element attributes.
=head2 charset
my $charset = $dom->charset;
$dom = $dom->charset('UTF-8');
Charset used for decoding and encoding HTML/XML.
=head2 children
my $collection = $dom->children;
Expand All @@ -524,8 +504,7 @@ L<Mojo::DOM> objects, similar to C<find>.
my $xml = $dom->content_xml;
Render content of this element to XML. Note that the XML will be encoded if a
C<charset> has been defined.
Render content of this element to XML.
# "<b>test</b>"
$dom->parse('<div><b>test</b></div>')->div->content_xml;
Expand Down Expand Up @@ -577,18 +556,16 @@ has no parent.
$dom = $dom->parse('<foo bar="baz">test</foo>');
Parse HTML/XML document with L<Mojo::DOM::HTML>. Note that the HTML/XML will
be decoded if a C<charset> has been defined.
Parse HTML/XML document with L<Mojo::DOM::HTML>.
# Parse UTF-8 encoded XML
my $dom = Mojo::DOM->new->charset('UTF-8')->xml(1)->parse($xml);
# Parse XML
my $dom = Mojo::DOM->new->xml(1)->parse($xml);
=head2 prepend
$dom = $dom->prepend('<p>Hi!</p>');
Prepend HTML/XML to element. Note that the HTML/XML will be decoded if a
C<charset> has been defined.
Prepend HTML/XML to element.
# "<div><h1>A</h1><h2>B</h2></div>"
$dom->parse('<div><h2>B</h2></div>')->at('h2')->prepend('<h1>A</h1>')->root;
Expand All @@ -597,8 +574,7 @@ C<charset> has been defined.
$dom = $dom->prepend_content('<p>Hi!</p>');
Prepend HTML/XML to element content. Note that the HTML/XML will be decoded if
a C<charset> has been defined.
Prepend HTML/XML to element content.
# "<div><h2>AB</h2></div>"
$dom->parse('<div><h2>B</h2></div>')->at('h2')->prepend_content('A')->root;
Expand Down Expand Up @@ -627,8 +603,7 @@ Remove element and return it as a L<Mojo::DOM> object.
my $old = $dom->replace('<div>test</div>');
Replace element with HTML/XML and return the replaced element as a
L<Mojo::DOM> object. Note that the HTML/XML will be decoded if a C<charset>
has been defined.
L<Mojo::DOM> object.
# "<div><h2>B</h2></div>"
$dom->parse('<div><h1>A</h1></div>')->at('h1')->replace('<h2>B</h2>')->root;
Expand All @@ -640,8 +615,7 @@ has been defined.
$dom = $dom->replace_content('<p>test</p>');
Replace element content with HTML/XML. Note that the HTML/XML will be decoded
if a C<charset> has been defined.
Replace element content with HTML/XML.
# "<div><h1>B</h1></div>"
$dom->parse('<div><h1>A</h1></div>')->at('h1')->replace_content('B')->root;
Expand Down Expand Up @@ -702,8 +676,7 @@ is enabled by default.
my $xml = $dom->to_xml;
my $xml = "$dom";
Render this element and its content to XML. Note that the XML will be encoded
if a C<charset> has been defined.
Render this element and its content to XML.
# "<b>test</b>"
$dom->parse('<div><b>test</b></div>')->div->b->to_xml;
Expand Down
29 changes: 5 additions & 24 deletions lib/Mojo/DOM/HTML.pm
@@ -1,10 +1,10 @@
package Mojo::DOM::HTML;
use Mojo::Base -base;

use Mojo::Util qw(decode encode html_unescape xml_escape);
use Mojo::Util qw(html_unescape xml_escape);
use Scalar::Util 'weaken';

has [qw(charset xml)];
has 'xml';
has tree => sub { ['root'] };

my $ATTR_RE = qr/
Expand Down Expand Up @@ -76,11 +76,6 @@ my %INLINE = map { $_ => 1 } (
sub parse {
my ($self, $html) = @_;

if (my $charset = $self->charset) {
if (defined(my $chars = decode $charset, $html)) { $html = $chars }
else { $self->charset(undef) }
}

my $tree = ['root'];
my $current = $tree;
while ($html =~ m/\G$TOKEN_RE/gcs) {
Expand Down Expand Up @@ -146,12 +141,7 @@ sub parse {
return $self->tree($tree);
}

sub render {
my $self = shift;
my $content = $self->_render($self->tree);
my $charset = $self->charset;
return $charset ? encode($charset, $content) : $content;
}
sub render { $_[0]->_render($_[0]->tree) }

sub _close {
my ($self, $current, $tags, $stop) = @_;
Expand Down Expand Up @@ -348,13 +338,6 @@ L<Mojo::DOM::HTML> is the HTML/XML engine used by L<Mojo::DOM>.
L<Mojo::DOM::HTML> implements the following attributes.
=head2 charset
my $charset = $html->charset;
$html = $html->charset('UTF-8');
Charset used for decoding and encoding HTML/XML.
=head2 tree
my $tree = $html->tree;
Expand All @@ -380,15 +363,13 @@ following new ones.
$html = $html->parse('<foo bar="baz">test</foo>');
Parse HTML/XML document. Note that the HTML/XML will be decoded if a
C<charset> has been defined.
Parse HTML/XML document.
=head2 render
my $xml = $html->render;
Render DOM to XML. Note that the XML will be encoded if a C<charset> has been
defined.
Render DOM to XML.
=head1 SEE ALSO
Expand Down
7 changes: 4 additions & 3 deletions lib/Mojo/Message.pm
Expand Up @@ -79,9 +79,10 @@ sub dom {
my $self = shift;

return undef if $self->content->is_multipart;
my $dom = $self->{dom}
||= Mojo::DOM->new->charset($self->content->charset // undef)
->parse($self->body);
my $html = $self->body;
my $charset = $self->content->charset;
$html = decode($charset, $html) // $html if $charset;
my $dom = $self->{dom} ||= Mojo::DOM->new($html);

return @_ ? $dom->find(@_) : $dom;
}
Expand Down
8 changes: 4 additions & 4 deletions lib/Mojolicious/Command/get.pm
Expand Up @@ -27,7 +27,7 @@ usage: $0 get [OPTIONS] URL [SELECTOR|JSON-POINTER] [COMMANDS]
These options are available:
-C, --charset <charset> Charset of HTML/XML content, defaults to auto
detection or "UTF-8".
detection.
-c, --content <content> Content to send with request.
-H, --header <name:value> Additional HTTP header.
-M, --method <method> HTTP method to use, defaults to "GET".
Expand Down Expand Up @@ -138,8 +138,8 @@ sub _say {
sub _select {
my ($buffer, $selector, $charset, @args) = @_;

my $dom = Mojo::DOM->new->charset($charset)->parse($buffer);
my $results = $dom->find($selector);
$buffer = decode($charset, $buffer) // $buffer if $charset;
my $results = Mojo::DOM->new($buffer)->find($selector);

my $finished;
while (defined(my $command = shift @args)) {
Expand Down Expand Up @@ -167,7 +167,7 @@ sub _select {
$finished++;
}

unless ($finished) { say for @$results }
unless ($finished) { _say($_) for @$results }
}

1;
Expand Down

0 comments on commit 2dc883a

Please sign in to comment.