Skip to content

Commit

Permalink
Item14293: soffice/libreoffice indexer for ppt
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelDaum committed Jan 23, 2017
1 parent c7797fe commit 72b121e
Show file tree
Hide file tree
Showing 28 changed files with 119 additions and 46 deletions.
14 changes: 7 additions & 7 deletions .gitignore
@@ -1,8 +1,8 @@
*.swp
StringifierContrib.md5
StringifierContrib.sha1
StringifierContrib.tgz
StringifierContrib.txt
StringifierContrib.zip
StringifierContrib_installer
StringifierContrib_installer.pl
/StringifierContrib.md5
/StringifierContrib.sha1
/StringifierContrib.tgz
/StringifierContrib.txt
/StringifierContrib.zip
/StringifierContrib_installer
/StringifierContrib_installer.pl
3 changes: 2 additions & 1 deletion data/System/StringifierContrib.txt
Expand Up @@ -139,6 +139,7 @@ Foswiki:Development/UnitTests for more information on unit testing.
%$DEPENDENCIES%

---++ Change History
| 23 Jan 2017: | (4.30) added stringifier to index XLS using soffice |
| 18 Oct 2015: | (4.20) removed dependency on File::MMagic; now using extension-based mime detection |
| 01 Oct 2015: | (4.10) don't default to pass-through for non-supported document types; fixed unit tests |
| 29 Sep 2015: | (4.00) added unicode support with Foswiki > 2.0 |
Expand All @@ -155,7 +156,7 @@ Foswiki:Development/UnitTests for more information on unit testing.

%META:FORM{name="PackageForm"}%
%META:FIELD{name="Author" title="Author" value="Foswiki:Main.MarkusHesse, Foswiki:Main.SvenDowideit, Foswiki:Main.MichaelDaum & Foswiki:Main.AndrewJones"}%
%META:FIELD{name="Copyright" title="Copyright" value="© 2007, Foswiki:Main.MarkusHesse; © 2009-2015, Foswiki Contributors"}%
%META:FIELD{name="Copyright" title="Copyright" value="© 2007, Foswiki:Main.MarkusHesse; © 2009-2017, Foswiki Contributors"}%
%META:FIELD{name="Description" title="Description" value="%25$SHORTDESCRIPTION%25"}%
%META:FIELD{name="Home" title="Home" value="Foswiki:Extensions/%TOPIC%"}%
%META:FIELD{name="License" title="License" value="GPL ([[http://www.gnu.org/copyleft/gpl.html][GNU General Public License]])"}%
Expand Down
4 changes: 1 addition & 3 deletions lib/Foswiki/Contrib/Stringifier.pm
@@ -1,6 +1,6 @@
# Plugin for Foswiki - The Free and Open Source Wiki, http://foswiki.org/
#
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand Down Expand Up @@ -65,7 +65,5 @@ sub stringFor {
return $plugin->stringForFile($filename);
}



1;

5 changes: 4 additions & 1 deletion lib/Foswiki/Contrib/Stringifier/Base.pm
@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand Down Expand Up @@ -54,6 +54,9 @@ __PACKAGE__->plugins;
# Checks whether an external helper program is available.
#
# Parameters:
#   $program - command name to look up in the search path, or a path to
#              the program file
#
# Returns 1 when the program was found and an empty string otherwise.
sub _programExists {
    my ($self, $program) = @_;

    # nothing to look up
    return '' unless defined $program && length $program;

    # work around a bug in old File::Which that doesn't like absolute paths;
    # return a plain boolean instead of the path so that a program file
    # named "0" is not mistaken for "not found"
    return 1 if -f $program;

    my $path = File::Which::which($program);
    return defined $path ? 1 : '';
}
Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Contrib/Stringifier/Plugins/DOCX.pm
@@ -1,5 +1,5 @@
# Copyright (C) 2009 TWIKI.NET (http://www.twiki.net)
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Contrib/Stringifier/Plugins/DOC_abiword.pm
@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Contrib/Stringifier/Plugins/DOC_antiword.pm
@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Contrib/Stringifier/Plugins/DOC_catdoc.pm
@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand Down
4 changes: 2 additions & 2 deletions lib/Foswiki/Contrib/Stringifier/Plugins/DOC_soffice.pm
@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand Down Expand Up @@ -28,7 +28,7 @@ my $soffice = $Foswiki::cfg{StringifierContrib}{sofficeCmd} || '/usr/bin/soffice

if (defined($Foswiki::cfg{StringifierContrib}{WordIndexer}) &&
($Foswiki::cfg{StringifierContrib}{WordIndexer} eq 'soffice')) {
if (-f $soffice){
if (__PACKAGE__->_programExists($soffice)) {
__PACKAGE__->register_handler("application/word", ".doc", "text/docx", ".docx");
}
}
Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Contrib/Stringifier/Plugins/DOC_wv.pm
@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Contrib/Stringifier/Plugins/HTML.pm
@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Contrib/Stringifier/Plugins/ODT.pm
@@ -1,4 +1,4 @@
# Copyright (C) 2011-2015 Foswiki Contributors
# Copyright (C) 2011-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Contrib/Stringifier/Plugins/PDF.pm
@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Contrib/Stringifier/Plugins/PPT.pm
@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Contrib/Stringifier/Plugins/PPTX.pm
@@ -1,5 +1,5 @@
# Copyright (C) 2009 TWIKI.NET (http://www.twiki.net)
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Contrib/Stringifier/Plugins/PPT_catdoc.pm
@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand Down
13 changes: 6 additions & 7 deletions lib/Foswiki/Contrib/Stringifier/Plugins/PPT_soffice.pm
@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand Down Expand Up @@ -26,18 +26,17 @@ use File::Basename qw(basename);

my $soffice = $Foswiki::cfg{StringifierContrib}{sofficeCmd} || '/usr/bin/soffice';

if (defined($Foswiki::cfg{StringifierContrib}{PowerpointIndexer})
&& $Foswiki::cfg{StringifierContrib}{PowerpointIndexer} eq 'soffice')
if ((!defined($Foswiki::cfg{StringifierContrib}{PowerpointIndexer}) || $Foswiki::cfg{StringifierContrib}{PowerpointIndexer} eq 'soffice')
&& __PACKAGE__->_programExists($soffice))
{
if (-f $soffice) {
__PACKAGE__->register_handler("text/ppt", ".ppt", "text/pptx", ".pptx");
}
__PACKAGE__->register_handler("text/ppt", ".ppt", "text/pptx", ".pptx");
}


sub stringForFile {
my ($self, $file) = @_;
my $tmpDir = File::Temp->newdir();

# first convert to pdf as the thing can't do txt:Text directly as reliably
my $cmd = $soffice . ' --convert-to pdf --invisible --headless --minimized --outdir %OUTDIR|F% %FILENAME|F%';

Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Contrib/Stringifier/Plugins/Text.pm
@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand Down
9 changes: 6 additions & 3 deletions lib/Foswiki/Contrib/Stringifier/Plugins/XLS.pm
@@ -1,4 +1,4 @@
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand All @@ -22,9 +22,12 @@ our @ISA = qw( Foswiki::Contrib::Stringifier::Base );

my $xls2txt = $Foswiki::cfg{StringifierContrib}{xls2txtCmd} || 'xls2txt.pl';

# Only if xls2txt.pl exists, I register myself.
if (__PACKAGE__->_programExists($xls2txt)){
# Register for .xls files when the script-based indexer is selected and the
# xls2txt command is available. An unset ExcelIndexer is treated as 'script',
# the default shipped in Config.spec, so that installations which have not
# re-run configure after upgrading keep indexing XLS files.
{
    my $excelIndexer = $Foswiki::cfg{StringifierContrib}{ExcelIndexer};
    $excelIndexer = 'script' unless defined $excelIndexer;

    if ($excelIndexer eq 'script' && __PACKAGE__->_programExists($xls2txt)) {
        __PACKAGE__->register_handler("application/excel", ".xls");
    }
}

sub stringForFile {
Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Contrib/Stringifier/Plugins/XLSX.pm
@@ -1,5 +1,5 @@
# Copyright (C) 2009 TWIKI.NET (http://www.twiki.net)
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
Expand Down
57 changes: 57 additions & 0 deletions lib/Foswiki/Contrib/Stringifier/Plugins/XLS_soffice.pm
@@ -0,0 +1,57 @@
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details, published at
# http://www.gnu.org/copyleft/gpl.html

package Foswiki::Contrib::Stringifier::Plugins::XLS_soffice;

use strict;
use warnings;

use Foswiki::Contrib::Stringifier::Base ();
our @ISA = qw( Foswiki::Contrib::Stringifier::Base );

use Foswiki::Func ();
use File::Temp ();
use File::Basename qw(basename);

# Command used to run soffice; falls back to /usr/bin/soffice when the
# sofficeCmd setting is unset or empty.
my $soffice = $Foswiki::cfg{StringifierContrib}{sofficeCmd} || '/usr/bin/soffice';

# Register for .xls files only when soffice has been selected as the excel
# indexer and the command can actually be found.
if (   defined $Foswiki::cfg{StringifierContrib}{ExcelIndexer}
    && $Foswiki::cfg{StringifierContrib}{ExcelIndexer} eq 'soffice'
    && __PACKAGE__->_programExists($soffice))
{
    __PACKAGE__->register_handler("application/excel", ".xls");
}

# Extracts the text of a spreadsheet by letting soffice convert it to HTML
# in a temporary directory and handing the result to the HTML stringifier.
#
# Parameters:
#   $file - path of the spreadsheet to convert
#
# Returns the text produced by the HTML stringifier, or '' when soffice
# exits with a non-zero status or the expected output file is missing.
sub stringForFile {
    my ($self, $file) = @_;

    # File::Temp removes the directory again once $tmpDir goes out of scope
    my $tmpDir = File::Temp->newdir();

    my $cmd = $soffice . ' --convert-to html --invisible --headless --minimized --outdir %OUTDIR|F% %FILENAME|F%';

    my ($data, $exit) = Foswiki::Sandbox->sysCommand(
        $cmd,
        OUTDIR => $tmpDir->dirname,
        FILENAME => $file,
    );

    return '' unless ($exit == 0);

    # Strip the last extension, whatever its spelling, to derive the name of
    # the converted file. File::Basename treats suffixes as regular
    # expressions and matches them case-sensitively, so the literal ".xls"
    # would miss "REPORT.XLS" and would match any character before "xls".
    my $tmpFile = $tmpDir->dirname . '/' . basename($file, qr/\.[^.]*/) . '.html';

    # the conversion may report success without producing the expected file
    return '' unless -f $tmpFile;

    # don't rely on the HTML plugin having been loaded by somebody else
    require Foswiki::Contrib::Stringifier::Plugins::HTML;
    my $stringifier = Foswiki::Contrib::Stringifier::Plugins::HTML->new();

    return $stringifier->stringForFile($tmpFile);
}

1;

6 changes: 3 additions & 3 deletions lib/Foswiki/Contrib/StringifierContrib.pm
@@ -1,6 +1,6 @@
# Plugin for Foswiki - The Free and Open Source Wiki, http://foswiki.org/
#
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand All @@ -18,8 +18,8 @@ package Foswiki::Contrib::StringifierContrib;
use strict;
use warnings;

our $VERSION = '4.20';
our $RELEASE = '18 Oct 2015';
our $VERSION = '4.30';
our $RELEASE = '23 Jan 2017';
our $SHORTDESCRIPTION = 'Helper library to stringify binary document formats';
our $NO_PREFS_IN_TOPIC = 1;

Expand Down
15 changes: 13 additions & 2 deletions lib/Foswiki/Contrib/StringifierContrib/Config.spec
@@ -1,26 +1,37 @@
# ---+ Extensions
# ---++ StringifierContrib

# **SELECT abiword, antiword, catdoc, soffice, wv **
# **SELECT abiword, antiword, catdoc, soffice, wv, none **
# Select which MS Word indexer to use (you need to have antiword, abiword or wvText installed)
# <dl>
# <dt>abiword</dt><dd></dd>
# <dt>antiword</dt><dd>is the default; should be used on Linux/Unix but may have problems with doc files generated by OpenOffice</dd>
# <dt>catdoc</dt><dd></dd>
# <dt>soffice</dt><dd>is the most capable of all converters but may be slow</dd>
# <dt>wv</dt><dd>uses wvText</dd>
# <dt>none</dt><dd>don't index word documents</dd>
# </dl>
$Foswiki::cfg{StringifierContrib}{WordIndexer} = 'antiword';

# **SELECT catppt,script,soffice**
# **SELECT catppt,script,soffice, none**
# Select which indexer to use to extract the text of a Powerpoint file (ppt, pptx)
# <dl>
# <dt>catppt</dt><dd></dd>
# <dt>script</dt><dd>uses ppthtml; is the default</dd>
# <dt>soffice</dt><dd>is the most capable of all converters but may be slow</dd>
# <dt>none</dt><dd>don't index powerpoint documents</dd>
# </dl>
$Foswiki::cfg{StringifierContrib}{PowerpointIndexer} = 'script';

# **SELECT script,soffice, none**
# Select which indexer to use to extract the text of an XLS file
# <dl>
# <dt>script</dt><dd>default </dd>
# <dt>soffice</dt><dd>is the most capable of all converters but may be slow</dd>
# <dt>none</dt><dd>don't index excel documents</dd>
# </dl>
$Foswiki::cfg{StringifierContrib}{ExcelIndexer} = 'script';

# **COMMAND**
# Path to your abiword command (used to convert MS word documents: .doc)
$Foswiki::cfg{StringifierContrib}{abiwordCmd} = 'abiword';
Expand Down
1 change: 1 addition & 0 deletions lib/Foswiki/Contrib/StringifierContrib/MANIFEST
Expand Up @@ -18,6 +18,7 @@ lib/Foswiki/Contrib/Stringifier/Plugins/PPT_soffice.pm 0644
lib/Foswiki/Contrib/Stringifier/Plugins/PPTX.pm 0644
lib/Foswiki/Contrib/Stringifier/Plugins/Text.pm 0644
lib/Foswiki/Contrib/Stringifier/Plugins/XLS.pm 0644
lib/Foswiki/Contrib/Stringifier/Plugins/XLS_soffice.pm 0644
lib/Foswiki/Contrib/Stringifier/Plugins/XLSX.pm 0644
lib/Foswiki/Contrib/Stringifier.pm 0644
tools/docx2txt.pl 0755
Expand Down
2 changes: 1 addition & 1 deletion lib/Foswiki/Contrib/StringifierContrib/build.pl
@@ -1,4 +1,4 @@
#!/usr/bin/perl -w
#!/usr/bin/env perl
BEGIN {
unshift @INC, split( /:/, $ENV{FOSWIKI_LIBS} );
}
Expand Down
2 changes: 1 addition & 1 deletion tools/docx2txt.pl
Expand Up @@ -2,7 +2,7 @@

# docx2txt, a command-line utility to convert Docx documents to text format.
# Copyright (C) 2008-2009 Sandeep Kumar
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand Down
2 changes: 1 addition & 1 deletion tools/pptx2txt.pl
Expand Up @@ -5,7 +5,7 @@
# Microsoft Office presentation files.
# Copyright (C) 2009 - Sopan Shewale - sopan.shewale@gmail.com
# TWIKI.NET - sales@twiki.net
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# The development of this tools is completely based on other similar
# tool called docx2txt - available at http://docx2txt.sourceforge.net/
Expand Down
2 changes: 1 addition & 1 deletion tools/stringify
@@ -1,6 +1,6 @@
#!/usr/bin/env perl
#
# Copyright (C) 2009-2015 Foswiki Contributors
# Copyright (C) 2009-2017 Foswiki Contributors
#
# For licensing info read LICENSE file in the Foswiki root.
# This program is free software; you can redistribute it and/or
Expand Down

0 comments on commit 72b121e

Please sign in to comment.