package Plucene::SearchEngine::Index::DOC;

use strict;
use warnings;

use base 'Plucene::SearchEngine::Index::Base';
__PACKAGE__->register_handler("application/doc", ".doc");
use File::Temp qw/tmpnam/;

=head1 NAME

Plucene::SearchEngine::Index::DOC - Backend for parsing MS DOC

=head1 DESCRIPTION

This backend analyzes a DOC file for its textual content (using
C<antiword>).

=head1 METHODS

=head2 gather_data_from_file

    $backend->gather_data_from_file($filename);

Runs the external C<antiword> program on C<$filename>, stores its standard
output in a temporary file, and then calls
C<Plucene::SearchEngine::Index::Text::gather_data_from_file> on that
temporary file with C<$self> as the invocant.

Returns C<$self> on success. Returns nothing (undef in scalar context, the
empty list in list context) when C<antiword> cannot be started, exits with a
non-zero status, or its output cannot be written to the temporary file.

The temporary file is removed when this method returns, including when the
delegated text handler throws an exception.

=cut

sub gather_data_from_file {
    my ($self, $filename) = @_;

    # A File::Temp object creates the file atomically (no name/creation race
    # as with tmpnam) and unlinks it when the object goes out of scope, so
    # every exit path below -- including a die in the text handler -- cleans
    # up after itself.
    my $tmp = File::Temp->new;

    # List-form pipe open runs antiword without going through a shell, so
    # spaces and shell metacharacters in $filename are passed through
    # verbatim as a single argument instead of being interpreted.
    open my $antiword, '-|', 'antiword', $filename
        or return;

    # Copy the converter's output byte-for-byte, as a shell redirect would.
    binmode $antiword;
    binmode $tmp;

    my $chunk;
    while (read($antiword, $chunk, 65536)) {
        print {$tmp} $chunk
            or return;
    }

    # Closing a pipe handle waits for the child and returns false when it
    # exited with a non-zero status, i.e. when the conversion failed.
    close $antiword
        or return;

    # Flush everything to disk before another reader opens the file by name.
    close $tmp
        or return;

    # NOTE(review): this module does not load
    # Plucene::SearchEngine::Index::Text itself; it is presumably loaded
    # elsewhere in the application -- confirm.
    $self->Plucene::SearchEngine::Index::Text::gather_data_from_file(
        $tmp->filename
    );

    return $self;
}

1;