- added pear package to create and use a fulltext search engine based

on lucene
This commit is contained in:
steinm 2011-03-10 14:12:06 +00:00
parent 81ef36cf78
commit 88196ef287
5 changed files with 324 additions and 0 deletions

44
LetoDMS_Lucene/Lucene.php Normal file
View File

@ -0,0 +1,44 @@
<?php
// LetoDMS. Document Management System
// Copyright (C) 2011 Uwe Steinmann
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/**
* @uses Zend_Search_Lucene
*/
require_once('Zend/Search/Lucene.php');
/**
* @uses Zend_Search_Lucene_Analysis_TokenFilter_Stopwords
*/
require_once("Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php");
/**
* @uses LetoDMS_Lucene_Indexer
*/
require_once('Lucene/Indexer.php');
/**
* @uses LetoDMS_Lucene_Search
*/
require_once('Lucene/Search.php');
/**
* @uses LetoDMS_Lucene_IndexedDocument
*/
require_once('Lucene/IndexedDocument.php');
?>

View File

@ -0,0 +1,85 @@
<?php
/**
* Implementation of an indexed document
*
* @category DMS
* @package LetoDMS_Lucene
* @license GPL 2
* @version @version@
* @author Uwe Steinmann <uwe@steinmann.cx>
* @copyright Copyright (C) 2010, Uwe Steinmann
* @version Release: @package_version@
*/
/**
* Class for managing an indexed document.
*
* @category DMS
* @package LetoDMS_Lucene
* @version @version@
* @author Uwe Steinmann <uwe@steinmann.cx>
* @copyright Copyright (C) 2011, Uwe Steinmann
* @version Release: @package_version@
*/
class LetoDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document {
/**
* Constructor. Creates our indexable document and adds all
* necessary fields to it using the passed in document
*/
public function __construct($dms, $document) {
$version = $document->getLatestContent();
$this->addField(Zend_Search_Lucene_Field::Keyword('document_id', $document->getID()));
$this->addField(Zend_Search_Lucene_Field::Keyword('mimetype', $version->getMimeType()));
$this->addField(Zend_Search_Lucene_Field::UnIndexed('created', $version->getDate()));
$this->addField(Zend_Search_Lucene_Field::Text('title', $document->getName()));
if($categories = $document->getCategories()) {
$names = array();
foreach($categories as $cat) {
$names[] = $cat->getName();
}
$this->addField(Zend_Search_Lucene_Field::Text('category', implode(' ', $names)));
}
$owner = $document->getOwner();
$this->addField(Zend_Search_Lucene_Field::Text('owner', $owner->getLogin()));
if($keywords = $document->getKeywords()) {
$this->addField(Zend_Search_Lucene_Field::Text('keywords', $keywords));
}
if($comment = $document->getComment()) {
$this->addField(Zend_Search_Lucene_Field::Text('comment', $comment));
}
$path = $dms->contentDir . $version->getPath();
$content = '';
$fp = null;
switch($version->getMimeType()) {
case "application/pdf":
$fp = popen('pdftotext -nopgbrk '.$path.' - |sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'', 'r');
break;
case "application/msword":
$fp = popen('catdoc '.$path, 'r');
break;
case "application/vnd.ms-excel":
$fp = popen('ssconvert -T Gnumeric_stf:stf_csv -S '.$path.' fd://1', 'r');
break;
case "audio/mpeg":
if(function_exists('id3_get_tag')) {
echo "lasjfdl";
}
break;
case "text/plain":
$fp = popen('cat '.$path, 'r');
break;
}
if($fp) {
$content = '';
while(!feof($fp)) {
$content .= fread($fp, 2048);
}
pclose($fp);
}
if($content) {
$this->addField(Zend_Search_Lucene_Field::UnStored('content', $content, 'utf-8'));
}
}
}
?>

View File

@ -0,0 +1,45 @@
<?php
/**
* Implementation of lucene index
*
* @category DMS
* @package LetoDMS_Lucene
* @license GPL 2
* @version @version@
* @author Uwe Steinmann <uwe@steinmann.cx>
* @copyright Copyright (C) 2010, Uwe Steinmann
* @version Release: @package_version@
*/
/**
* Class for managing a lucene index.
*
* @category DMS
* @package LetoDMS_Lucene
* @version @version@
* @author Uwe Steinmann <uwe@steinmann.cx>
* @copyright Copyright (C) 2011, Uwe Steinmann
* @version Release: @package_version@
*/
class LetoDMS_Lucene_Indexer extends Zend_Search_Lucene {
/**
* @var string $indexname name of lucene index
* @access protected
*/
protected $indexname;
/**
* Create a new index
*
* @return object instance of LetoDMS_Lucene_Search
*/
function __construct() { /* {{{ */
$this->version = '@package_version@';
if($this->version[0] == '@')
$this->version = '3.0.0';
} /* }}} */
}
?>

View File

@ -0,0 +1,75 @@
<?php
/**
* Implementation of search in lucene index
*
* @category DMS
* @package LetoDMS_Lucene
* @license GPL 2
* @version @version@
* @author Uwe Steinmann <uwe@steinmann.cx>
* @copyright Copyright (C) 2010, Uwe Steinmann
* @version Release: @package_version@
*/
/**
* Class for searching in a lucene index.
*
* @category DMS
* @package LetoDMS_Lucene
* @version @version@
* @author Uwe Steinmann <uwe@steinmann.cx>
* @copyright Copyright (C) 2011, Uwe Steinmann
* @version Release: @package_version@
*/
class LetoDMS_Lucene_Search {
/**
* @var object $index lucene index
* @access protected
*/
protected $index;
/**
* Create a new instance of the search
*
* @param object $index lucene index
* @return object instance of LetoDMS_Lucene_Search
*/
function __construct($index) { /* {{{ */
$this->index = $index;
$this->version = '@package_version@';
if($this->version[0] == '@')
$this->version = '3.0.0';
} /* }}} */
/**
* Search in index
*
* @param object $index lucene index
* @return object instance of LetoDMS_Lucene_Search
*/
function search($term, $owner, $status='', $categories=array()) { /* {{{ */
$query = '';
if($term)
$query .= trim($term);
if($owner) {
if($query)
$query .= ' && ';
$query .= 'owner:'.$owner;
}
if($categories) {
if($query)
$query .= ' && ';
$query .= '(category:"';
$query .= implode('" || category:"', $categories);
$query .= '")';
}
$hits = $this->index->find($query);
$recs = array();
foreach($hits as $hit) {
$recs[] = array('id'=>$hit->id, 'document_id'=>$hit->document_id);
}
return $recs;
} /* }}} */
}
?>

View File

@ -0,0 +1,75 @@
<?xml version="1.0" encoding="UTF-8"?>
<package packagerversion="1.8.1" version="2.0" xmlns="http://pear.php.net/dtd/package-2.0" xmlns:tasks="http://pear.php.net/dtd/tasks-1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pear.php.net/dtd/tasks-1.0 http://pear.php.net/dtd/tasks-1.0.xsd http://pear.php.net/dtd/package-2.0 http://pear.php.net/dtd/package-2.0.xsd">
<name>LetoDMS_Lucene</name>
<channel>pear.php.net</channel>
<summary>Fulltext search for LetoDMS</summary>
<description>LetoDMS is a web based document management system (DMS). This is
the fulltext search engine for it, based on Lucene.</description>
<lead>
<name>Uwe Steinmann</name>
<user>steinm</user>
<email>uwe@steinmann.cx</email>
<active>yes</active>
</lead>
<date>2011-02-17</date>
<time>08:05:38</time>
<version>
<release>1.0.0</release>
<api>1.0.0</api>
</version>
<stability>
<release>beta</release>
<api>beta</api>
</stability>
<license uri="http://opensource.org/licenses/bsd-license">BSD License</license>
<notes>
- Initial Release
</notes>
<contents>
<dir baseinstalldir="LetoDMS" name="/">
<dir name="Lucene">
<file name="Indexer.php" role="php">
<tasks:replace from="@package_version@" to="version" type="package-info" />
</file>
<file name="Search.php" role="php">
<tasks:replace from="@package_version@" to="version" type="package-info" />
</file>
<file name="IndexedDocument.php" role="php">
<tasks:replace from="@package_version@" to="version" type="package-info" />
</file>
</dir> <!-- /Lucene -->
<dir name="tests">
</dir> <!-- /tests -->
<file name="Lucene.php" role="php">
<tasks:replace from="@package_version@" to="version" type="package-info" />
</file>
</dir> <!-- / -->
</contents>
<dependencies>
<required>
<php>
<min>4.3.0</min>
</php>
<pearinstaller>
<min>1.5.4</min>
</pearinstaller>
</required>
</dependencies>
<phprelease />
<changelog>
<release>
<version>
<release>0.0.1</release>
<api>0.0.1</api>
</version>
<stability>
<release>alpha</release>
<api>alpha</api>
</stability>
<date>2009-04-27</date>
<license uri="http://opensource.org/licenses/bsd-license">BSD License</license>
<notes>
</notes>
</release>
</changelog>
</package>