mirror of
https://git.code.sf.net/p/seeddms/code
synced 2025-03-12 00:45:34 +00:00
- added pear package to create and use a fulltext search engine based
on lucene
This commit is contained in:
parent
81ef36cf78
commit
88196ef287
44
LetoDMS_Lucene/Lucene.php
Normal file
44
LetoDMS_Lucene/Lucene.php
Normal file
|
@ -0,0 +1,44 @@
|
|||
<?php
|
||||
// LetoDMS. Document Management System
|
||||
// Copyright (C) 2011 Uwe Steinmann
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
/**
|
||||
* @uses Zend_Search_Lucene
|
||||
*/
|
||||
require_once('Zend/Search/Lucene.php');
|
||||
|
||||
/**
|
||||
* @uses Zend_Search_Lucene_Analysis_TokenFilter_StopWords
|
||||
*/
|
||||
require_once("Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php");
|
||||
|
||||
/**
|
||||
* @uses LetoDMS_Lucene_Indexer
|
||||
*/
|
||||
require_once('Lucene/Indexer.php');
|
||||
|
||||
/**
|
||||
* @uses LetoDMS_Lucene_Search
|
||||
*/
|
||||
require_once('Lucene/Search.php');
|
||||
|
||||
/**
|
||||
* @uses LetoDMS_Lucene_IndexedDocument
|
||||
*/
|
||||
require_once('Lucene/IndexedDocument.php');
|
||||
|
||||
?>
|
85
LetoDMS_Lucene/Lucene/IndexedDocument.php
Normal file
85
LetoDMS_Lucene/Lucene/IndexedDocument.php
Normal file
|
@ -0,0 +1,85 @@
|
|||
<?php
|
||||
/**
|
||||
* Implementation of an indexed document
|
||||
*
|
||||
* @category DMS
|
||||
* @package LetoDMS_Lucene
|
||||
* @license GPL 2
|
||||
* @version @version@
|
||||
* @author Uwe Steinmann <uwe@steinmann.cx>
|
||||
* @copyright Copyright (C) 2010, Uwe Steinmann
|
||||
* @version Release: @package_version@
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Class for managing an indexed document.
|
||||
*
|
||||
* @category DMS
|
||||
* @package LetoDMS_Lucene
|
||||
* @version @version@
|
||||
* @author Uwe Steinmann <uwe@steinmann.cx>
|
||||
* @copyright Copyright (C) 2011, Uwe Steinmann
|
||||
* @version Release: @package_version@
|
||||
*/
|
||||
class LetoDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document {
	/**
	 * Constructor. Creates the indexable document and adds all
	 * necessary fields to it using the passed in document.
	 *
	 * The fields document_id, mimetype, created and title are always added.
	 * The fields category, owner, keywords, comment and content are only
	 * added when the document provides a non-empty value for them.
	 *
	 * The text for the content field is obtained by running an external
	 * converter (pdftotext, catdoc, ssconvert or cat) selected by the mime
	 * type of the latest version of the document. Mime types without a
	 * converter are indexed without a content field.
	 *
	 * @param object $dms dms object; only its contentDir property is read
	 * @param object $document document to be indexed
	 */
	public function __construct($dms, $document) {
		$version = $document->getLatestContent();
		$mimetype = $version->getMimeType();

		$this->addField(Zend_Search_Lucene_Field::Keyword('document_id', $document->getID()));
		$this->addField(Zend_Search_Lucene_Field::Keyword('mimetype', $mimetype));
		$this->addField(Zend_Search_Lucene_Field::UnIndexed('created', $version->getDate()));
		$this->addField(Zend_Search_Lucene_Field::Text('title', $document->getName()));

		if($categories = $document->getCategories()) {
			$names = array();
			foreach($categories as $cat) {
				$names[] = $cat->getName();
			}
			$this->addField(Zend_Search_Lucene_Field::Text('category', implode(' ', $names)));
		}

		// NOTE(review): presumably getOwner() can yield a falsy value (e.g.
		// for a removed user); skip the field instead of failing on it.
		if($owner = $document->getOwner()) {
			$this->addField(Zend_Search_Lucene_Field::Text('owner', $owner->getLogin()));
		}

		if($keywords = $document->getKeywords()) {
			$this->addField(Zend_Search_Lucene_Field::Text('keywords', $keywords));
		}

		if($comment = $document->getComment()) {
			$this->addField(Zend_Search_Lucene_Field::Text('comment', $comment));
		}

		// The path ends up in a shell command line, so it must be quoted.
		// Otherwise spaces or shell metacharacters in the path break the
		// command or allow arbitrary commands to be executed.
		$path = escapeshellarg($dms->contentDir . $version->getPath());

		// Select the command which converts the file into plain text
		$cmd = null;
		switch($mimetype) {
			case "application/pdf":
				// sed drops single characters surrounded by blanks and all
				// digits and dots from the extracted text
				$cmd = 'pdftotext -nopgbrk '.$path.' - |sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'';
				break;
			case "application/msword":
				$cmd = 'catdoc '.$path;
				break;
			case "application/vnd.ms-excel":
				$cmd = 'ssconvert -T Gnumeric_stf:stf_csv -S '.$path.' fd://1';
				break;
			case "audio/mpeg":
				// No text extraction for audio files yet. The former debug
				// output has been removed because it ended up in the
				// output of the calling script.
				break;
			case "text/plain":
				$cmd = 'cat '.$path;
				break;
		}

		$content = '';
		if($cmd !== null) {
			$fp = popen($cmd, 'r');
			if($fp) {
				while(!feof($fp)) {
					$chunk = fread($fp, 2048);
					// stop on a read error instead of looping forever
					if($chunk === false)
						break;
					$content .= $chunk;
				}
				pclose($fp);
			}
		}

		if($content !== '') {
			$this->addField(Zend_Search_Lucene_Field::UnStored('content', $content, 'utf-8'));
		}
	}
}
|
||||
?>
|
45
LetoDMS_Lucene/Lucene/Indexer.php
Normal file
45
LetoDMS_Lucene/Lucene/Indexer.php
Normal file
|
@ -0,0 +1,45 @@
|
|||
<?php
|
||||
/**
|
||||
* Implementation of lucene index
|
||||
*
|
||||
* @category DMS
|
||||
* @package LetoDMS_Lucene
|
||||
* @license GPL 2
|
||||
* @version @version@
|
||||
* @author Uwe Steinmann <uwe@steinmann.cx>
|
||||
* @copyright Copyright (C) 2010, Uwe Steinmann
|
||||
* @version Release: @package_version@
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Class for managing a lucene index.
|
||||
*
|
||||
* @category DMS
|
||||
* @package LetoDMS_Lucene
|
||||
* @version @version@
|
||||
* @author Uwe Steinmann <uwe@steinmann.cx>
|
||||
* @copyright Copyright (C) 2011, Uwe Steinmann
|
||||
* @version Release: @package_version@
|
||||
*/
|
||||
class LetoDMS_Lucene_Indexer extends Zend_Search_Lucene {
	/**
	 * @var string $indexname name of lucene index
	 * @access protected
	 */
	protected $indexname;

	/**
	 * @var string $version version of this package
	 * @access public
	 */
	public $version;

	/**
	 * Create a new indexer object
	 *
	 * Only the version of the package is set. The version placeholder is
	 * substituted when the package is installed; when the class is used
	 * without installation the placeholder is still in place and the
	 * version falls back to 3.0.0.
	 *
	 * NOTE(review): the constructor of the parent class is not called, so
	 * no index directory is opened or created here -- confirm that this is
	 * intended.
	 */
	function __construct() { /* {{{ */
		$this->version = '@package_version@';
		if($this->version[0] === '@')
			$this->version = '3.0.0';
	} /* }}} */

}
|
||||
?>
|
75
LetoDMS_Lucene/Lucene/Search.php
Normal file
75
LetoDMS_Lucene/Lucene/Search.php
Normal file
|
@ -0,0 +1,75 @@
|
|||
<?php
|
||||
/**
|
||||
* Implementation of search in lucene index
|
||||
*
|
||||
* @category DMS
|
||||
* @package LetoDMS_Lucene
|
||||
* @license GPL 2
|
||||
* @version @version@
|
||||
* @author Uwe Steinmann <uwe@steinmann.cx>
|
||||
* @copyright Copyright (C) 2010, Uwe Steinmann
|
||||
* @version Release: @package_version@
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Class for searching in a lucene index.
|
||||
*
|
||||
* @category DMS
|
||||
* @package LetoDMS_Lucene
|
||||
* @version @version@
|
||||
* @author Uwe Steinmann <uwe@steinmann.cx>
|
||||
* @copyright Copyright (C) 2011, Uwe Steinmann
|
||||
* @version Release: @package_version@
|
||||
*/
|
||||
class LetoDMS_Lucene_Search {
	/**
	 * @var object $index lucene index
	 * @access protected
	 */
	protected $index;

	/**
	 * @var string $version version of this package
	 * @access public
	 */
	public $version;

	/**
	 * Create a new instance of the search
	 *
	 * The version placeholder is substituted when the package is
	 * installed; when the class is used without installation the
	 * placeholder is still in place and the version falls back to 3.0.0.
	 *
	 * @param object $index lucene index; must provide a method find()
	 */
	function __construct($index) { /* {{{ */
		$this->index = $index;
		$this->version = '@package_version@';
		if($this->version[0] === '@')
			$this->version = '3.0.0';
	} /* }}} */

	/**
	 * Search in index
	 *
	 * Builds a query from the passed criteria, joins them with '&&' and
	 * runs it against the index. Criteria which are empty are left out.
	 * If no criteria remains, the index is not queried at all.
	 *
	 * @param string $term search term, passed to the index as is after
	 *        trimming surrounding white space
	 * @param string $owner login of the owner; inserted into the query
	 *        without escaping
	 * @param string $status currently not used
	 * @param array $categories list of category names; a hit must match at
	 *        least one of them
	 * @return array list of hits, each an array with the elements 'id'
	 *         and 'document_id'
	 */
	function search($term, $owner, $status='', $categories=array()) { /* {{{ */
		$clauses = array();

		// compare against the empty string, otherwise the term '0' is lost
		$term = trim((string) $term);
		if($term !== '')
			$clauses[] = $term;

		if($owner)
			$clauses[] = 'owner:'.$owner;

		if($categories) {
			$phrases = array();
			foreach($categories as $category) {
				// A category name is put into a quoted phrase. Escape
				// backslash and double quote, otherwise a name containing
				// them terminates the phrase early.
				$escaped = strtr((string) $category, array('\\' => '\\\\', '"' => '\\"'));
				$phrases[] = 'category:"'.$escaped.'"';
			}
			$clauses[] = '('.implode(' || ', $phrases).')';
		}

		// nothing to search for
		if(!$clauses)
			return array();

		$hits = $this->index->find(implode(' && ', $clauses));
		$recs = array();
		foreach($hits as $hit) {
			$recs[] = array('id'=>$hit->id, 'document_id'=>$hit->document_id);
		}
		return $recs;
	} /* }}} */
}
|
||||
?>
|
75
LetoDMS_Lucene/package.xml
Normal file
75
LetoDMS_Lucene/package.xml
Normal file
|
@ -0,0 +1,75 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<package packagerversion="1.8.1" version="2.0" xmlns="http://pear.php.net/dtd/package-2.0" xmlns:tasks="http://pear.php.net/dtd/tasks-1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pear.php.net/dtd/tasks-1.0 http://pear.php.net/dtd/tasks-1.0.xsd http://pear.php.net/dtd/package-2.0 http://pear.php.net/dtd/package-2.0.xsd">
|
||||
<name>LetoDMS_Lucene</name>
|
||||
<channel>pear.php.net</channel>
|
||||
<summary>Fulltext search for LetoDMS</summary>
|
||||
<description>LetoDMS is a web based document management system (DMS). This is
|
||||
the fulltext search engine for it, based on Lucene.</description>
|
||||
<lead>
|
||||
<name>Uwe Steinmann</name>
|
||||
<user>steinm</user>
|
||||
<email>uwe@steinmann.cx</email>
|
||||
<active>yes</active>
|
||||
</lead>
|
||||
<date>2011-02-17</date>
|
||||
<time>08:05:38</time>
|
||||
<version>
|
||||
<release>1.0.0</release>
|
||||
<api>1.0.0</api>
|
||||
</version>
|
||||
<stability>
|
||||
<release>beta</release>
|
||||
<api>beta</api>
|
||||
</stability>
|
||||
<license uri="http://opensource.org/licenses/gpl-2.0">GPL 2</license>
|
||||
<notes>
|
||||
- Initial Release
|
||||
</notes>
|
||||
<contents>
|
||||
<dir baseinstalldir="LetoDMS" name="/">
|
||||
<dir name="Lucene">
|
||||
<file name="Indexer.php" role="php">
|
||||
<tasks:replace from="@package_version@" to="version" type="package-info" />
|
||||
</file>
|
||||
<file name="Search.php" role="php">
|
||||
<tasks:replace from="@package_version@" to="version" type="package-info" />
|
||||
</file>
|
||||
<file name="IndexedDocument.php" role="php">
|
||||
<tasks:replace from="@package_version@" to="version" type="package-info" />
|
||||
</file>
|
||||
</dir> <!-- /Lucene -->
|
||||
<dir name="tests">
|
||||
</dir> <!-- /tests -->
|
||||
<file name="Lucene.php" role="php">
|
||||
<tasks:replace from="@package_version@" to="version" type="package-info" />
|
||||
</file>
|
||||
</dir> <!-- / -->
|
||||
</contents>
|
||||
<dependencies>
|
||||
<required>
|
||||
<php>
|
||||
<min>5.0.0</min>
|
||||
</php>
|
||||
<pearinstaller>
|
||||
<min>1.5.4</min>
|
||||
</pearinstaller>
|
||||
</required>
|
||||
</dependencies>
|
||||
<phprelease />
|
||||
<changelog>
|
||||
<release>
|
||||
<version>
|
||||
<release>0.0.1</release>
|
||||
<api>0.0.1</api>
|
||||
</version>
|
||||
<stability>
|
||||
<release>alpha</release>
|
||||
<api>alpha</api>
|
||||
</stability>
|
||||
<date>2009-04-27</date>
|
||||
<license uri="http://opensource.org/licenses/bsd-license">BSD License</license>
|
||||
<notes>
|
||||
</notes>
|
||||
</release>
|
||||
</changelog>
|
||||
</package>
|
Loading…
Reference in New Issue
Block a user