diff --git a/SeedDMS_Lucene/Lucene/IndexedDocument.php b/SeedDMS_Lucene/Lucene/IndexedDocument.php
index a0b1b8ae7..14bd3fd49 100644
--- a/SeedDMS_Lucene/Lucene/IndexedDocument.php
+++ b/SeedDMS_Lucene/Lucene/IndexedDocument.php
@@ -220,8 +220,20 @@ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document {
 		return $this->mimetype;
 	} /* }}} */
 
+	public function setContent($data) { /* {{{ */
+		$this->addField(Zend_Search_Lucene_Field::UnStored('content', $data, 'utf-8'));
+	} /* }}} */
+
 	public function getCmd() { /* {{{ */
 		return $this->cmd;
 	} /* }}} */
+
+	/* Use only for setting the command if e.g. an extension takes over the
+	 * conversion to txt (like the office extension which uses the collabora
+	 * conversion service).
+	 */
+	public function setCmd($cmd) { /* {{{ */
+		$this->cmd = $cmd;
+	} /* }}} */
 }
 ?>
diff --git a/SeedDMS_SQLiteFTS/SQLiteFTS/Document.php b/SeedDMS_SQLiteFTS/SQLiteFTS/Document.php
index 956c8076c..ea63d733e 100644
--- a/SeedDMS_SQLiteFTS/SQLiteFTS/Document.php
+++ b/SeedDMS_SQLiteFTS/SQLiteFTS/Document.php
@@ -36,14 +36,14 @@ class SeedDMS_SQLiteFTS_Document {
 	 */
 	protected $fields;
 
-	public function __get($key) { /* {{{ */
+	public function ___get($key) { /* {{{ */
 		if(isset($this->fields[$key]))
 			return $this->fields[$key];
 		else
 			return false;
 	} /* }}} */
 
-	public function addField($key, $value) { /* {{{ */
+	public function _addField($key, $value) { /* {{{ */
 		//if($key == 'document_id') {
 		if($key == 'docid') {
 			$this->id = $this->fields[$key] = (int) $value;
@@ -55,12 +55,64 @@ class SeedDMS_SQLiteFTS_Document {
 		}
 	} /* }}} */
 
-	public function getFieldValue($key) { /* {{{ */
+	public function addField(SeedDMS_SQLiteFTS_Field $field) { /* {{{ */
+		$this->fields[$field->name] = $field;
+		if($field->name == 'docid') {
+			$this->id = $field->value;
+		}
+		return $this;
+	} /* }}} */
+
+	/**
+	 * Return an array with the names of the fields in this document.
+	 *
+	 * @return array
+	 */
+	public function getFieldNames() {
+		return array_keys($this->fields);
+	}
+
+	public function _getFieldValue($key) { /* {{{ */
 		if(isset($this->fields[$key]))
 			return $this->fields[$key];
 		else
 			return false;
 	} /* }}} */
 
+	/**
+	 * Proxy method for getFieldValue(), provides more convenient access to
+	 * the string value of a field.
+	 *
+	 * @param string $name
+	 * @return string
+	 */
+	public function __get($name) {
+		return $this->getFieldValue($name);
+	}
+
+	/**
+	 * Returns SeedDMS_SQLiteFTS_Field object for a named field in this document.
+	 *
+	 * @param string $fieldName
+	 * @return SeedDMS_SQLiteFTS_Field
+	 * @throws SeedDMS_SQLiteFTS_Exception if no field with that name was added
+	 */
+	public function getField($fieldName) {
+		if (!array_key_exists($fieldName, $this->fields)) {
+			require_once 'SeedDMS/SQLiteFTS/Exception.php';
+			throw new SeedDMS_SQLiteFTS_Exception("Field name \"$fieldName\" not found in document.");
+		}
+		return $this->fields[$fieldName];
+	}
+
+	/**
+	 * Returns the string value of a named field in this document.
+	 *
+	 * @see __get()
+	 * @return string
+	 */
+	public function getFieldValue($fieldName) {
+		return $this->getField($fieldName)->value;
+	}
 }
 ?>
diff --git a/SeedDMS_SQLiteFTS/SQLiteFTS/Exception.php b/SeedDMS_SQLiteFTS/SQLiteFTS/Exception.php
new file mode 100644
index 000000000..281947dce
--- /dev/null
+++ b/SeedDMS_SQLiteFTS/SQLiteFTS/Exception.php
@@ -0,0 +1,41 @@
+
+ * @copyright Copyright (C) 2010, Uwe Steinmann
+ * @version Release: @package_version@
+ */
+
+
+/**
+ * Class for managing a field.
+ *
+ * @category DMS
+ * @package SeedDMS_SQLiteFTS
+ * @version @version@
+ * @author Uwe Steinmann
+ * @copyright Copyright (C) 2011, Uwe Steinmann
+ * @version Release: @package_version@
+ */
+class SeedDMS_SQLiteFTS_Field {
+
+	/**
+	 * Field name
+	 *
+	 * @var string
+	 */
+	public $name;
+
+	/**
+	 * Field value
+	 *
+	 * @var string
+	 */
+	public $value;
+
+	/**
+	 * Object constructor
+	 *
+	 * @param string $name
+	 * @param string $value
+	 */
+	public function __construct($name, $value) {
+		$this->name = $name;
+		$this->value = $value;
+	}
+
+	/**
+	 * Constructs a String-valued Field that is not tokenized, but is indexed
+	 * and stored. Useful for non-text fields, e.g. date or url.
+	 *
+	 * @param string $name
+	 * @param string $value
+	 * @return SeedDMS_SQLiteFTS_Field
+	 */
+	public static function keyword($name, $value) {
+		return new self($name, $value);
+	}
+
+	/**
+	 * Constructs a String-valued Field that is tokenized and indexed,
+	 * and is stored in the index, for return with hits. Useful for short text
+	 * fields, like "title" or "subject". Term vector will not be stored for this field.
+	 *
+	 * @param string $name
+	 * @param string $value
+	 * @return SeedDMS_SQLiteFTS_Field
+	 */
+	public static function text($name, $value) {
+		return new self($name, $value);
+	}
+
+	/**
+	 * Constructs a String-valued Field that is tokenized and indexed,
+	 * but that is not stored in the index.
+	 *
+	 * @param string $name
+	 * @param string $value
+	 * @return SeedDMS_SQLiteFTS_Field
+	 */
+	public static function unStored($name, $value) {
+		return new self($name, $value);
+	}
+}
diff --git a/SeedDMS_SQLiteFTS/SQLiteFTS/IndexedDocument.php b/SeedDMS_SQLiteFTS/SQLiteFTS/IndexedDocument.php
index 74c6ee234..0fb7c86da 100644
--- a/SeedDMS_SQLiteFTS/SQLiteFTS/IndexedDocument.php
+++ b/SeedDMS_SQLiteFTS/SQLiteFTS/IndexedDocument.php
@@ -15,6 +15,7 @@
  * @uses SeedDMS_SQLiteFTS_Document
  */
 require_once('Document.php');
+require_once('Field.php');
 
 
 /**
@@ -111,50 +112,50 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document {
 		$this->cmd = '';
 		$this->mimetype = '';
 
-		$this->addField('title', $document->getName());
+		$this->addField(SeedDMS_SQLiteFTS_Field::Text('title', $document->getName()));
 		if($acllist = $document->getReadAccessList(1, 1, 1)) {
 			$allu = [];
 			foreach($acllist['users'] as $u)
 				$allu[] = $u->getLogin();
-			$this->addField('users', implode(' ', $allu));
+			$this->addField(SeedDMS_SQLiteFTS_Field::Text('users', implode(' ', $allu)));
 			/*
 			$allg = [];
 			foreach($acllist['groups'] as $g)
 				$allg[] = $g->getName();
-			$this->addField('groups', implode(' ', $allg));
+			$this->addField(SeedDMS_SQLiteFTS_Field::Text('groups', implode(' ', $allg)));
 			 */
 		}
 		if($attributes = $document->getAttributes()) {
 			foreach($attributes as $attribute) {
 				$attrdef = $attribute->getAttributeDefinition();
 				if($attrdef->getValueSet() != '')
-					$this->addField('attr_'.str_replace(' ', '_', $attrdef->getName()), $attribute->getValue());
+					$this->addField(SeedDMS_SQLiteFTS_Field::Keyword('attr_'.str_replace(' ', '_', $attrdef->getName()), $attribute->getValue()));
 				else
-					$this->addField('attr_'.str_replace(' ', '_', $attrdef->getName()), $attribute->getValue());
+					$this->addField(SeedDMS_SQLiteFTS_Field::Text('attr_'.str_replace(' ', '_', $attrdef->getName()), $attribute->getValue()));
 			}
 		}
 		$owner = $document->getOwner();
-		$this->addField('owner', $owner->getLogin());
-		$this->addField('path', str_replace(':', 'x', $document->getFolderList()));
+		$this->addField(SeedDMS_SQLiteFTS_Field::Text('owner', $owner->getLogin()));
+		$this->addField(SeedDMS_SQLiteFTS_Field::Keyword('path', str_replace(':', 'x', $document->getFolderList())));
 		if($comment = $document->getComment()) {
-			$this->addField('comment', $comment);
+			$this->addField(SeedDMS_SQLiteFTS_Field::Text('comment', $comment));
 		}
 		if($document->isType('document')) {
-			$this->addField('document_id', 'D'.$document->getID());
+			$this->addField(SeedDMS_SQLiteFTS_Field::Keyword('document_id', 'D'.$document->getID()));
 			$version = $document->getLatestContent();
 			if($version) {
-				$this->addField('mimetype', $version->getMimeType());
-				$this->addField('origfilename', $version->getOriginalFileName());
+				$this->addField(SeedDMS_SQLiteFTS_Field::Keyword('mimetype', $version->getMimeType()));
+				$this->addField(SeedDMS_SQLiteFTS_Field::Keyword('origfilename', $version->getOriginalFileName()));
 				if(!$nocontent)
-					$this->addField('created', $version->getDate(), 'unindexed');
+					$this->addField(SeedDMS_SQLiteFTS_Field::Keyword('created', $version->getDate()));
 				if($attributes = $version->getAttributes()) {
 					foreach($attributes as $attribute) {
 						$attrdef = $attribute->getAttributeDefinition();
 						if($attrdef->getValueSet() != '')
-							$this->addField('attr_'.str_replace(' ', '_', $attrdef->getName()), $attribute->getValue());
+							$this->addField(SeedDMS_SQLiteFTS_Field::Keyword('attr_'.str_replace(' ', '_', $attrdef->getName()), $attribute->getValue()));
 						else
-							$this->addField('attr_'.str_replace(' ', '_', $attrdef->getName()), $attribute->getValue());
+							$this->addField(SeedDMS_SQLiteFTS_Field::Text('attr_'.str_replace(' ', '_', $attrdef->getName()), $attribute->getValue()));
 					}
 				}
 			}
@@ -163,14 +164,14 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document {
 				foreach($categories as $cat) {
 					$names[] = $cat->getName();
 				}
-				$this->addField('category', implode(' ', $names));
+				$this->addField(SeedDMS_SQLiteFTS_Field::Text('category', implode(' ', $names)));
 			}
 			if($keywords = $document->getKeywords()) {
-				$this->addField('keywords', $keywords);
+				$this->addField(SeedDMS_SQLiteFTS_Field::Text('keywords', $keywords));
 			}
 			if($version) {
 				$status = $version->getStatus();
-				$this->addField('status', $status['status']+10);
+				$this->addField(SeedDMS_SQLiteFTS_Field::Keyword('status', $status['status']+10));
 			}
 			if($version && !$nocontent) {
 				$path = $dms->contentDir . $version->getPath();
@@ -192,7 +193,7 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document {
 					try {
 						$content = self::execWithTimeout($cmd, $timeout);
 						if($content['stdout']) {
-							$this->addField('content', $content['stdout'], 'unstored');
+							$this->addField(SeedDMS_SQLiteFTS_Field::UnStored('content', $content['stdout']));
 						}
 						if($content['stderr']) {
 							$this->errormsg = $content['stderr'];
@@ -203,8 +204,8 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document {
 				}
 			}
 		} elseif($document->isType('folder')) {
-			$this->addField('document_id', 'F'.$document->getID());
-			$this->addField('created', $document->getDate(), 'unindexed');
+			$this->addField(SeedDMS_SQLiteFTS_Field::Keyword('document_id', 'F'.$document->getID()));
+			$this->addField(SeedDMS_SQLiteFTS_Field::Keyword('created', $document->getDate()));
 		}
 	} /* }}} */
 
@@ -216,8 +217,20 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document {
 		return $this->mimetype;
 	} /* }}} */
 
+	public function setContent($data) { /* {{{ */
+		$this->addField(SeedDMS_SQLiteFTS_Field::UnStored('content', $data));
+	} /* }}} */
+
 	public function getCmd() { /* {{{ */
 		return $this->cmd;
 	} /* }}} */
+
+	/* Use only for setting the command if e.g. an extension takes over the
+	 * conversion to txt (like the office extension which uses the collabora
+	 * conversion service).
+	 */
+	public function setCmd($cmd) { /* {{{ */
+		$this->cmd = $cmd;
+	} /* }}} */
 }
 ?>
diff --git a/SeedDMS_SQLiteFTS/SQLiteFTS/Indexer.php b/SeedDMS_SQLiteFTS/SQLiteFTS/Indexer.php
index bb77b9835..61987ccee 100644
--- a/SeedDMS_SQLiteFTS/SQLiteFTS/Indexer.php
+++ b/SeedDMS_SQLiteFTS/SQLiteFTS/Indexer.php
@@ -123,7 +123,15 @@ class SeedDMS_SQLiteFTS_Indexer {
 		if(!$this->_conn)
 			return false;
 
-		$sql = "INSERT INTO docs (documentid, title, comment, keywords, category, owner, content, mimetype, origfilename, created, users, status, path) VALUES (".$this->_conn->quote($doc->getFieldValue('document_id')).", ".$this->_conn->quote($doc->getFieldValue('title')).", ".$this->_conn->quote($doc->getFieldValue('comment')).", ".$this->_conn->quote($doc->getFieldValue('keywords')).", ".$this->_conn->quote($doc->getFieldValue('category')).", ".$this->_conn->quote($doc->getFieldValue('owner')).", ".$this->_conn->quote($doc->getFieldValue('content')).", ".$this->_conn->quote($doc->getFieldValue('mimetype')).", ".$this->_conn->quote($doc->getFieldValue('origfilename')).", ".(int)$doc->getFieldValue('created').", ".$this->_conn->quote($doc->getFieldValue('users')).", ".$this->_conn->quote($doc->getFieldValue('status')).", ".$this->_conn->quote($doc->getFieldValue('path'))/*time()*/.")";
+		/* These fields are only added conditionally by the indexed document; getFieldValue() throws if one is missing, so fall back to '' */
+		foreach(array('comment', 'keywords', 'category', 'content', 'mimetype', 'origfilename', 'status', 'created', 'users') as $kk) {
+			try {
+				${$kk} = $doc->getFieldValue($kk);
+			} catch (Exception $e) {
+				${$kk} = '';
+			}
+		}
+		$sql = "INSERT INTO docs (documentid, title, comment, keywords, category, owner, content, mimetype, origfilename, created, users, status, path) VALUES (".$this->_conn->quote($doc->getFieldValue('document_id')).", ".$this->_conn->quote($doc->getFieldValue('title')).", ".$this->_conn->quote($comment).", ".$this->_conn->quote($keywords).", ".$this->_conn->quote($category).", ".$this->_conn->quote($doc->getFieldValue('owner')).", ".$this->_conn->quote($content).", ".$this->_conn->quote($mimetype).", ".$this->_conn->quote($origfilename).", ".(int)$created.", ".$this->_conn->quote($users).", ".$this->_conn->quote($status).", ".$this->_conn->quote($doc->getFieldValue('path'))/*time()*/.")";
 		$res = $this->_conn->exec($sql);
 		if($res === false) {
 			return false;
@@ -243,49 +251,21 @@ class SeedDMS_SQLiteFTS_Indexer {
 		if($res) {
 			$rec = $res->fetch(PDO::FETCH_ASSOC);
 			$doc = new SeedDMS_SQLiteFTS_Document();
-			$doc->addField('docid', $rec[$this->_rawid]);
-			$doc->addField('document_id', $rec['documentid']);
-			$doc->addField('title', $rec['title']);
-			$doc->addField('comment', $rec['comment']);
-			$doc->addField('keywords', $rec['keywords']);
-			$doc->addField('category', $rec['category']);
-			$doc->addField('mimetype', $rec['mimetype']);
-			$doc->addField('origfilename', $rec['origfilename']);
-			$doc->addField('owner', $rec['owner']);
-			$doc->addField('created', $rec['created']);
-			$doc->addField('users', $rec['users']);
-			$doc->addField('status', $rec['status']);
-			$doc->addField('path', $rec['path']);
+			$doc->addField(SeedDMS_SQLiteFTS_Field::Keyword('docid', $rec[$this->_rawid]));
+			$doc->addField(SeedDMS_SQLiteFTS_Field::Keyword('document_id', $rec['documentid']));
+			$doc->addField(SeedDMS_SQLiteFTS_Field::Text('title', $rec['title']));
+			$doc->addField(SeedDMS_SQLiteFTS_Field::Text('comment', $rec['comment']));
+			$doc->addField(SeedDMS_SQLiteFTS_Field::Text('keywords', $rec['keywords']));
+			$doc->addField(SeedDMS_SQLiteFTS_Field::Text('category', $rec['category']));
+			$doc->addField(SeedDMS_SQLiteFTS_Field::Keyword('mimetype', $rec['mimetype']));
+			$doc->addField(SeedDMS_SQLiteFTS_Field::Keyword('origfilename', $rec['origfilename']));
+			$doc->addField(SeedDMS_SQLiteFTS_Field::Text('owner', $rec['owner']));
+			$doc->addField(SeedDMS_SQLiteFTS_Field::Keyword('created', $rec['created']));
+			$doc->addField(SeedDMS_SQLiteFTS_Field::Text('users', $rec['users']));
+			$doc->addField(SeedDMS_SQLiteFTS_Field::Keyword('status', $rec['status']));
+			$doc->addField(SeedDMS_SQLiteFTS_Field::Keyword('path', $rec['path']));
 			if($content)
-				$doc->addField('content', $rec['content']);
-		}
-		return $doc;
-	} /* }}} */
-
-	/**
-	 * Get a single folder from index
-	 *
-	 * @param integer $id id of folder
-	 * @return boolean false in case of an error, otherwise true
-	 */
-	public function __getFolder($id) { /* {{{ */
-		if(!$this->_conn)
-			return false;
-
-		$sql = "SELECT ".$this->_rawid.", documentid, title, comment, owner, keywords, category, mimetype, origfilename, created, users, status, path FROM docs WHERE documentid='F".$id."'";
-		$res = $this->_conn->query($sql);
-		$doc = false;
-		if($res) {
-			$rec = $res->fetch(PDO::FETCH_ASSOC);
-			$doc = new SeedDMS_SQLiteFTS_Document();
-			$doc->addField('docid', $rec[$this->_rawid]);
-			$doc->addField('document_id', $rec['documentid']);
-			$doc->addField('title', $rec['title']);
-			$doc->addField('comment', $rec['comment']);
-			$doc->addField('owner', $rec['owner']);
-			$doc->addField('created', $rec['created']);
-			$doc->addField('users', $rec['users']);
-			$doc->addField('path', $rec['path']);
+				$doc->addField(SeedDMS_SQLiteFTS_Field::UnStored('content', $rec['content']));
 		}
 		return $doc;
 	} /* }}} */
diff --git a/SeedDMS_SQLiteFTS/package.xml b/SeedDMS_SQLiteFTS/package.xml
index 8e05d96ef..9ebb706fc 100644
--- a/SeedDMS_SQLiteFTS/package.xml
+++ b/SeedDMS_SQLiteFTS/package.xml
@@ -25,6 +25,7 @@
 - close pipes in execWithTimeout(), also return exit code of command
 - add support for fts5 (make it the default)
+- add class SeedDMS_SQLiteFTS_Field