mirror of
https://git.code.sf.net/p/seeddms/code
synced 2025-02-06 15:14:58 +00:00
427 lines
13 KiB
PHP
427 lines
13 KiB
PHP
<?php
|
|
/**
|
|
* Implementation of SQLiteFTS index
|
|
*
|
|
* @category DMS
|
|
* @package SeedDMS_Lucene
|
|
* @license GPL 2
|
|
* @version @version@
|
|
* @author Uwe Steinmann <uwe@steinmann.cx>
|
|
* @copyright Copyright (C) 2010, Uwe Steinmann
|
|
* @version Release: @package_version@
|
|
*/
|
|
|
|
|
|
/**
|
|
* Class for managing a SQLiteFTS index.
|
|
*
|
|
* @category DMS
|
|
* @package SeedDMS_Lucene
|
|
* @version @version@
|
|
* @author Uwe Steinmann <uwe@steinmann.cx>
|
|
* @copyright Copyright (C) 2011, Uwe Steinmann
|
|
* @version Release: @package_version@
|
|
*/
|
|
class SeedDMS_SQLiteFTS_Indexer {

    /**
     * @var string $_ftstype FTS module in use, taken from self::ftstype
     * @access protected
     */
    protected $_ftstype;

    /**
     * @var PDO $_conn connection to the SQLite database file holding the index
     * @access protected
     */
    protected $_conn;

    /**
     * @var string $_rawid name of the record id column: 'rowid' for fts5,
     *      'docid' otherwise
     * @access protected
     */
    protected $_rawid;

    const ftstype = 'fts5';

    /**
     * Constructor
     *
     * Opens (and thereby creates, if missing) the file index.db below the
     * given directory.
     *
     * @param string $indexerDir directory on disk containing the index
     */
    public function __construct($indexerDir) { /* {{{ */
        $this->_conn = new PDO('sqlite:'.$indexerDir.'/index.db');
        /* Every method of this class detects errors by checking for a false
         * result. Since PHP 8.0 PDO defaults to throwing exceptions instead,
         * so the silent mode is selected explicitly.
         */
        $this->_conn->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_SILENT);
        $this->_ftstype = self::ftstype;
        $this->_rawid = ($this->_ftstype == 'fts5') ? 'rowid' : 'docid';
    } /* }}} */

    /**
     * Open an existing index
     *
     * Falls back to create() if there is no index.db in the index directory.
     *
     * @param array $conf $conf['indexdir'] is the directory on disk containing the index
     * @return SeedDMS_SQLiteFTS_Indexer|null indexer, or null if a new index
     *         had to be created and that failed
     */
    public static function open($conf) { /* {{{ */
        if(file_exists($conf['indexdir'].'/index.db')) {
            return new SeedDMS_SQLiteFTS_Indexer($conf['indexdir']);
        }
        return self::create($conf);
    } /* }}} */

    /**
     * Create a new index
     *
     * An existing index.db in the index directory is removed first.
     *
     * @param array $conf $conf['indexdir'] is the directory on disk containing the index
     * @return SeedDMS_SQLiteFTS_Indexer|null indexer for the new index, null
     *         if a table could not be created or self::ftstype is unknown
     */
    public static function create($conf) { /* {{{ */
        $dbfile = $conf['indexdir'].'/index.db';
        if(file_exists($dbfile))
            unlink($dbfile);
        $index = new SeedDMS_SQLiteFTS_Indexer($conf['indexdir']);
        /* Make sure the sequence of fields is identical to the field list
         * in SeedDMS_SQLiteFTS_Term
         */
        $statements = array();
        if(self::ftstype == 'fts4') {
            /* The sqlite3 extension is needed for the version check only,
             * hence it is not touched unless fts4 is actually used.
             */
            $version = SQLite3::version();
            if($version['versionNumber'] >= 3008000)
                $statements[] = 'CREATE VIRTUAL TABLE docs USING fts4(documentid, record_type, title, comment, keywords, category, mimetype, origfilename, owner, content, created, indexed, users, status, path, notindexed=created, notindexed=indexed, matchinfo=fts3)';
            else
                $statements[] = 'CREATE VIRTUAL TABLE docs USING fts4(documentid, record_type, title, comment, keywords, category, mimetype, origfilename, owner, content, created, indexed, users, status, path, matchinfo=fts3)';
            $statements[] = 'CREATE VIRTUAL TABLE docs_terms USING fts4aux(docs);';
        } elseif(self::ftstype == 'fts5') {
            $statements[] = 'CREATE VIRTUAL TABLE docs USING fts5(documentid, record_type, title, comment, keywords, category, mimetype, origfilename, owner, content, created unindexed, indexed unindexed, users, status, path)';
            $statements[] = 'CREATE VIRTUAL TABLE docs_terms USING fts5vocab(docs, \'col\');';
        } else {
            return null;
        }

        foreach($statements as $sql) {
            if($index->_conn->exec($sql) === false)
                return null;
        }
        return $index;
    } /* }}} */

    /**
     * Do some initialization
     *
     * This function does nothing; the parameter is ignored.
     *
     * @param string $stopWordsFile unused
     */
    public static function init($stopWordsFile='') { /* {{{ */
    } /* }}} */

    /**
     * Add document to index
     *
     * A record with the same document id is removed before the new record
     * is inserted. Both steps run in one transaction, so a failing insert
     * does not drop the previous record.
     *
     * @param object $doc indexed document of class
     *        SeedDMS_SQLiteFTS_IndexedDocument
     * @return integer|boolean false in case of an error, otherwise the
     *         result of the insert statement
     */
    public function addDocument($doc) { /* {{{ */
        if(!$this->_conn)
            return false;

        /* These fields may be missing, in which case getFieldValue() throws
         * and an empty value is indexed instead.
         */
        $optional = array();
        foreach(array('comment', 'keywords', 'category', 'content', 'mimetype', 'origfilename', 'status', 'created', 'indexed') as $name) {
            try {
                $optional[$name] = $doc->getFieldValue($name);
            } catch (Exception $e) {
                $optional[$name] = '';
            }
        }

        /* All remaining fields are read before anything is written, so an
         * exception thrown by getFieldValue() leaves the index untouched.
         */
        $documentid = $this->_conn->quote($doc->getFieldValue('document_id'));
        $values = array(
            $documentid,
            $this->_conn->quote($doc->getFieldValue('record_type')),
            $this->_conn->quote($doc->getFieldValue('title')),
            $this->_conn->quote($optional['comment']),
            $this->_conn->quote($optional['keywords']),
            $this->_conn->quote($optional['category']),
            $this->_conn->quote($doc->getFieldValue('owner')),
            $this->_conn->quote($optional['content']),
            $this->_conn->quote($optional['mimetype']),
            $this->_conn->quote($optional['origfilename']),
            (int) $optional['created'],
            (int) $optional['indexed'],
            $this->_conn->quote($doc->getFieldValue('users')),
            $this->_conn->quote($optional['status']),
            $this->_conn->quote($doc->getFieldValue('path')),
        );

        /* Do not interfere with a transaction somebody else has started. */
        $owntransaction = !$this->_conn->inTransaction() && $this->_conn->beginTransaction();

        $sql = "DELETE FROM docs WHERE documentid=".$documentid;
        if($this->_conn->exec($sql) === false) {
            if($owntransaction)
                $this->_conn->rollBack();
            return false;
        }

        $sql = "INSERT INTO docs (documentid, record_type, title, comment, keywords, category, owner, content, mimetype, origfilename, created, indexed, users, status, path) VALUES (".implode(", ", $values).")";
        $res = $this->_conn->exec($sql);
        if($res === false) {
            if($owntransaction)
                $this->_conn->rollBack();
            return false;
        }

        if($owntransaction && !$this->_conn->commit())
            return false;
        return $res;
    } /* }}} */

    /**
     * Remove document from index
     *
     * @param integer $id id of index record (not the document id)
     * @return integer|boolean false in case of an error, otherwise the
     *         result of the delete statement
     */
    public function delete($id) { /* {{{ */
        if(!$this->_conn)
            return false;

        $sql = "DELETE FROM docs WHERE ".$this->_rawid."=".(int) $id;
        return $this->_conn->exec($sql);
    } /* }}} */

    /**
     * Check if document was deleted
     *
     * Just for compatibility with lucene.
     *
     * @param integer $id unused
     * @return boolean always false
     */
    public function isDeleted($id) { /* {{{ */
        return false;
    } /* }}} */

    /**
     * Build the WHERE clause shared by all statements issued by find()
     *
     * @param string $query fulltext query, quoted before it is used
     * @param string $filter sql condition which is appended verbatim
     * @return string clause with a leading blank, or an empty string if
     *         neither a query nor a filter was passed
     */
    protected function _whereClause($query, $filter) { /* {{{ */
        $conditions = array();
        if($query)
            $conditions[] = "docs MATCH ".$this->_conn->quote($query);
        if($filter)
            $conditions[] = $filter;
        return $conditions ? " WHERE ".implode(" AND ", $conditions) : "";
    } /* }}} */

    /**
     * Find documents in index
     *
     * NOTE: $filter is inserted into the sql statements as it is. It must
     * not contain unescaped user input.
     *
     * @param string $query fulltext query
     * @param string $filter additional sql condition, may be empty
     * @param array $limit array with elements 'limit' and 'offset'
     * @param array $order array with elements 'by' and 'dir'; evaluated for
     *        fts5 only. 'by' may be 'title' or 'created', any other
     *        non-empty value orders by relevance. 'dir' may be 'desc'.
     * @return boolean false in case of an error, otherwise array with elements
     *         'count', 'hits', 'facets'. 'hits' is an array of SeedDMS_SQLiteFTS_QueryHit
     * @throws SeedDMS_SQLiteFTS_Exception if one of the sql statements fails
     */
    public function find($query, $filter, $limit=array(), $order=array()) { /* {{{ */
        if(!$this->_conn)
            return false;

        $where = $this->_whereClause($query, $filter);

        /* First count some records for facets */
        $facets = array();
        foreach(array('owner', 'mimetype', 'category') as $facetname) {
            $sql = "SELECT `".$facetname."`, count(*) AS `c` FROM `docs`".$where." GROUP BY `".$facetname."`";
            $res = $this->_conn->query($sql);
            if(!$res)
                throw new SeedDMS_SQLiteFTS_Exception("Counting records in facet \"$facetname\" failed.");
            $facets[$facetname] = array();
            foreach($res as $row) {
                if(!$row[$facetname] || !$row['c'])
                    continue;
                /* The category column holds a blank separated list, each
                 * entry is counted on its own.
                 */
                if($facetname == 'category')
                    $values = explode(' ', $row[$facetname]);
                else
                    $values = array($row[$facetname]);
                foreach($values as $value) {
                    if(!isset($facets[$facetname][$value]))
                        $facets[$facetname][$value] = $row['c'];
                    else
                        $facets[$facetname][$value] += $row['c'];
                }
            }
        }

        $sql = "SELECT `record_type`, count(*) AS `c` FROM `docs`".$where." GROUP BY `record_type`";
        $res = $this->_conn->query($sql);
        if(!$res)
            throw new SeedDMS_SQLiteFTS_Exception("Counting records in facet \"record_type\" failed.");
        $facets['record_type'] = array('document'=>0, 'folder'=>0);
        foreach($res as $row) {
            $facets['record_type'][$row['record_type']] = $row['c'];
        }
        $total = $facets['record_type']['document'] + $facets['record_type']['folder'];

        $sql = "SELECT ".$this->_rawid.", documentid FROM docs".$where;
        if($this->_ftstype == 'fts5' && !empty($order['by'])) {
            // boost documentid, record_type, title, comment, keywords, category, mimetype, origfilename, owner, content, created unindexed, users, status, path
            $orderby = '';
            switch($order['by']) {
            case "title":
                $orderby = "title";
                break;
            case "created":
                $orderby = "created";
                break;
            default:
                /* bm25() is an auxiliary function of fts5 which can be
                 * used in a full-text (MATCH) query only. Without a query
                 * the records are left unordered.
                 */
                if($query)
                    $orderby = "bm25(docs, 10.0, 0.0, 10.0, 5.0, 5.0, 10.0)";
            }
            if($orderby) {
                $sql .= " ORDER BY ".$orderby;
                if(!empty($order['dir']) && $order['dir'] == 'desc')
                    $sql .= " DESC";
            }
        }
        if(!empty($limit['limit']))
            $sql .= " LIMIT ".(int) $limit['limit'];
        if(!empty($limit['offset'])) {
            /* sqlite does not accept OFFSET without LIMIT, a negative
             * limit means 'no limit'.
             */
            if(empty($limit['limit']))
                $sql .= " LIMIT -1";
            $sql .= " OFFSET ".(int) $limit['offset'];
        }
        $res = $this->_conn->query($sql);
        if(!$res)
            throw new SeedDMS_SQLiteFTS_Exception("Searching for documents failed.");

        $hits = array();
        foreach($res as $rec) {
            $hit = new SeedDMS_SQLiteFTS_QueryHit($this);
            $hit->id = $rec[$this->_rawid];
            $hit->documentid = $rec['documentid'];
            $hits[] = $hit;
        }
        return array('count'=>$total, 'hits'=>$hits, 'facets'=>$facets);
    } /* }}} */

    /**
     * Get the index records of a document
     *
     * @param string $id id of document
     * @return array|boolean false if there is no connection, otherwise a
     *         list of SeedDMS_SQLiteFTS_QueryHit, which is empty if nothing
     *         was found or the statement failed
     */
    public function findById($id) { /* {{{ */
        if(!$this->_conn)
            return false;

        $sql = "SELECT ".$this->_rawid.", documentid FROM docs WHERE documentid=".$this->_conn->quote($id);
        $res = $this->_conn->query($sql);
        $hits = array();
        if($res) {
            while($rec = $res->fetch(PDO::FETCH_ASSOC)) {
                $hit = new SeedDMS_SQLiteFTS_QueryHit($this);
                $hit->id = $rec[$this->_rawid];
                $hit->documentid = $rec['documentid'];
                $hits[] = $hit;
            }
        }
        return $hits;
    } /* }}} */

    /**
     * Get a single document from index
     *
     * @param integer $id id of index record
     * @param boolean $content set to false to leave out the field 'content'
     * @return SeedDMS_SQLiteFTS_Document|boolean false in case of an error
     *         or if there is no such record, otherwise the document
     */
    public function getDocument($id, $content=true) { /* {{{ */
        if(!$this->_conn)
            return false;

        /* The id is quoted because it may origin from user input. */
        $sql = "SELECT ".$this->_rawid.", documentid, title, comment, owner, keywords, category, mimetype, origfilename, created, indexed, users, status, path".($content ? ", content" : "")." FROM docs WHERE ".$this->_rawid."=".$this->_conn->quote((string) $id);
        $res = $this->_conn->query($sql);
        if(!$res)
            return false;
        $rec = $res->fetch(PDO::FETCH_ASSOC);
        if(!$rec)
            return false;

        $doc = new SeedDMS_SQLiteFTS_Document();
        $doc->addField(SeedDMS_SQLiteFTS_Field::Keyword('docid', $rec[$this->_rawid]));
        $doc->addField(SeedDMS_SQLiteFTS_Field::Keyword('document_id', $rec['documentid']));
        $doc->addField(SeedDMS_SQLiteFTS_Field::Text('title', $rec['title']));
        $doc->addField(SeedDMS_SQLiteFTS_Field::Text('comment', $rec['comment']));
        $doc->addField(SeedDMS_SQLiteFTS_Field::Text('keywords', $rec['keywords']));
        $doc->addField(SeedDMS_SQLiteFTS_Field::Text('category', $rec['category']));
        $doc->addField(SeedDMS_SQLiteFTS_Field::Keyword('mimetype', $rec['mimetype']));
        $doc->addField(SeedDMS_SQLiteFTS_Field::Keyword('origfilename', $rec['origfilename']));
        $doc->addField(SeedDMS_SQLiteFTS_Field::Text('owner', $rec['owner']));
        $doc->addField(SeedDMS_SQLiteFTS_Field::Keyword('created', $rec['created']));
        $doc->addField(SeedDMS_SQLiteFTS_Field::Keyword('indexed', $rec['indexed']));
        $doc->addField(SeedDMS_SQLiteFTS_Field::Text('users', $rec['users']));
        $doc->addField(SeedDMS_SQLiteFTS_Field::Keyword('status', $rec['status']));
        $doc->addField(SeedDMS_SQLiteFTS_Field::Keyword('path', $rec['path']));
        if($content)
            $doc->addField(SeedDMS_SQLiteFTS_Field::UnStored('content', $rec['content']));
        return $doc;
    } /* }}} */

    /**
     * Return list of terms in index
     *
     * @param string $query if set, only terms starting with this string
     *        are returned (it is used as a LIKE pattern)
     * @param string $col if set, only terms of this column are returned
     * @return array|boolean false if there is no connection, otherwise a
     *         list of SeedDMS_SQLiteFTS_Term ordered by column and
     *         descending number of occurrences
     */
    public function terms($query='', $col='') { /* {{{ */
        if(!$this->_conn)
            return false;

        $conditions = array();
        if($this->_ftstype == 'fts5') {
            $sql = "SELECT term, col, doc as occurrences FROM docs_terms";
        } else {
            $sql = "SELECT term, col, occurrences FROM docs_terms";
            $conditions[] = "col!='*'";
        }
        /* Both values are quoted, a term containing a single quote would
         * otherwise break the statement.
         */
        if($query)
            $conditions[] = "term like ".$this->_conn->quote($query."%");
        if($col)
            $conditions[] = "col = ".$this->_conn->quote($col);
        if($conditions)
            $sql .= " WHERE ".implode(" AND ", $conditions);
        $sql .= " ORDER BY col, occurrences desc";

        $res = $this->_conn->query($sql);
        $terms = array();
        if($res) {
            while($rec = $res->fetch(PDO::FETCH_ASSOC)) {
                $terms[] = new SeedDMS_SQLiteFTS_Term($rec['term'], $rec['col'], $rec['occurrences']);
            }
        }
        return $terms;
    } /* }}} */

    /**
     * Return number of documents in index
     *
     * @return integer number of records, 0 if they could not be counted
     */
    public function count() { /* {{{ */
        if(!$this->_conn)
            return 0;

        $res = $this->_conn->query("SELECT count(*) c FROM docs");
        if(!$res)
            return 0;
        $rec = $res->fetch(PDO::FETCH_ASSOC);
        return $rec ? (int) $rec['c'] : 0;
    } /* }}} */

    /**
     * Commit changes
     *
     * This function does nothing!
     */
    public function commit() { /* {{{ */
    } /* }}} */

    /**
     * Optimize index
     *
     * This function does nothing!
     */
    public function optimize() { /* {{{ */
    } /* }}} */
}
|
|
?>
|