add support for fts5, start new version 1.0.16

This commit is contained in:
Uwe Steinmann 2021-04-20 20:52:17 +02:00
parent c559afd97c
commit 42c4b4eaaf
3 changed files with 81 additions and 28 deletions

View File

@ -23,18 +23,31 @@
* @version Release: @package_version@
*/
class SeedDMS_SQLiteFTS_Indexer {
/**
* @var string $ftstype
* @access protected
*/
protected $_ftstype;
/**
* @var object $index sqlite index
* @access protected
*/
protected $_conn;
const ftstype = 'fts5';
/**
* Constructor
*
*/
function __construct($indexerDir) { /* {{{ */
	// The index lives in the SQLite database file 'index.db' inside $indexerDir.
	$dsn = 'sqlite:'.$indexerDir.'/index.db';
	$this->_conn = new PDO($dsn);
	// The full text search flavour is taken from the class constant.
	$this->_ftstype = self::ftstype;
	// Name of the row id column used when building queries:
	// 'rowid' for fts5, 'docid' for any other flavour.
	$this->_rawid = ($this->_ftstype == 'fts5') ? 'rowid' : 'docid';
} /* }}} */
/**
@ -62,19 +75,33 @@ class SeedDMS_SQLiteFTS_Indexer {
* in SeedDMS_SQLiteFTS_Term
*/
$version = SQLite3::version();
if($version['versionNumber'] >= 3008000)
$sql = 'CREATE VIRTUAL TABLE docs USING fts4(documentid, title, comment, keywords, category, mimetype, origfilename, owner, content, created, users, status, path, notindexed=created, matchinfo=fts3)';
else
$sql = 'CREATE VIRTUAL TABLE docs USING fts4(documentid, title, comment, keywords, category, mimetype, origfilename, owner, content, created, users, status, path, matchinfo=fts3)';
$res = $index->_conn->exec($sql);
if($res === false) {
if(self::ftstype == 'fts4') {
if($version['versionNumber'] >= 3008000)
$sql = 'CREATE VIRTUAL TABLE docs USING fts4(documentid, title, comment, keywords, category, mimetype, origfilename, owner, content, created, users, status, path, notindexed=created, matchinfo=fts3)';
else
$sql = 'CREATE VIRTUAL TABLE docs USING fts4(documentid, title, comment, keywords, category, mimetype, origfilename, owner, content, created, users, status, path, matchinfo=fts3)';
$res = $index->_conn->exec($sql);
if($res === false) {
return null;
}
$sql = 'CREATE VIRTUAL TABLE docs_terms USING fts4aux(docs);';
$res = $index->_conn->exec($sql);
if($res === false) {
return null;
}
} elseif(self::ftstype == 'fts5') {
$sql = 'CREATE VIRTUAL TABLE docs USING fts5(documentid, title, comment, keywords, category, mimetype, origfilename, owner, content, created unindexed, users, status, path)';
$res = $index->_conn->exec($sql);
if($res === false) {
return null;
}
$sql = 'CREATE VIRTUAL TABLE docs_terms USING fts5vocab(docs, \'col\');';
$res = $index->_conn->exec($sql);
if($res === false) {
return null;
}
} else
return null;
}
$sql = 'CREATE VIRTUAL TABLE docs_terms USING fts4aux(docs);';
$res = $index->_conn->exec($sql);
if($res === false) {
return null;
}
return($index);
} /* }}} */
@ -116,7 +143,7 @@ class SeedDMS_SQLiteFTS_Indexer {
if(!$this->_conn)
return false;
$sql = "DELETE FROM docs WHERE docid=".(int) $id;
$sql = "DELETE FROM docs WHERE ".$this->_rawid."=".(int) $id;
$res = $this->_conn->exec($sql);
return $res;
} /* }}} */
@ -150,10 +177,13 @@ class SeedDMS_SQLiteFTS_Indexer {
$res = $this->_conn->query($sql);
$row = $res->fetch();
$sql = "SELECT docid, documentid FROM docs";
$sql = "SELECT ".$this->_rawid.", documentid FROM docs";
if($query)
$sql .= " WHERE docs MATCH ".$this->_conn->quote($query);
$res = $this->_conn->query($sql);
if($this->_ftstype == 'fts5')
//$sql .= " ORDER BY rank";
// boost documentid, title and comment
$sql .= " ORDER BY bm25(docs, 10.0, 10.0, 10.0)";
if(!empty($limit['limit']))
$sql .= " LIMIT ".(int) $limit['limit'];
if(!empty($limit['offset']))
@ -163,7 +193,7 @@ class SeedDMS_SQLiteFTS_Indexer {
if($res) {
foreach($res as $rec) {
$hit = new SeedDMS_SQLiteFTS_QueryHit($this);
$hit->id = $rec['docid'];
$hit->id = $rec[$this->_rawid];
$hit->documentid = $rec['documentid'];
$hits[] = $hit;
}
@ -181,13 +211,13 @@ class SeedDMS_SQLiteFTS_Indexer {
if(!$this->_conn)
return false;
$sql = "SELECT docid FROM docs WHERE docid=".(int) $id;
$sql = "SELECT ".$this->_rawid." FROM docs WHERE ".$this->_rawid."=".(int) $id;
$res = $this->_conn->query($sql);
$hits = array();
if($res) {
while($rec = $res->fetch(PDO::FETCH_ASSOC)) {
$hit = new SeedDMS_SQLiteFTS_QueryHit($this);
$hit->id = $rec['docid'];
$hit->id = $rec[$this->_rawid];
$hits[] = $hit;
}
}
@ -204,13 +234,13 @@ class SeedDMS_SQLiteFTS_Indexer {
if(!$this->_conn)
return false;
$sql = "SELECT docid, documentid, title, comment, owner, keywords, category, mimetype, origfilename, created, users, status, path FROM docs WHERE docid=".$id;
$sql = "SELECT ".$this->_rawid.", documentid, title, comment, owner, keywords, category, mimetype, origfilename, created, users, status, path FROM docs WHERE documentid='D".$id."'";
$res = $this->_conn->query($sql);
$doc = false;
if($res) {
$rec = $res->fetch(PDO::FETCH_ASSOC);
$doc = new SeedDMS_SQLiteFTS_Document();
$doc->addField('docid', $rec['docid']);
$doc->addField('docid', $rec[$this->_rawid]);
$doc->addField('document_id', $rec['documentid']);
$doc->addField('title', $rec['title']);
$doc->addField('comment', $rec['comment']);
@ -237,13 +267,13 @@ class SeedDMS_SQLiteFTS_Indexer {
if(!$this->_conn)
return false;
$sql = "SELECT docid, documentid, title, comment, owner, keywords, category, mimetype, origfilename, created, users, status, path FROM docs WHERE documentid='F".$id."'";
$sql = "SELECT ".$this->_rawid.", documentid, title, comment, owner, keywords, category, mimetype, origfilename, created, users, status, path FROM docs WHERE documentid='F".$id."'";
$res = $this->_conn->query($sql);
$doc = false;
if($res) {
$rec = $res->fetch(PDO::FETCH_ASSOC);
$doc = new SeedDMS_SQLiteFTS_Document();
$doc->addField('docid', $rec['docid']);
$doc->addField('docid', $rec[$this->_rawid]);
$doc->addField('document_id', $rec['documentid']);
$doc->addField('title', $rec['title']);
$doc->addField('comment', $rec['comment']);
@ -264,7 +294,10 @@ class SeedDMS_SQLiteFTS_Indexer {
if(!$this->_conn)
return false;
$sql = "SELECT term, col, occurrences FROM docs_terms WHERE col!='*' ORDER BY col";
if($this->_ftstype == 'fts5')
$sql = "SELECT term, col, doc as occurrences FROM docs_terms WHERE col!='*' ORDER BY col";
else
$sql = "SELECT term, col, occurrences FROM docs_terms WHERE col!='*' ORDER BY col";
$res = $this->_conn->query($sql);
$terms = array();
if($res) {

View File

@ -62,7 +62,11 @@ class SeedDMS_SQLiteFTS_Term {
11 => 'status',
12 => 'path'
);
$this->field = $fields[$col];
/* fts5 passes the column name in $col, fts4 uses an integer */
if(is_int($col))
$this->field = $fields[$col];
else
$this->field = $col; //$fields[$col];
$this->_occurrence = $occurrence;
} /* }}} */

View File

@ -11,11 +11,11 @@
<email>uwe@steinmann.cx</email>
<active>yes</active>
</lead>
<date>2020-12-12</date>
<date>2021-04-19</date>
<time>08:57:44</time>
<version>
<release>1.0.15</release>
<api>1.0.15</api>
<release>1.0.16</release>
<api>1.0.16</api>
</version>
<stability>
<release>stable</release>
@ -23,7 +23,7 @@
</stability>
<license uri="http://opensource.org/licenses/gpl-license">GPL License</license>
<notes>
- add indexing folders
- add support for fts5 (make it the default)
</notes>
<contents>
<dir baseinstalldir="SeedDMS" name="/">
@ -310,6 +310,22 @@ add user to list of terms
- SeedDMS_Lucene_Search::search() returns array of hits, count and facets
- pass config array instead of index directory to SeedDMS_Lucene_Indexer::create()
and SeedDMS_Lucene_Indexer::open()
</notes>
</release>
<release>
<date>2020-12-12</date>
<time>08:57:44</time>
<version>
<release>1.0.15</release>
<api>1.0.15</api>
</version>
<stability>
<release>stable</release>
<api>stable</api>
</stability>
<license uri="http://opensource.org/licenses/gpl-license">GPL License</license>
<notes>
- add indexing folders
</notes>
</release>
</changelog>