add support for fts5, start new version 1.0.16

This commit is contained in:
Uwe Steinmann 2021-04-20 20:52:17 +02:00
parent c559afd97c
commit 42c4b4eaaf
3 changed files with 81 additions and 28 deletions

View File

@ -23,18 +23,31 @@
* @version Release: @package_version@ * @version Release: @package_version@
*/ */
class SeedDMS_SQLiteFTS_Indexer { class SeedDMS_SQLiteFTS_Indexer {
/**
* @var string $ftstype
* @access protected
*/
protected $_ftstype;
/** /**
* @var object $index sqlite index * @var object $index sqlite index
* @access protected * @access protected
*/ */
protected $_conn; protected $_conn;
const ftstype = 'fts5';
/** /**
* Constructor * Constructor
* *
*/ */
function __construct($indexerDir) { /* {{{ */ function __construct($indexerDir) { /* {{{ */
$this->_conn = new PDO('sqlite:'.$indexerDir.'/index.db'); $this->_conn = new PDO('sqlite:'.$indexerDir.'/index.db');
$this->_ftstype = self::ftstype;
if($this->_ftstype == 'fts5')
$this->_rawid = 'rowid';
else
$this->_rawid = 'docid';
} /* }}} */ } /* }}} */
/** /**
@ -62,6 +75,7 @@ class SeedDMS_SQLiteFTS_Indexer {
* in SeedDMS_SQLiteFTS_Term * in SeedDMS_SQLiteFTS_Term
*/ */
$version = SQLite3::version(); $version = SQLite3::version();
if(self::ftstype == 'fts4') {
if($version['versionNumber'] >= 3008000) if($version['versionNumber'] >= 3008000)
$sql = 'CREATE VIRTUAL TABLE docs USING fts4(documentid, title, comment, keywords, category, mimetype, origfilename, owner, content, created, users, status, path, notindexed=created, matchinfo=fts3)'; $sql = 'CREATE VIRTUAL TABLE docs USING fts4(documentid, title, comment, keywords, category, mimetype, origfilename, owner, content, created, users, status, path, notindexed=created, matchinfo=fts3)';
else else
@ -75,6 +89,19 @@ class SeedDMS_SQLiteFTS_Indexer {
if($res === false) { if($res === false) {
return null; return null;
} }
} elseif(self::ftstype == 'fts5') {
$sql = 'CREATE VIRTUAL TABLE docs USING fts5(documentid, title, comment, keywords, category, mimetype, origfilename, owner, content, created unindexed, users, status, path)';
$res = $index->_conn->exec($sql);
if($res === false) {
return null;
}
$sql = 'CREATE VIRTUAL TABLE docs_terms USING fts5vocab(docs, \'col\');';
$res = $index->_conn->exec($sql);
if($res === false) {
return null;
}
} else
return null;
return($index); return($index);
} /* }}} */ } /* }}} */
@ -116,7 +143,7 @@ class SeedDMS_SQLiteFTS_Indexer {
if(!$this->_conn) if(!$this->_conn)
return false; return false;
$sql = "DELETE FROM docs WHERE docid=".(int) $id; $sql = "DELETE FROM docs WHERE ".$this->_rawid."=".(int) $id;
$res = $this->_conn->exec($sql); $res = $this->_conn->exec($sql);
return $res; return $res;
} /* }}} */ } /* }}} */
@ -150,10 +177,13 @@ class SeedDMS_SQLiteFTS_Indexer {
$res = $this->_conn->query($sql); $res = $this->_conn->query($sql);
$row = $res->fetch(); $row = $res->fetch();
$sql = "SELECT docid, documentid FROM docs"; $sql = "SELECT ".$this->_rawid.", documentid FROM docs";
if($query) if($query)
$sql .= " WHERE docs MATCH ".$this->_conn->quote($query); $sql .= " WHERE docs MATCH ".$this->_conn->quote($query);
$res = $this->_conn->query($sql); if($this->_ftstype == 'fts5')
//$sql .= " ORDER BY rank";
// boost documentid, title and comment
$sql .= " ORDER BY bm25(docs, 10.0, 10.0, 10.0)";
if(!empty($limit['limit'])) if(!empty($limit['limit']))
$sql .= " LIMIT ".(int) $limit['limit']; $sql .= " LIMIT ".(int) $limit['limit'];
if(!empty($limit['offset'])) if(!empty($limit['offset']))
@ -163,7 +193,7 @@ class SeedDMS_SQLiteFTS_Indexer {
if($res) { if($res) {
foreach($res as $rec) { foreach($res as $rec) {
$hit = new SeedDMS_SQLiteFTS_QueryHit($this); $hit = new SeedDMS_SQLiteFTS_QueryHit($this);
$hit->id = $rec['docid']; $hit->id = $rec[$this->_rawid];
$hit->documentid = $rec['documentid']; $hit->documentid = $rec['documentid'];
$hits[] = $hit; $hits[] = $hit;
} }
@ -181,13 +211,13 @@ class SeedDMS_SQLiteFTS_Indexer {
if(!$this->_conn) if(!$this->_conn)
return false; return false;
$sql = "SELECT docid FROM docs WHERE docid=".(int) $id; $sql = "SELECT ".$this->_rawid." FROM docs WHERE ".$this->_rawid."=".(int) $id;
$res = $this->_conn->query($sql); $res = $this->_conn->query($sql);
$hits = array(); $hits = array();
if($res) { if($res) {
while($rec = $res->fetch(PDO::FETCH_ASSOC)) { while($rec = $res->fetch(PDO::FETCH_ASSOC)) {
$hit = new SeedDMS_SQLiteFTS_QueryHit($this); $hit = new SeedDMS_SQLiteFTS_QueryHit($this);
$hit->id = $rec['docid']; $hit->id = $rec[$this->_rawid];
$hits[] = $hit; $hits[] = $hit;
} }
} }
@ -204,13 +234,13 @@ class SeedDMS_SQLiteFTS_Indexer {
if(!$this->_conn) if(!$this->_conn)
return false; return false;
$sql = "SELECT docid, documentid, title, comment, owner, keywords, category, mimetype, origfilename, created, users, status, path FROM docs WHERE docid=".$id; $sql = "SELECT ".$this->_rawid.", documentid, title, comment, owner, keywords, category, mimetype, origfilename, created, users, status, path FROM docs WHERE documentid='D".$id."'";
$res = $this->_conn->query($sql); $res = $this->_conn->query($sql);
$doc = false; $doc = false;
if($res) { if($res) {
$rec = $res->fetch(PDO::FETCH_ASSOC); $rec = $res->fetch(PDO::FETCH_ASSOC);
$doc = new SeedDMS_SQLiteFTS_Document(); $doc = new SeedDMS_SQLiteFTS_Document();
$doc->addField('docid', $rec['docid']); $doc->addField('docid', $rec[$this->_rawid]);
$doc->addField('document_id', $rec['documentid']); $doc->addField('document_id', $rec['documentid']);
$doc->addField('title', $rec['title']); $doc->addField('title', $rec['title']);
$doc->addField('comment', $rec['comment']); $doc->addField('comment', $rec['comment']);
@ -237,13 +267,13 @@ class SeedDMS_SQLiteFTS_Indexer {
if(!$this->_conn) if(!$this->_conn)
return false; return false;
$sql = "SELECT docid, documentid, title, comment, owner, keywords, category, mimetype, origfilename, created, users, status, path FROM docs WHERE documentid='F".$id."'"; $sql = "SELECT ".$this->_rawid.", documentid, title, comment, owner, keywords, category, mimetype, origfilename, created, users, status, path FROM docs WHERE documentid='F".$id."'";
$res = $this->_conn->query($sql); $res = $this->_conn->query($sql);
$doc = false; $doc = false;
if($res) { if($res) {
$rec = $res->fetch(PDO::FETCH_ASSOC); $rec = $res->fetch(PDO::FETCH_ASSOC);
$doc = new SeedDMS_SQLiteFTS_Document(); $doc = new SeedDMS_SQLiteFTS_Document();
$doc->addField('docid', $rec['docid']); $doc->addField('docid', $rec[$this->_rawid]);
$doc->addField('document_id', $rec['documentid']); $doc->addField('document_id', $rec['documentid']);
$doc->addField('title', $rec['title']); $doc->addField('title', $rec['title']);
$doc->addField('comment', $rec['comment']); $doc->addField('comment', $rec['comment']);
@ -264,6 +294,9 @@ class SeedDMS_SQLiteFTS_Indexer {
if(!$this->_conn) if(!$this->_conn)
return false; return false;
if($this->_ftstype == 'fts5')
$sql = "SELECT term, col, doc as occurrences FROM docs_terms WHERE col!='*' ORDER BY col";
else
$sql = "SELECT term, col, occurrences FROM docs_terms WHERE col!='*' ORDER BY col"; $sql = "SELECT term, col, occurrences FROM docs_terms WHERE col!='*' ORDER BY col";
$res = $this->_conn->query($sql); $res = $this->_conn->query($sql);
$terms = array(); $terms = array();

View File

@ -62,7 +62,11 @@ class SeedDMS_SQLiteFTS_Term {
11 => 'status', 11 => 'status',
12 => 'path' 12 => 'path'
); );
/* fts5 passes the column name in $col, whereas fts4 passes an integer column index */
if(is_int($col))
$this->field = $fields[$col]; $this->field = $fields[$col];
else
$this->field = $col; //$fields[$col];
$this->_occurrence = $occurrence; $this->_occurrence = $occurrence;
} /* }}} */ } /* }}} */

View File

@ -11,11 +11,11 @@
<email>uwe@steinmann.cx</email> <email>uwe@steinmann.cx</email>
<active>yes</active> <active>yes</active>
</lead> </lead>
<date>2020-12-12</date> <date>2021-04-19</date>
<time>08:57:44</time> <time>08:57:44</time>
<version> <version>
<release>1.0.15</release> <release>1.0.16</release>
<api>1.0.15</api> <api>1.0.16</api>
</version> </version>
<stability> <stability>
<release>stable</release> <release>stable</release>
@ -23,7 +23,7 @@
</stability> </stability>
<license uri="http://opensource.org/licenses/gpl-license">GPL License</license> <license uri="http://opensource.org/licenses/gpl-license">GPL License</license>
<notes> <notes>
- add indexing folders - add support for fts5 (make it the default)
</notes> </notes>
<contents> <contents>
<dir baseinstalldir="SeedDMS" name="/"> <dir baseinstalldir="SeedDMS" name="/">
@ -312,5 +312,21 @@ add user to list of terms
and SeedDMS_Lucene_Indexer::open() and SeedDMS_Lucene_Indexer::open()
</notes> </notes>
</release> </release>
<release>
<date>2020-12-12</date>
<time>08:57:44</time>
<version>
<release>1.0.15</release>
<api>1.0.15</api>
</version>
<stability>
<release>stable</release>
<api>stable</api>
</stability>
<license uri="http://opensource.org/licenses/gpl-license">GPL License</license>
<notes>
- add indexing folders
</notes>
</release>
</changelog> </changelog>
</package> </package>