fix indexing documents

This commit is contained in:
Uwe Steinmann 2023-01-10 15:44:30 +01:00
parent 6d6bbca94b
commit 328cbd9eb9

View File

@ -125,17 +125,33 @@ class SeedDMS_Task_Indexer_Process_Folder { /* {{{ */
protected $fulltextservice; protected $fulltextservice;
public function __construct($scheduler, $fulltextservice, $forceupdate) { /* {{{ */ protected $logger;
protected $dacount;
protected $facount;
protected $ducount;
protected $fucount;
public function __construct($scheduler, $fulltextservice, $forceupdate, $logger) { /* {{{ */
$this->scheduler = $scheduler; $this->scheduler = $scheduler;
$this->fulltextservice = $fulltextservice; $this->fulltextservice = $fulltextservice;
$this->logger = $logger;
$this->forceupdate = $forceupdate; $this->forceupdate = $forceupdate;
$this->numdocs = $this->fulltextservice->Indexer()->count(); $this->numdocs = $this->fulltextservice->Indexer()->count();
$this->dacount = 0;
$this->facount = 0;
$this->ducount = 0;
$this->fucount = 0;
} /* }}} */ } /* }}} */
public function process($folder, $depth=0) { /* {{{ */ public function process($folder, $depth=0) { /* {{{ */
$lucenesearch = $this->fulltextservice->Search(); $lucenesearch = $this->fulltextservice->Search();
$documents = $folder->getDocuments(); $documents = $folder->getDocuments();
echo str_repeat(' ', $depth+1).$folder->getId().":".$folder->getFolderPathPlain()." "; $logger = $this->logger;
// echo str_repeat(' ', $depth+1).$folder->getId().":".$folder->getFolderPathPlain()." ";
if(($this->numdocs == 0) || !($hit = $lucenesearch->getFolder($folder->getId()))) { if(($this->numdocs == 0) || !($hit = $lucenesearch->getFolder($folder->getId()))) {
try { try {
$idoc = $this->fulltextservice->IndexedDocument($folder, true); $idoc = $this->fulltextservice->IndexedDocument($folder, true);
@ -149,12 +165,16 @@ class SeedDMS_Task_Indexer_Process_Folder { /* {{{ */
} }
} }
$this->fulltextservice->Indexer()->addDocument($idoc); $this->fulltextservice->Indexer()->addDocument($idoc);
echo "(".getMLText('index_folder_added').")".PHP_EOL; // echo "(".getMLText('index_folder_added').")".PHP_EOL;
$logger->log('Task \'indexingdocs\': folder '.$folder->getId().' added', PEAR_LOG_INFO);
$this->facount++;
} else { } else {
echo "(".$error.")".PHP_EOL; // echo "(".$error.")".PHP_EOL;
$logger->log('Task \'indexingdocs\': adding folder '.$folder->getId().' failed', PEAR_LOG_ERR);
} }
} catch(Exception $e) { } catch(Exception $e) {
echo "(Timeout)".PHP_EOL; // echo "(Timeout)".PHP_EOL;
$logger->log('Task \'indexingdocs\': adding folder '.$folder->getId().' failed', PEAR_LOG_ERR);
} }
} else { } else {
/* Check if the attribute indexed is set or has a value older /* Check if the attribute indexed is set or has a value older
@ -168,7 +188,7 @@ class SeedDMS_Task_Indexer_Process_Folder { /* {{{ */
$indexed = 0; $indexed = 0;
} }
if($indexed >= $folder->getDate() && !$this->forceupdate) { if($indexed >= $folder->getDate() && !$this->forceupdate) {
echo "(".getMLText('index_folder_unchanged').")".PHP_EOL; // echo "(".getMLText('index_folder_unchanged').")".PHP_EOL;
} else { } else {
$this->fulltextservice->Indexer()->delete($hit->id); $this->fulltextservice->Indexer()->delete($hit->id);
try { try {
@ -183,18 +203,22 @@ class SeedDMS_Task_Indexer_Process_Folder { /* {{{ */
} }
} }
$this->fulltextservice->Indexer()->addDocument($idoc); $this->fulltextservice->Indexer()->addDocument($idoc);
echo "(".getMLText('index_folder_updated').")".PHP_EOL; // echo "(".getMLText('index_folder_updated').")".PHP_EOL;
$logger->log('Task \'indexingdocs\': folder '.$folder->getId().' updated', PEAR_LOG_INFO);
$this->fucount++;
} else { } else {
echo "(".$error.")".PHP_EOL; // echo "(".$error.")".PHP_EOL;
$logger->log('Task \'indexingdocs\': updating folder '.$folder->getId().' failed', PEAR_LOG_ERR);
} }
} catch(Exception $e) { } catch(Exception $e) {
echo "(Timeout)".PHP_EOL; // echo "(Timeout)".PHP_EOL;
$logger->log('Task \'indexingdocs\': updating folder '.$folder->getId().' failed. '.$e->getMessage(), PEAR_LOG_ERR);
} }
} }
} }
if($documents) { if($documents) {
foreach($documents as $document) { foreach($documents as $document) {
echo str_repeat(' ', $depth+2).$document->getId().":".$document->getName()." "; // echo str_repeat(' ', $depth+2).$document->getId().":".$document->getName()." ";
/* If the document wasn't indexed before then just add it */ /* If the document wasn't indexed before then just add it */
if(($this->numdocs == 0) || !($hit = $lucenesearch->getDocument($document->getId()))) { if(($this->numdocs == 0) || !($hit = $lucenesearch->getDocument($document->getId()))) {
try { try {
@ -206,10 +230,16 @@ class SeedDMS_Task_Indexer_Process_Folder { /* {{{ */
} }
} }
} }
$this->fulltextservice->Indexer()->addDocument($idoc); if($this->fulltextservice->Indexer()->addDocument($idoc)) {
echo "(".getMLText('index_document_added').")".PHP_EOL; // echo "(".getMLText('index_document_added').")".PHP_EOL;
$logger->log('Task \'indexingdocs\': document '.$document->getId().' added', PEAR_LOG_INFO);
} else {
$logger->log('Task \'indexingdocs\': adding document '.$document->getId().' failed', PEAR_LOG_ERR);
}
$this->dacount++;
} catch(Exception $e) { } catch(Exception $e) {
echo "(Timeout)".PHP_EOL; // echo "(Timeout)".PHP_EOL;
$logger->log('Task \'indexingdocs\': adding document '.$document->getId().' failed. '.$e->getMessage(), PEAR_LOG_ERR);
} }
} else { } else {
/* Check if the attribute indexed is set or has a value older /* Check if the attribute indexed is set or has a value older
@ -223,29 +253,44 @@ class SeedDMS_Task_Indexer_Process_Folder { /* {{{ */
$indexed = 0; $indexed = 0;
} }
$content = $document->getLatestContent(); $content = $document->getLatestContent();
if($indexed >= $content->getDate() && !$this->forceupdate) { if($content) {
echo "(".getMLText('index_document_unchanged').")".PHP_EOL; if($indexed >= $content->getDate() && !$this->forceupdate) {
} else { // echo "(".getMLText('index_document_unchanged').")".PHP_EOL;
$this->fulltextservice->Indexer()->delete($hit->id); } else {
try { $this->fulltextservice->Indexer()->delete($hit->id);
$idoc = $this->fulltextservice->IndexedDocument($document, true); try {
if(isset($GLOBALS['SEEDDMS_HOOKS']['indexDocument'])) { $idoc = $this->fulltextservice->IndexedDocument($document, true);
foreach($GLOBALS['SEEDDMS_HOOKS']['indexDocument'] as $hookObj) { if(isset($GLOBALS['SEEDDMS_HOOKS']['indexDocument'])) {
if (method_exists($hookObj, 'preIndexDocument')) { foreach($GLOBALS['SEEDDMS_HOOKS']['indexDocument'] as $hookObj) {
$hookObj->preIndexDocument(null, $document, $idoc); if (method_exists($hookObj, 'preIndexDocument')) {
$hookObj->preIndexDocument(null, $document, $idoc);
}
} }
} }
if($this->fulltextservice->Indexer()->addDocument($idoc)) {
// echo "(".getMLText('index_document_updated').")".PHP_EOL;
$logger->log('Task \'indexingdocs\': document '.$document->getId().' updated', PEAR_LOG_INFO);
} else {
$logger->log('Task \'indexingdocs\': updating document '.$document->getId().' failed', PEAR_LOG_ERR);
}
$this->ducount++;
} catch(Exception $e) {
// echo "(Timeout)".PHP_EOL;
$logger->log('Task \'indexingdocs\': updating document '.$document->getId().' failed', PEAR_LOG_ERR);
} }
$this->fulltextservice->Indexer()->addDocument($idoc);
echo "(".getMLText('index_document_updated').")".PHP_EOL;
} catch(Exception $e) {
echo "(Timeout)".PHP_EOL;
} }
} else {
// echo "(Missing content)".PHP_EOL;
$logger->log('Task \'indexingdocs\': document '.$document->getId().' misses content', PEAR_LOG_ERR);
} }
} }
} }
} }
} /* }}} */ } /* }}} */
/**
 * Report the counters accumulated by process().
 *
 * The folder counters are incremented only after a folder was added
 * to or updated in the index. The document counters are incremented
 * after each add/update attempt that did not throw, regardless of the
 * value returned by addDocument().
 *
 * @return array nested array with the keys 'folder' and 'document',
 *         each holding an 'add' and an 'update' counter
 */
public function statistics() {
	$folderCounts = array('add'=>$this->facount, 'update'=>$this->fucount);
	$documentCounts = array('add'=>$this->dacount, 'update'=>$this->ducount);
	return array('folder'=>$folderCounts, 'document'=>$documentCounts);
}
} /* }}} */ } /* }}} */
/** /**
@ -287,9 +332,11 @@ class SeedDMS_IndexingDocumentsTask extends SeedDMS_SchedulerTaskBase { /* {{{ *
} }
} }
$folderprocess = new SeedDMS_Task_Indexer_Process_Folder($this, $fulltextservice, $recreate); $folderprocess = new SeedDMS_Task_Indexer_Process_Folder($this, $fulltextservice, $recreate, $logger);
call_user_func(array($folderprocess, 'process'), $folder, -1); call_user_func(array($folderprocess, 'process'), $folder, -1);
$tree = new SeedDMS_FolderTree($folder, array($folderprocess, 'process')); $tree = new SeedDMS_FolderTree($folder, array($folderprocess, 'process'));
$stat = $folderprocess->statistics();
$logger->log('Task \'indexingdocs\': '.$stat['folder']['add'].' folders added, '.$stat['folder']['update'].' folders updated, '.$stat['document']['add'].' documents added, '.$stat['document']['update'].' documents updated', PEAR_LOG_INFO);
} else { } else {
$logger->log('Task \'indexingdocs\': fulltext search is turned off', PEAR_LOG_WARNING); $logger->log('Task \'indexingdocs\': fulltext search is turned off', PEAR_LOG_WARNING);
} }