fix indexing documents

This commit is contained in:
Uwe Steinmann 2023-01-10 15:44:30 +01:00
parent 6d6bbca94b
commit 328cbd9eb9

View File

@ -125,17 +125,33 @@ class SeedDMS_Task_Indexer_Process_Folder { /* {{{ */
protected $fulltextservice;
public function __construct($scheduler, $fulltextservice, $forceupdate) { /* {{{ */
protected $logger;
protected $dacount;
protected $facount;
protected $ducount;
protected $fucount;
/**
 * Set up the folder processor.
 *
 * Keeps references to the passed collaborators, remembers the number of
 * entries the indexer reports at construction time and resets the four
 * counters returned by statistics().
 *
 * @param mixed $scheduler stored unchanged in $this->scheduler
 * @param object $fulltextservice must provide Indexer(); the returned
 *        object must provide count()
 * @param mixed $forceupdate flag evaluated by process() when deciding
 *        whether an already indexed item is indexed again
 * @param object $logger logger whose log() method is called by process()
 */
public function __construct($scheduler, $fulltextservice, $forceupdate, $logger) { /* {{{ */
	/* Collaborators */
	$this->scheduler = $scheduler;
	$this->fulltextservice = $fulltextservice;
	$this->logger = $logger;
	$this->forceupdate = $forceupdate;

	/* Size of the index before processing starts */
	$indexer = $this->fulltextservice->Indexer();
	$this->numdocs = $indexer->count();

	/* Counters: d = document, f = folder, a = added, u = updated */
	$this->dacount = $this->facount = 0;
	$this->ducount = $this->fucount = 0;
} /* }}} */
public function process($folder, $depth=0) { /* {{{ */
$lucenesearch = $this->fulltextservice->Search();
$documents = $folder->getDocuments();
echo str_repeat(' ', $depth+1).$folder->getId().":".$folder->getFolderPathPlain()." ";
$logger = $this->logger;
// echo str_repeat(' ', $depth+1).$folder->getId().":".$folder->getFolderPathPlain()." ";
if(($this->numdocs == 0) || !($hit = $lucenesearch->getFolder($folder->getId()))) {
try {
$idoc = $this->fulltextservice->IndexedDocument($folder, true);
@ -149,12 +165,16 @@ class SeedDMS_Task_Indexer_Process_Folder { /* {{{ */
}
}
$this->fulltextservice->Indexer()->addDocument($idoc);
echo "(".getMLText('index_folder_added').")".PHP_EOL;
// echo "(".getMLText('index_folder_added').")".PHP_EOL;
$logger->log('Task \'indexingdocs\': folder '.$folder->getId().' added', PEAR_LOG_INFO);
$this->facount++;
} else {
echo "(".$error.")".PHP_EOL;
// echo "(".$error.")".PHP_EOL;
$logger->log('Task \'indexingdocs\': adding folder '.$folder->getId().' failed', PEAR_LOG_ERR);
}
} catch(Exception $e) {
echo "(Timeout)".PHP_EOL;
// echo "(Timeout)".PHP_EOL;
$logger->log('Task \'indexingdocs\': adding folder '.$folder->getId().' failed', PEAR_LOG_ERR);
}
} else {
/* Check if the attribute indexed is set or has a value older
@ -168,7 +188,7 @@ class SeedDMS_Task_Indexer_Process_Folder { /* {{{ */
$indexed = 0;
}
if($indexed >= $folder->getDate() && !$this->forceupdate) {
echo "(".getMLText('index_folder_unchanged').")".PHP_EOL;
// echo "(".getMLText('index_folder_unchanged').")".PHP_EOL;
} else {
$this->fulltextservice->Indexer()->delete($hit->id);
try {
@ -183,18 +203,22 @@ class SeedDMS_Task_Indexer_Process_Folder { /* {{{ */
}
}
$this->fulltextservice->Indexer()->addDocument($idoc);
echo "(".getMLText('index_folder_updated').")".PHP_EOL;
// echo "(".getMLText('index_folder_updated').")".PHP_EOL;
$logger->log('Task \'indexingdocs\': folder '.$folder->getId().' updated', PEAR_LOG_INFO);
$this->fucount++;
} else {
echo "(".$error.")".PHP_EOL;
// echo "(".$error.")".PHP_EOL;
$logger->log('Task \'indexingdocs\': updating folder '.$folder->getId().' failed', PEAR_LOG_ERR);
}
} catch(Exception $e) {
echo "(Timeout)".PHP_EOL;
// echo "(Timeout)".PHP_EOL;
$logger->log('Task \'indexingdocs\': updating folder '.$folder->getId().' failed. '.$e->getMessage(), PEAR_LOG_ERR);
}
}
}
if($documents) {
foreach($documents as $document) {
echo str_repeat(' ', $depth+2).$document->getId().":".$document->getName()." ";
// echo str_repeat(' ', $depth+2).$document->getId().":".$document->getName()." ";
/* If the document wasn't indexed before then just add it */
if(($this->numdocs == 0) || !($hit = $lucenesearch->getDocument($document->getId()))) {
try {
@ -206,10 +230,16 @@ class SeedDMS_Task_Indexer_Process_Folder { /* {{{ */
}
}
}
$this->fulltextservice->Indexer()->addDocument($idoc);
echo "(".getMLText('index_document_added').")".PHP_EOL;
if($this->fulltextservice->Indexer()->addDocument($idoc)) {
// echo "(".getMLText('index_document_added').")".PHP_EOL;
$logger->log('Task \'indexingdocs\': document '.$document->getId().' added', PEAR_LOG_INFO);
} else {
$logger->log('Task \'indexingdocs\': adding document '.$document->getId().' failed', PEAR_LOG_ERR);
}
$this->dacount++;
} catch(Exception $e) {
echo "(Timeout)".PHP_EOL;
// echo "(Timeout)".PHP_EOL;
$logger->log('Task \'indexingdocs\': adding document '.$document->getId().' failed. '.$e->getMessage(), PEAR_LOG_ERR);
}
} else {
/* Check if the attribute indexed is set or has a value older
@ -223,29 +253,44 @@ class SeedDMS_Task_Indexer_Process_Folder { /* {{{ */
$indexed = 0;
}
$content = $document->getLatestContent();
if($indexed >= $content->getDate() && !$this->forceupdate) {
echo "(".getMLText('index_document_unchanged').")".PHP_EOL;
} else {
$this->fulltextservice->Indexer()->delete($hit->id);
try {
$idoc = $this->fulltextservice->IndexedDocument($document, true);
if(isset($GLOBALS['SEEDDMS_HOOKS']['indexDocument'])) {
foreach($GLOBALS['SEEDDMS_HOOKS']['indexDocument'] as $hookObj) {
if (method_exists($hookObj, 'preIndexDocument')) {
$hookObj->preIndexDocument(null, $document, $idoc);
if($content) {
if($indexed >= $content->getDate() && !$this->forceupdate) {
// echo "(".getMLText('index_document_unchanged').")".PHP_EOL;
} else {
$this->fulltextservice->Indexer()->delete($hit->id);
try {
$idoc = $this->fulltextservice->IndexedDocument($document, true);
if(isset($GLOBALS['SEEDDMS_HOOKS']['indexDocument'])) {
foreach($GLOBALS['SEEDDMS_HOOKS']['indexDocument'] as $hookObj) {
if (method_exists($hookObj, 'preIndexDocument')) {
$hookObj->preIndexDocument(null, $document, $idoc);
}
}
}
if($this->fulltextservice->Indexer()->addDocument($idoc)) {
// echo "(".getMLText('index_document_updated').")".PHP_EOL;
$logger->log('Task \'indexingdocs\': document '.$document->getId().' updated', PEAR_LOG_INFO);
} else {
$logger->log('Task \'indexingdocs\': updating document '.$document->getId().' failed', PEAR_LOG_ERR);
}
$this->ducount++;
} catch(Exception $e) {
// echo "(Timeout)".PHP_EOL;
$logger->log('Task \'indexingdocs\': updating document '.$document->getId().' failed', PEAR_LOG_ERR);
}
$this->fulltextservice->Indexer()->addDocument($idoc);
echo "(".getMLText('index_document_updated').")".PHP_EOL;
} catch(Exception $e) {
echo "(Timeout)".PHP_EOL;
}
} else {
// echo "(Missing content)".PHP_EOL;
$logger->log('Task \'indexingdocs\': document '.$document->getId().' misses content', PEAR_LOG_ERR);
}
}
}
}
} /* }}} */
/**
 * Return the add/update counters maintained by process().
 *
 * @return array array with the keys 'folder' and 'document', each an
 *         array with the keys 'add' and 'update'
 */
public function statistics() {
	$folderstat = array(
		'add' => $this->facount,
		'update' => $this->fucount,
	);
	$documentstat = array(
		'add' => $this->dacount,
		'update' => $this->ducount,
	);
	return array('folder' => $folderstat, 'document' => $documentstat);
}
} /* }}} */
/**
@ -287,9 +332,11 @@ class SeedDMS_IndexingDocumentsTask extends SeedDMS_SchedulerTaskBase { /* {{{ *
}
}
$folderprocess = new SeedDMS_Task_Indexer_Process_Folder($this, $fulltextservice, $recreate);
$folderprocess = new SeedDMS_Task_Indexer_Process_Folder($this, $fulltextservice, $recreate, $logger);
call_user_func(array($folderprocess, 'process'), $folder, -1);
$tree = new SeedDMS_FolderTree($folder, array($folderprocess, 'process'));
$stat = $folderprocess->statistics();
$logger->log('Task \'indexingdocs\': '.$stat['folder']['add'].' folders added, '.$stat['folder']['update'].' folders updated, '.$stat['document']['add'].' documents added, '.$stat['document']['update'].' documents updated', PEAR_LOG_INFO);
} else {
$logger->log('Task \'indexingdocs\': fulltext search is turned off', PEAR_LOG_WARNING);
}