From 8b642f78b186e32f75085a7aa312a35058210c53 Mon Sep 17 00:00:00 2001
From: Uwe Steinmann
Date: Tue, 16 Jul 2024 08:41:57 +0200
Subject: [PATCH] check if fulltext search is turned on

The extension settings view now also warns when fulltext search is
disabled, in addition to the existing warning about a missing JWT
secret. The REST API code paths touched here check that the fulltext
service object exists before calling Indexer() or Search() on it.

---
Review notes (this section is ignored by git am):
 * The inner fulltext check in the settings hunk tested
   $settings->_enableXsendfile although the outer condition tests
   $settings->_enableFullSearch; it now tests _enableFullSearch so the
   'paperless_needs_fulltextsearch' warning follows the right setting.
   The blob ids on the 'index' lines predate this one-token correction.
 * NOTE(review): line breaks and tab depths of this patch were
   reconstructed from a whitespace-collapsed copy. Hunk line counts were
   verified against the @@ headers, but leading whitespace is inferred;
   verify against the target tree or apply with --ignore-whitespace.

 class.paperless.php | 136 +++++++++++++++++++++-----------------------
 lang.php            |   4 ++
 2 files changed, 70 insertions(+), 70 deletions(-)

diff --git a/class.paperless.php b/class.paperless.php
index b0e11c7..85f745e 100644
--- a/class.paperless.php
+++ b/class.paperless.php
@@ -46,9 +46,12 @@ class SeedDMS_ExtPaperless_Settings { /* {{{ */
 		$settings = $view->getParam('settings');
 		if($extname != 'paperless')
 			return;
-		if(empty($settings->_extensions['paperless']['jwtsecret'])) {
+		if(empty($settings->_extensions['paperless']['jwtsecret']) || empty($settings->_enableFullSearch)) {
 			echo $view->contentSubHeading(getMLText($extname));
-			echo $view->warningMsg(getMLText('paperless_jwtsecret_not_set'));
+			if(empty($settings->_extensions['paperless']['jwtsecret']))
+				echo $view->warningMsg(getMLText('paperless_jwtsecret_not_set'));
+			if(empty($settings->_enableFullSearch))
+				echo $view->warningMsg(getMLText('paperless_needs_fulltextsearch'));
 		}
 	}
 } /* }}} */
@@ -107,8 +110,7 @@ class SeedDMS_ExtPaperless_RestAPI_Controller { /* {{{ */
 			if(file_exists($file))
 				$content = file_get_contents($file);
 		} else {
-			$index = $fulltextservice->Indexer();
-			if($index) {
+			if($fulltextservice && $index = $fulltextservice->Indexer()) {
 				$lucenesearch = $fulltextservice->Search();
 				if($searchhit = $lucenesearch->getDocument($document->getID())) {
 					$idoc = $searchhit->getDocument();
@@ -264,8 +266,7 @@ class SeedDMS_ExtPaperless_RestAPI_Controller { /* {{{ */
 		} elseif(!isset($settings->_extensions['paperless']['rootfolder']) || !($startfolder = $dms->getFolder($settings->_extensions['paperless']['rootfolder'])))
 			$startfolder = $dms->getFolder($settings->_rootFolderID);
 
-		$index = $fulltextservice->Indexer();
-		if($index) {
+		if($fulltextservice && $index = $fulltextservice->Indexer()) {
 			$lucenesearch = $fulltextservice->Search();
 			$searchresult = $lucenesearch->search('', array('record_type'=>['document'], 'status'=>[2], 'user'=>[$userobj->getLogin()], 'startFolder'=>$startfolder, 'rootFolder'=>$startfolder), array('limit'=>20), array());
 			if($searchresult === false) {
@@ -658,70 +659,70 @@ class SeedDMS_ExtPaperless_RestAPI_Controller { /* {{{ */
 
 		/* more_like_id is set to find similar documents {{{ */
 		if(isset($params['more_like_id'])) {
-			$index = $fulltextservice->Indexer();
-			$lucenesearch = $fulltextservice->Search();
-			if($searchhit = $lucenesearch->getDocument((int) $params['more_like_id'])) {
-				$idoc = $searchhit->getDocument();
-				if($idoc) {
-					try {
-						$fullcontent = $idoc->getFieldValue('content');
-					} catch (Exception $e) {
-						$fullcontent = '';
-					}
-					$wcl = 2000;
-					$shortcontent = mb_strimwidth($fullcontent, 0, $wcl);
+			if($fulltextservice && $lucenesearch = $fulltextservice->Search()) {
+				if($searchhit = $lucenesearch->getDocument((int) $params['more_like_id'])) {
+					$idoc = $searchhit->getDocument();
+					if($idoc) {
+						try {
+							$fullcontent = $idoc->getFieldValue('content');
+						} catch (Exception $e) {
+							$fullcontent = '';
+						}
+						$wcl = 2000;
+						$shortcontent = mb_strimwidth($fullcontent, 0, $wcl);
 
-					/* Create a list of words and its occurences to be passed
-					 * to the classification.
-					 * The '.' is added as valid character in a word, because solr's
-					 * standard tokenizer treats it as a valid char as well.
-					 * But sqlitefts treats '.' as a separator
-					 */
-					$wordcount = self::mb_word_count($shortcontent, MB_CASE_LOWER, '');
-					arsort($wordcount);
-					$newquery = [];
-					foreach($wordcount as $word=>$n) {
-						if(mb_strlen($word) > 4 && ($n > 2 || count($newquery) < 5))
-							$newquery[] = $word;
-					}
-//					echo implode(' ', $newquery);
-					$logger->log("Query for '".implode(' ', $newquery)."'", PEAR_LOG_DEBUG);
-					/* $newquery is empty if the document doesn't have a fulltext.
-					 * In that case it makes no sense to search for similar documents
-					 * Otherwise search for documents with newquery, but if doesn't yield
-					 * a result, short the newquery by the last term and try again until
-					 * newquery is void
-					 */
-					while($newquery) {
-						$searchresult = $lucenesearch->search(implode(' ', $newquery), array('record_type'=>['document'], 'status'=>[2], 'user'=>[$userobj->getLogin()], 'startFolder'=>$startfolder, 'rootFolder'=>$rootfolder), array('limit'=>$limit, 'offset'=>$offset), $order);
-						if($searchresult) {
-							$recs = array();
-							if($searchresult['hits']) {
-								$allids = '';
-								foreach($searchresult['hits'] as $hit) {
-									if(($hit['document_id'][0] == 'D') && ($hit['document_id'] != 'D'.((int)$params['more_like_id']))) {
-										if($tmp = $dms->getDocument((int) substr($hit['document_id'], 1))) {
-											$allids .= $hit['document_id'].' ';
-											$recs[] = $this->__getDocumentData($tmp, $truncate_content);
+						/* Create a list of words and its occurences to be passed
+						 * to the classification.
+						 * The '.' is added as valid character in a word, because solr's
+						 * standard tokenizer treats it as a valid char as well.
+						 * But sqlitefts treats '.' as a separator
+						 */
+						$wordcount = self::mb_word_count($shortcontent, MB_CASE_LOWER, '');
+						arsort($wordcount);
+						$newquery = [];
+						foreach($wordcount as $word=>$n) {
+							if(mb_strlen($word) > 4 && ($n > 2 || count($newquery) < 5))
+								$newquery[] = $word;
+						}
+						// echo implode(' ', $newquery);
+						$logger->log("Query for '".implode(' ', $newquery)."'", PEAR_LOG_DEBUG);
+						/* $newquery is empty if the document doesn't have a fulltext.
+						 * In that case it makes no sense to search for similar documents
+						 * Otherwise search for documents with newquery, but if doesn't yield
+						 * a result, short the newquery by the last term and try again until
+						 * newquery is void
+						 */
+						while($newquery) {
+							$searchresult = $lucenesearch->search(implode(' ', $newquery), array('record_type'=>['document'], 'status'=>[2], 'user'=>[$userobj->getLogin()], 'startFolder'=>$startfolder, 'rootFolder'=>$rootfolder), array('limit'=>$limit, 'offset'=>$offset), $order);
+							if($searchresult) {
+								$recs = array();
+								if($searchresult['hits']) {
+									$allids = '';
+									foreach($searchresult['hits'] as $hit) {
+										if(($hit['document_id'][0] == 'D') && ($hit['document_id'] != 'D'.((int)$params['more_like_id']))) {
+											if($tmp = $dms->getDocument((int) substr($hit['document_id'], 1))) {
+												$allids .= $hit['document_id'].' ';
+												$recs[] = $this->__getDocumentData($tmp, $truncate_content);
+											}
+										} else {
+											$searchresult['count']--;
 										}
-									} else {
-										$searchresult['count']--;
 									}
-								}
-								$logger->log('Result is '.$allids, PEAR_LOG_DEBUG);
-								if($recs)
-									return $response->withJson(array('count'=>$searchresult['count'], 'next'=>null, 'previous'=>null, 'offset'=>$offset, 'limit'=>$limit, 'results'=>$recs), 200);
-								else {
+									$logger->log('Result is '.$allids, PEAR_LOG_DEBUG);
+									if($recs)
+										return $response->withJson(array('count'=>$searchresult['count'], 'next'=>null, 'previous'=>null, 'offset'=>$offset, 'limit'=>$limit, 'results'=>$recs), 200);
+									else {
+										/* Still nothing found, so try a shorter query */
+										array_pop($newquery);
+									}
+								} else {
 									/* Still nothing found, so try a shorter query */
 									array_pop($newquery);
 								}
 							} else {
-								/* Still nothing found, so try a shorter query */
-								array_pop($newquery);
+								/* Quit the while loop right away, if the search failed */
+								$newquery = false;
 							}
-						} else {
-							/* Quit the while loop right away, if the search failed */
-							$newquery = false;
 						}
 					}
 				}
@@ -799,8 +800,7 @@ class SeedDMS_ExtPaperless_RestAPI_Controller { /* {{{ */
 			$aend = (int) makeTsFromDate($params['created__date__lt']);
 		}
 
-		$index = $fulltextservice->Indexer();
-		if($index) {
+		if($fulltextservice && $lucenesearch = $fulltextservice->Search()) {
 			$logger->log('Query is '.$query, PEAR_LOG_DEBUG);
 			/*
 			$logger->log('User is '.$userobj->getLogin(), PEAR_LOG_DEBUG);
@@ -813,7 +813,6 @@ class SeedDMS_ExtPaperless_RestAPI_Controller { /* {{{ */
 			$logger->log('limit is '.$limit, PEAR_LOG_DEBUG);
 			$logger->log('offset is '.$offset, PEAR_LOG_DEBUG);
 			 */
-			$lucenesearch = $fulltextservice->Search();
 			$searchresult = $lucenesearch->search($query, array('record_type'=>['document'], 'status'=>[2], 'user'=>[$userobj->getLogin()], 'category'=>$categorynames, 'created_start'=>$astart, 'created_end'=>$aend, 'modified_start'=>$mstart, 'modified_end'=>$mend, 'startFolder'=>$startfolder, 'rootFolder'=>$rootfolder, 'attributes'=>$cattrs), array('limit'=>$limit, 'offset'=>$offset), $order, array('no_facets'=>true));
 			if($searchresult) {
 				$recs = array();
@@ -899,8 +898,7 @@ class SeedDMS_ExtPaperless_RestAPI_Controller { /* {{{ */
 		$logger->log(var_export($params, true), PEAR_LOG_DEBUG);
 		$list = [];
 
-		$index = $fulltextservice->Indexer();
-		if($index) {
+		if($fulltextservice && ($index = $fulltextservice->Indexer())) {
 			if($terms = $index->terms($query, $field)) {
 				foreach($terms as $term)
 					$list[] = implode(' ', $allterms).' '.$term->text;
@@ -993,9 +991,7 @@ class SeedDMS_ExtPaperless_RestAPI_Controller { /* {{{ */
 			'documents_inbox'=>0,
 		);
 
-		$index = $fulltextservice->Indexer();
-		if($index) {
-			$lucenesearch = $fulltextservice->Search();
+		if($fulltextservice && ($lucenesearch = $fulltextservice->Search())) {
 			$searchresult = $lucenesearch->search('', array('record_type'=>['document'], 'status'=>[2], 'user'=>[$userobj->getLogin()], 'startFolder'=>$startfolder, 'rootFolder'=>$startfolder), array('limit'=>1), array(), array('no_facets'=>true));
 			if($searchresult === false) {
 				return $response->withStatus(500);
diff --git a/lang.php b/lang.php
index 129e04a..014fa56 100644
--- a/lang.php
+++ b/lang.php
@@ -1,5 +1,6 @@
 <?php
 $__lang['en_GB'] = array(
+	'paperless'=>'Paperless',
 	'paperless_upload_succeded'=>'Upload succeded',
 	'paperless_upload_failed'=>'Upload failed',
 	'paperless_missing_target_folder'=>'Missing target folder',
@@ -7,8 +8,10 @@ $__lang['en_GB'] = array(
 	'paperless_upload_maxsize'=>'Max file size exceeded',
 	'paperless_token_has_expired'=>'Token has expired. Login again before you proceed.',
 	'paperless_jwtsecret_not_set'=>'The secret for the JSON Web Token is not set. This is required for successful login.',
+	'paperless_needs_fulltextsearch'=>'The extension needs fulltext search to be on.',
 );
 $__lang['de_DE'] = array(
+	'paperless'=>'Paperless',
 	'paperless_upload_succeded'=>'Erfolgreich hochgeladen',
 	'paperless_upload_failed'=>'Hochladen fehlgeschlagen',
 	'paperless_missing_target_folder'=>'Zielordner nicht vorhanden',
@@ -16,5 +19,6 @@ $__lang['de_DE'] = array(
 	'paperless_upload_maxsize'=>'Maximale Dateigröße überschritten',
 	'paperless_token_has_expired'=>'Token abgelaufen. Melden Sie sich bitte neu an.',
 	'paperless_jwtsecret_not_set'=>'Das Geheimnis des JSON Web Token ist nicht gesetzt. Dies ist für eine erfolgreiche Anmeldung erforderlich.',
+	'paperless_needs_fulltextsearch'=>'Die Erweiterung erfordert eine konfigurierte Volltextsuche .',
 );
 