mirror of
https://codeberg.org/SeedDMS/paperless
synced 2025-02-06 06:55:28 +00:00
improve search for similar documents (extend or reduce the list of search terms)
This commit is contained in:
parent
16a3083f33
commit
2580c56a09
|
@ -584,8 +584,9 @@ class SeedDMS_ExtPaperless_RestAPI_Controller { /* {{{ */
|
||||||
* to the classification.
|
* to the classification.
|
||||||
* The '.' is added as valid character in a word, because solr's
|
* The '.' is added as valid character in a word, because solr's
|
||||||
* standard tokenizer treats it as a valid char as well.
|
* standard tokenizer treats it as a valid char as well.
|
||||||
|
* But sqlitefts treats '.' as a separator
|
||||||
*/
|
*/
|
||||||
$wordcount = self::mb_word_count($shortcontent, MB_CASE_LOWER, '.');
|
$wordcount = self::mb_word_count($shortcontent, MB_CASE_LOWER, '');
|
||||||
arsort($wordcount);
|
arsort($wordcount);
|
||||||
$newquery = [];
|
$newquery = [];
|
||||||
foreach($wordcount as $word=>$n) {
|
foreach($wordcount as $word=>$n) {
|
||||||
|
@ -594,21 +595,42 @@ class SeedDMS_ExtPaperless_RestAPI_Controller { /* {{{ */
|
||||||
}
|
}
|
||||||
// echo implode(' ', $newquery);
|
// echo implode(' ', $newquery);
|
||||||
$logger->log("Query for '".implode(' ', $newquery)."'", PEAR_LOG_DEBUG);
|
$logger->log("Query for '".implode(' ', $newquery)."'", PEAR_LOG_DEBUG);
|
||||||
$searchresult = $lucenesearch->search(implode(' ', $newquery), array('record_type'=>['document'], 'status'=>[2], 'user'=>[$userobj->getLogin()], 'startFolder'=>$startfolder, 'rootFolder'=>$rootfolder), array('limit'=>$limit, 'offset'=>$offset), $order);
|
/* $newquery is empty if the document doesn't have a fulltext.
|
||||||
if($searchresult) {
|
* In that case it makes no sense to search for similar documents
|
||||||
$recs = array();
|
* Otherwise search for documents with newquery, but if that doesn't yield
|
||||||
if($searchresult['hits']) {
|
* a result, shorten newquery by the last term and try again until
|
||||||
$allids = '';
|
* newquery is empty
|
||||||
foreach($searchresult['hits'] as $hit) {
|
*/
|
||||||
if($hit['document_id'][0] == 'D') {
|
while($newquery) {
|
||||||
if($tmp = $dms->getDocument((int) substr($hit['document_id'], 1))) {
|
$searchresult = $lucenesearch->search(implode(' ', $newquery), array('record_type'=>['document'], 'status'=>[2], 'user'=>[$userobj->getLogin()], 'startFolder'=>$startfolder, 'rootFolder'=>$rootfolder), array('limit'=>$limit, 'offset'=>$offset), $order);
|
||||||
$allids .= $hit['document_id'].' ';
|
if($searchresult) {
|
||||||
$recs[] = $this->__getDocumentData($tmp, $truncate_content);
|
$recs = array();
|
||||||
|
if($searchresult['hits']) {
|
||||||
|
$allids = '';
|
||||||
|
foreach($searchresult['hits'] as $hit) {
|
||||||
|
if(($hit['document_id'][0] == 'D') && ($hit['document_id'] != 'D'.((int)$params['more_like_id']))) {
|
||||||
|
if($tmp = $dms->getDocument((int) substr($hit['document_id'], 1))) {
|
||||||
|
$allids .= $hit['document_id'].' ';
|
||||||
|
$recs[] = $this->__getDocumentData($tmp, $truncate_content);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
$searchresult['count']--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
$logger->log('Result is '.$allids, PEAR_LOG_DEBUG);
|
||||||
|
if($recs)
|
||||||
|
return $response->withJson(array('count'=>$searchresult['count'], 'next'=>null, 'previous'=>null, 'offset'=>$offset, 'limit'=>$limit, 'results'=>$recs), 200);
|
||||||
|
else {
|
||||||
|
/* Still nothing found, so try a shorter query */
|
||||||
|
array_pop($newquery);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* Still nothing found, so try a shorter query */
|
||||||
|
array_pop($newquery);
|
||||||
}
|
}
|
||||||
$logger->log('Result is '.$allids, PEAR_LOG_DEBUG);
|
} else {
|
||||||
return $response->withJson(array('count'=>$searchresult['count'], 'next'=>null, 'previous'=>null, 'offset'=>$offset, 'limit'=>$limit, 'results'=>$recs), 200);
|
/* Quit the while loop right away, if the search failed */
|
||||||
|
$newquery = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user