mirror of
https://git.code.sf.net/p/seeddms/code
synced 2025-02-06 15:14:58 +00:00
Merge branch 'seeddms-5.1.x' into seeddms-6.0.x
This commit is contained in:
commit
12ffb0174e
|
@ -51,7 +51,7 @@ class SeedDMS_Lucene_Indexer {
|
|||
* Do some initialization
|
||||
*
|
||||
*/
|
||||
static function init($stopWordsFile='') { /* {{{ */
|
||||
public function init($stopWordsFile='') { /* {{{ */
|
||||
$analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive();
|
||||
if($stopWordsFile && file_exists($stopWordsFile)) {
|
||||
$stopWordsFilter = new Zend_Search_Lucene_Analysis_TokenFilter_StopWords();
|
||||
|
|
|
@ -31,7 +31,8 @@ class SeedDMS_Preview_PdfPreviewer extends SeedDMS_Preview_Base {
|
|||
} /* }}} */
|
||||
|
||||
/**
|
||||
* Return the physical filename of the preview image on disk
|
||||
* Return the physical filename of the preview image on disc
|
||||
* including the path
|
||||
*
|
||||
* @param object $object document content or document file
|
||||
* @return string file name of preview image
|
||||
|
@ -47,7 +48,7 @@ class SeedDMS_Preview_PdfPreviewer extends SeedDMS_Preview_Base {
|
|||
case $dms->getClassname('documentcontent'):
|
||||
$target = $dir.'p'.$object->getVersion();
|
||||
break;
|
||||
case "SeedDMS_Core_DocumentFile":
|
||||
case $dms->getClassname('documentfile'):
|
||||
$target = $dir.'f'.$object->getID();
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -29,6 +29,16 @@ class SeedDMS_Preview_Previewer extends SeedDMS_Preview_Base {
|
|||
*/
|
||||
protected $width;
|
||||
|
||||
/**
|
||||
* Create instance of image previewer
|
||||
*
|
||||
* @param string $previewDir path of base directory where all images are
|
||||
* stored. This directory will have a subdirectory derived from the object id.
|
||||
* @param integer $width default width of an image
|
||||
* @param integer $timeout timeout for shell commands to create a preview image
|
||||
* @param boolean $xsendfile if set to true the apache module xsendfile will
|
||||
* be used.
|
||||
*/
|
||||
function __construct($previewDir, $width=40, $timeout=5, $xsendfile=true) { /* {{{ */
|
||||
parent::__construct($previewDir, $timeout, $xsendfile);
|
||||
$this->converters = array(
|
||||
|
@ -46,7 +56,8 @@ class SeedDMS_Preview_Previewer extends SeedDMS_Preview_Base {
|
|||
} /* }}} */
|
||||
|
||||
/**
|
||||
* Return the physical filename of the preview image on disk
|
||||
* Return the physical filename of the preview image on disc
|
||||
* including the path
|
||||
*
|
||||
* @param object $object document content or document file
|
||||
* @param integer $width width of preview image
|
||||
|
@ -103,6 +114,7 @@ class SeedDMS_Preview_Previewer extends SeedDMS_Preview_Base {
|
|||
* @param string $mimetype MimeType of input file
|
||||
* @param integer $width width of generated preview image
|
||||
* @param string $target optional name of preview image (without extension)
|
||||
* @param boolean $new will be set to true if the preview image was created
|
||||
* @return boolean true on success, false on failure
|
||||
*/
|
||||
public function createRawPreview($infile, $dir, $mimetype, $width=0, $target='', &$new=false) { /* {{{ */
|
||||
|
@ -171,6 +183,7 @@ class SeedDMS_Preview_Previewer extends SeedDMS_Preview_Base {
|
|||
* @param object $object instance of SeedDMS_Core_DocumentContent
|
||||
* or SeedDMS_Core_DocumentFile
|
||||
* @param integer $width desired width of preview image
|
||||
* @param boolean $new will be set to true if the preview image was created
|
||||
* @return boolean true on success, false on failure
|
||||
*/
|
||||
public function createPreview($object, $width=0, &$new=false) { /* {{{ */
|
||||
|
|
|
@ -25,18 +25,48 @@
|
|||
class SeedDMS_SQLiteFTS_Indexer {
|
||||
|
||||
/**
|
||||
* @var string $ftstype
|
||||
* @var string $_ftstype
|
||||
* @access protected
|
||||
*/
|
||||
protected $_ftstype;
|
||||
|
||||
/**
|
||||
* @var object $index sqlite index
|
||||
* @var object $_conn sqlite index
|
||||
* @access protected
|
||||
*/
|
||||
protected $_conn;
|
||||
|
||||
/**
|
||||
* @var array $_stop_words array of stop words
|
||||
* @access protected
|
||||
*/
|
||||
protected $_stop_words;
|
||||
|
||||
const ftstype = 'fts5';
|
||||
|
||||
/**
 * Remove stopwords from string
 *
 * The string is split into words. If there are at least two words,
 * every word with less than three characters and every word found as
 * a key in $this->_stop_words (compared in lower case) is dropped.
 * If no word is left, the original string is returned unchanged.
 *
 * @param string $str text to be filtered
 * @return string remaining words joined by a single blank, or the
 * unmodified input if it could not be split or all words were removed
 */
protected function strip_stopwords($str = "") { /* {{{ */
	// 1.) break string into words
	// [^-\w\'] matches characters, that are not [0-9a-zA-Z_-']
	// if input is unicode/utf-8, the u flag is needed: /pattern/u
	$words = preg_split('/[^-\w\']+/u', $str, -1, PREG_SPLIT_NO_EMPTY);

	// preg_split() returns false if $str is not valid utf-8. In that
	// case nothing can be filtered and the string is passed on as is.
	if($words === false)
		return $str;

	// 2.) if we have at least 2 words, remove stopwords
	if(count($words) > 1) {
		$stopwords = $this->_stop_words;
		$words = array_filter($words, function ($w) use (&$stopwords) {
			// The encoding name must be spelled 'utf-8'. A malformed name
			// like "utf- 8" is rejected by mb_strtolower().
			return ((mb_strlen($w, 'utf-8') > 2) && !isset($stopwords[mb_strtolower($w, 'utf-8')]));
		});
	}

	// check if not too much was removed such as "the the" would return empty
	if(!empty($words))
		return implode(" ", $words);
	return $str;
} /* }}} */
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
*
|
||||
|
@ -48,6 +78,7 @@ class SeedDMS_SQLiteFTS_Indexer {
|
|||
$this->_rawid = 'rowid';
|
||||
else
|
||||
$this->_rawid = 'docid';
|
||||
$this->_stop_words = [];
|
||||
} /* }}} */
|
||||
|
||||
/**
|
||||
|
@ -109,7 +140,9 @@ class SeedDMS_SQLiteFTS_Indexer {
|
|||
* Do some initialization
|
||||
*
|
||||
*/
|
||||
static function init($stopWordsFile='') { /* {{{ */
|
||||
public function init($stopWordsFile='') { /* {{{ */
	// Only a readable file can provide stop words. Without this check a
	// missing file would issue a warning and leave a list containing
	// just the empty string.
	if($stopWordsFile && is_readable($stopWordsFile)) {
		$content = file_get_contents($stopWordsFile);
		if($content !== false) {
			// Stop words are separated by white space or commas. They become
			// the keys of the array to allow a fast lookup with isset().
			// PREG_SPLIT_NO_EMPTY drops the empty entry caused by a trailing
			// line break.
			$this->_stop_words = array_flip(preg_split("/[\s,]+/", $content, -1, PREG_SPLIT_NO_EMPTY));
		}
	}
} /* }}} */
|
||||
|
||||
/**
|
||||
|
@ -135,6 +168,9 @@ class SeedDMS_SQLiteFTS_Indexer {
|
|||
if($res === false) {
|
||||
return false;
|
||||
}
|
||||
if($this->_stop_words)
|
||||
$content = $this->strip_stopwords($content);
|
||||
|
||||
$sql = "INSERT INTO docs (documentid, record_type, title, comment, keywords, category, owner, content, mimetype, origfilename, created, indexed, users, status, path) VALUES (".$this->_conn->quote($doc->getFieldValue('document_id')).", ".$this->_conn->quote($doc->getFieldValue('record_type')).", ".$this->_conn->quote($doc->getFieldValue('title')).", ".$this->_conn->quote($comment).", ".$this->_conn->quote($keywords).", ".$this->_conn->quote($category).", ".$this->_conn->quote($doc->getFieldValue('owner')).", ".$this->_conn->quote($content).", ".$this->_conn->quote($mimetype).", ".$this->_conn->quote($origfilename).", ".(int)$created.", ".(int)$indexed.", ".$this->_conn->quote($doc->getFieldValue('users')).", ".$this->_conn->quote($status).", ".$this->_conn->quote($doc->getFieldValue('path'))/*time()*/.")";
|
||||
$res = $this->_conn->exec($sql);
|
||||
if($res === false) {
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
- add optional parameter $order to SeedDMS_SQLiteFTS_Indexer::find()
|
||||
- add optional parameters $query and $col to SeedDMS_SQLiteFTS_Indexer::terms()
|
||||
- IndexedDocument() accepts a callable for conversion to text
|
||||
- remove stop words from content
|
||||
</notes>
|
||||
<contents>
|
||||
<dir baseinstalldir="SeedDMS" name="/">
|
||||
|
|
|
@ -90,6 +90,9 @@ image/jpeg
|
|||
image/png
|
||||
convert -resize %wx '%f' 'png:%o'
|
||||
|
||||
text/plain
|
||||
convert -density 100 -resize %wx 'text:%f[0]' 'png:%o'
|
||||
|
||||
application/pdf
|
||||
gs -dBATCH -dNOPAUSE -sDEVICE=png16m -dPDFFitPage -r72x72 -sOutputFile=- -dFirstPage=1 -dLastPage=1 -q '%f' | convert -resize %wx png:- '%o'
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@ require_once("inc/inc.ClassConversionServiceImageToImage.php");
|
|||
require_once("inc/inc.ClassConversionServiceImageToText.php");
|
||||
require_once("inc/inc.ClassConversionServicePdfToImage.php");
|
||||
require_once("inc/inc.ClassConversionServiceTextToText.php");
|
||||
require_once("inc/inc.ClassConversionServiceTextToImage.php");
|
||||
|
||||
/**
|
||||
* Implementation of conversion manager
|
||||
|
|
|
@ -36,9 +36,15 @@ abstract class SeedDMS_ConversionServiceBase {
|
|||
*/
|
||||
protected $logger;
|
||||
|
||||
/**
|
||||
* @var $success set to false if conversion failed
|
||||
*/
|
||||
protected $success;
|
||||
|
||||
/**
 * Initialize the service
 *
 * Source and target type are unset (null) and the success flag
 * starts as true.
 */
public function __construct() {
|
||||
$this->from = null;
|
||||
$this->to = null;
|
||||
$this->success = true;
|
||||
}
|
||||
|
||||
public function setLogger($logger) {
|
||||
|
@ -53,6 +59,10 @@ abstract class SeedDMS_ConversionServiceBase {
|
|||
return [];
|
||||
} /* }}} */
|
||||
|
||||
/**
 * Return the current value of the success flag
 *
 * @return boolean value of $this->success
 */
public function wasSuccessful() { /* {{{ */
|
||||
return $this->success;
|
||||
} /* }}} */
|
||||
|
||||
/**
|
||||
* This method does the conversion
|
||||
*
|
||||
|
|
|
@ -92,6 +92,7 @@ class SeedDMS_ConversionServiceExec extends SeedDMS_ConversionServiceBase {
|
|||
} /* }}} */
|
||||
|
||||
public function __construct($from, $to, $cmd, $timeout=5) {
|
||||
parent::__construct();
|
||||
$this->from = $from;
|
||||
$this->to = $to;
|
||||
$this->cmd = $cmd;
|
||||
|
|
|
@ -29,6 +29,7 @@ class SeedDMS_ConversionServiceImageToImage extends SeedDMS_ConversionServiceBas
|
|||
public $timeout;
|
||||
|
||||
public function __construct($from, $to) { /* {{{ */
|
||||
parent::__construct();
|
||||
$this->from = $from;
|
||||
$this->to = $to;
|
||||
$this->timeout = 5;
|
||||
|
|
|
@ -29,6 +29,7 @@ class SeedDMS_ConversionServiceImageToText extends SeedDMS_ConversionServiceBase
|
|||
public $timeout;
|
||||
|
||||
/**
 * Create the service for a pair of source and target type
 *
 * @param string $from type the service converts from
 * @param string $to type the service converts to
 */
public function __construct($from, $to) { /* {{{ */
|
||||
parent::__construct();
|
||||
$this->from = $from;
|
||||
$this->to = $to;
|
||||
} /* }}} */
|
||||
|
|
|
@ -29,6 +29,7 @@ class SeedDMS_ConversionServicePdfToImage extends SeedDMS_ConversionServiceBase
|
|||
public $timeout;
|
||||
|
||||
public function __construct($from, $to) {
|
||||
parent::__construct();
|
||||
$this->from = $from;
|
||||
$this->to = $to;
|
||||
$this->timeout = 5;
|
||||
|
@ -75,6 +76,7 @@ class SeedDMS_ConversionServicePdfToImage extends SeedDMS_ConversionServiceBase
|
|||
}
|
||||
}
|
||||
} catch (ImagickException $e) {
|
||||
$this->success = false;
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
|
|
144
inc/inc.ClassConversionServiceTextToImage.php
Normal file
144
inc/inc.ClassConversionServiceTextToImage.php
Normal file
|
@ -0,0 +1,144 @@
|
|||
<?php
|
||||
/**
|
||||
* Implementation of conversion service class
|
||||
*
|
||||
* @category DMS
|
||||
* @package SeedDMS
|
||||
* @license GPL 2
|
||||
* @version @version@
|
||||
* @author Uwe Steinmann <uwe@steinmann.cx>
|
||||
* @copyright Copyright (C) 2023 Uwe Steinmann
|
||||
* @version Release: @package_version@
|
||||
*/
|
||||
|
||||
require_once("inc/inc.ClassConversionServiceBase.php");
|
||||
|
||||
/**
|
||||
* Implementation of conversion service from text to image
|
||||
*
|
||||
* @category DMS
|
||||
* @package SeedDMS
|
||||
* @author Uwe Steinmann <uwe@steinmann.cx>
|
||||
* @copyright Copyright (C) 2023 Uwe Steinmann
|
||||
* @version Release: @package_version@
|
||||
*/
|
||||
class SeedDMS_ConversionServiceTextToImage extends SeedDMS_ConversionServiceBase {
	/**
	 * Create the service for a pair of source and target type
	 *
	 * @param string $from type the service converts from
	 * @param string $to type the service converts to
	 */
	public function __construct($from, $to) {
		parent::__construct();
		$this->from = $from;
		$this->to = $to;
	}

	/**
	 * Return a short description of the service
	 *
	 * @return string description
	 */
	public function getInfo() {
		return "Convert with imagick php functions";
	}

	/**
	 * Return the list of parameters evaluated by convert()
	 *
	 * @return array list of parameter descriptions
	 */
	public function getAdditionalParams() { /* {{{ */
		return [
			['name'=>'width', 'type'=>'number', 'description'=>'Width of converted image'],
			['name'=>'page', 'type'=>'number', 'description'=>'Page of text document'],
		];
	} /* }}} */

	/**
	 * Break a text into lines not wider than a given width
	 *
	 * Words are added to a line one by one until the rendered width
	 * exceeds $maxWidth or no word is left. A single word wider than
	 * $maxWidth is put on a line of its own.
	 *
	 * @param object $image image used for querying the font metrics
	 * @param object $draw drawing settings (font, font size)
	 * @param string $text text to be wrapped
	 * @param integer $maxWidth maximum width of a line
	 * @return array list of lines (index 0) and the line height (index 1)
	 */
	private function wordWrapAnnotation($image, $draw, $text, $maxWidth) { /* {{{ */
		$words = preg_split('%\s%', trim($text), -1, PREG_SPLIT_NO_EMPTY);
		$lines = array();
		$i = 0;
		$lineHeight = 0;

		while (count($words) > 0) {
			$metrics = $image->queryFontMetrics($draw, implode(' ', array_slice($words, 0, ++$i)));
			$lineHeight = max($metrics['textHeight'], $lineHeight);

			// check if we have found the word that exceeds the line width
			if ($metrics['textWidth'] > $maxWidth or count($words) < $i) {
				// handle case where a single word is longer than the allowed line width (just add this as a word on its own line?)
				if ($i == 1)
					$i++;

				$lines[] = implode(' ', array_slice($words, 0, --$i));
				$words = array_slice($words, $i);
				$i = 0;
			}
		}

		return array($lines, $lineHeight);
	} /* }}} */

	/**
	 * Render one page of a text file into a png image
	 *
	 * The text is written line by line onto a white image of 596x842
	 * pixels. Only the page selected by $params['page'] (starting at 1,
	 * default is the first page) is drawn. If $params['width'] is set,
	 * the image is scaled down to that width, but never scaled up.
	 *
	 * If the input file cannot be read or imagick throws an exception,
	 * the success flag is set to false, just like the conversion from
	 * pdf to image does.
	 *
	 * @param string $infile name of text file
	 * @param string $target name of image file, if not set the image
	 * data is returned
	 * @param array $params may contain 'width' and 'page'
	 * @return boolean|string result of writing the image if $target is set,
	 * the image data if $target is not set, false on failure
	 */
	public function convert($infile, $target = null, $params = array()) { /* {{{ */
		$boxWidth = 596;
		$boxHeight = 842;
		$boxTop = 30;
		$boxBottom = 30;
		$boxLeft = 30;
		$boxRight = 30;
		$parSep = 10;
		$fontSize = 10;

		$start = microtime(true);
		$imagick = new Imagick();
		/* Setting a smaller resolution will speed up the conversion
		 * A resolution of 72,72 will create a 596x842 image
		 * Setting it to 36,36 will create a 298x421 image which should
		 * be sufficient in most cases, but keep in mind that images are
		 * not scaled up. Hence, a width of 400px still results in a 298px
		 * wide image
		 */
		$imagick->setResolution(72,72);
		$page = 0;
		if(!empty($params['page']) && intval($params['page']) > 0)
			$page = intval($params['page'])-1;
		try {
			if($imagick->newImage($boxWidth, $boxHeight, "white")) {
				$draw = new ImagickDraw();
				$draw->setStrokeColor("none");
				$draw->setFont("Courier");
				$draw->setFontSize($fontSize);
				$draw->setTextAlignment(Imagick::ALIGN_LEFT);

				$content = file_get_contents($infile);
				// An unreadable file must not end up as an empty page which
				// looks like a successful conversion.
				if($content === false) {
					$this->success = false;
					return false;
				}
				$lines = preg_split('~\R~',$content);
				$boxY = $boxTop;
				$pagecount = 0;
				foreach($lines as $line) {
					if($line) {
						$rlines = $this->wordWrapAnnotation($imagick, $draw, $line, $boxWidth-$boxLeft-$boxRight);
						foreach($rlines[0] as $rline) {
							// draw only the lines of the requested page
							if($pagecount == $page && $boxY < ($boxHeight-$boxBottom)) {
								$imagick->annotateImage($draw, $boxLeft, $boxY, 0, $rline);
							}
							$boxY = $boxY + $rlines[1];
						}
					} else {
						// an empty line separates paragraphs
						$boxY += $parSep;
					}
					// bottom of page reached, continue at the top of the next
					// page or stop if the requested page is done
					if($boxY >= ($boxHeight-$boxBottom)) {
						$pagecount++;
						$boxY = $boxTop;
						if($pagecount > $page)
							break;
					}
				}

				if(!empty($params['width']))
					$imagick->scaleImage(min((int) $params['width'], $imagick->getImageWidth()), 0);
				$imagick->setImageFormat('png');
				$end = microtime(true);
				if($this->logger) {
					$this->logger->log('Conversion from '.$this->from.' to '.$this->to.' with text service took '.($end-$start).' sec.', PEAR_LOG_INFO);
				}
				if($target) {
					return $imagick->writeImage($target);
				} else {
					return $imagick->getImageBlob();
				}
			}
		} catch (ImagickException $e) {
			// record the failure, so wasSuccessful() reports it
			$this->success = false;
			return false;
		}
		return false;
	} /* }}} */
}
|
||||
|
||||
|
||||
|
|
@ -24,6 +24,7 @@ require_once("inc/inc.ClassConversionServiceBase.php");
|
|||
*/
|
||||
class SeedDMS_ConversionServiceTextToText extends SeedDMS_ConversionServiceBase {
|
||||
/**
 * Create the service for a pair of source and target type
 *
 * @param string $from type the service converts from
 * @param string $to type the service converts to
 */
public function __construct($from, $to) {
|
||||
parent::__construct();
|
||||
$this->from = $from;
|
||||
$this->to = $to;
|
||||
}
|
||||
|
|
|
@ -7,17 +7,27 @@
|
|||
* @license GPL 2
|
||||
* @version @version@
|
||||
* @author Uwe Steinmann <uwe@steinmann.cx>
|
||||
* @copyright Copyright (C) 2016 Uwe Steinmann
|
||||
* @copyright Copyright (C) 2021-2023 Uwe Steinmann
|
||||
* @version Release: @package_version@
|
||||
*/
|
||||
|
||||
/**
|
||||
* Implementation of fulltext service
|
||||
*
|
||||
* The fulltext service is a wrapper around single services for a full text
|
||||
* search. Such a service can be based on Solr, SQlite, etc. It implements
|
||||
* three major methods:
|
||||
* IndexedDocument() for creating an instance of an indexed document
|
||||
* Indexer() for creating an instance of the index
|
||||
* Search() for creating an instance of a search frontend
|
||||
*
|
||||
* Though this class can manage more than one service, it will only
|
||||
* use the first one.
|
||||
*
|
||||
* @category DMS
|
||||
* @package SeedDMS
|
||||
* @author Uwe Steinmann <uwe@steinmann.cx>
|
||||
* @copyright Copyright (C) 2016 Uwe Steinmann
|
||||
* @copyright Copyright (C) 2021-2023 Uwe Steinmann
|
||||
* @version Release: @package_version@
|
||||
*/
|
||||
class SeedDMS_FulltextService {
|
||||
|
@ -136,7 +146,7 @@ class SeedDMS_FulltextService {
|
|||
/**
|
||||
* Returns callback function to convert a document into plain text
|
||||
*
|
||||
* This variant just uses the text previewer which
|
||||
* This variant uses the text previewer which
|
||||
* caches the converted document
|
||||
*/
|
||||
public function getConversionWithPreviewCallback() { /* {{{ */
|
||||
|
@ -162,7 +172,7 @@ class SeedDMS_FulltextService {
|
|||
} /* }}} */
|
||||
|
||||
/**
|
||||
* Return an indexable document from the given document or folder
|
||||
* Return an indexable document based on the given document or folder
|
||||
*
|
||||
* @param SeedDMS_Core_Document|SeedDMS_Core_Folder $object document or folder
|
||||
* to be indexed
|
||||
|
@ -183,7 +193,7 @@ class SeedDMS_FulltextService {
|
|||
/**
|
||||
* Returns an instance of the indexer
|
||||
*
|
||||
* The indexer provides access to fulltext index. It allows to add and
|
||||
* The indexer provides access to the fulltext index. It allows to add and
|
||||
* get documents.
|
||||
*
|
||||
* @return object instance of class specified in 'Indexer'
|
||||
|
|
|
@ -34,6 +34,10 @@ if(extension_loaded('gd') || extension_loaded('imagick')) {
|
|||
$conversionmgr->addService(new SeedDMS_ConversionServiceImageToImage('image/gif', 'image/png'))->setLogger($logger);
|
||||
}
|
||||
|
||||
if(extension_loaded('imagick')) {
|
||||
$conversionmgr->addService(new SeedDMS_ConversionServiceTextToImage('text/plain', 'image/png'))->setLogger($logger);
|
||||
}
|
||||
|
||||
$conversionmgr->addService(new SeedDMS_ConversionServiceImageToText('image/jpeg', 'text/plain'))->setLogger($logger);
|
||||
$conversionmgr->addService(new SeedDMS_ConversionServiceImageToText('image/jpg', 'text/plain'))->setLogger($logger);
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@ if($settings->_enableFullSearch) {
|
|||
$fulltextservice->addService($settings->_fullSearchEngine, $indexconf);
|
||||
}
|
||||
}
|
||||
/* setConverters() is deprecated */
|
||||
$fulltextservice->setConverters(isset($settings->_converters['fulltext']) ? $settings->_converters['fulltext'] : null);
|
||||
$fulltextservice->setConversionMgr($conversionmgr);
|
||||
$fulltextservice->setMaxSize($settings->_maxSizeForFullText);
|
||||
|
|
|
@ -1064,6 +1064,7 @@ switch($command) {
|
|||
}
|
||||
if($object) {
|
||||
if($index = $fulltextservice->Indexer()) {
|
||||
$index->init($settings->_stopWordsFile);
|
||||
$idoc = $fulltextservice->IndexedDocument($object, true);
|
||||
$error = $idoc->getErrorMsg();
|
||||
if(!$error) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user