better error reporting when indexing fails

This commit is contained in:
Uwe Steinmann 2018-04-11 15:31:35 +02:00
parent 162353b5e0
commit 9fb899a61a
6 changed files with 89 additions and 10 deletions

View File

@ -29,6 +29,16 @@ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document {
*/
protected $errormsg;
/**
* @var string
*/
protected $mimetype;
/**
* @var string
*/
protected $cmd;
/**
* @param $cmd
* @param int $timeout
@ -88,6 +98,8 @@ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document {
*/
public function __construct($dms, $document, $convcmd=null, $nocontent=false, $timeout=5) { /* {{{ */
$this->errormsg = '';
$this->cmd = '';
$this->mimetype = '';
$_convcmd = array(
'application/pdf' => 'pdftotext -enc UTF-8 -nopgbrk %s - |sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'',
'application/postscript' => 'ps2pdf14 %s - | pdftotext -enc UTF-8 -nopgbrk - - | sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'',
@ -148,6 +160,7 @@ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document {
$path = $dms->contentDir . $version->getPath();
$content = '';
$mimetype = $version->getMimeType();
$this->mimetype = $mimetype;
$cmd = '';
$mimeparts = explode('/', $mimetype, 2);
if(isset($_convcmd[$mimetype])) {
@ -158,6 +171,7 @@ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document {
$cmd = sprintf($_convcmd[$mimetype], $path);
}
if($cmd) {
$this->cmd = $cmd;
try {
$content = self::execWithTimeout($cmd, $timeout);
if($content['stdout']) {
@ -175,5 +189,13 @@ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document {
public function getErrorMsg() { /* {{{ */
return $this->errormsg;
} /* }}} */
public function getMimeType() { /* {{{ */
return $this->mimetype;
} /* }}} */
public function getCmd() { /* {{{ */
return $this->cmd;
} /* }}} */
}
?>

View File

@ -11,11 +11,11 @@
<email>uwe@steinmann.cx</email>
<active>yes</active>
</lead>
<date>2018-01-30</date>
<date>2018-04-11</date>
<time>10:58:13</time>
<version>
<release>1.1.12</release>
<api>1.1.12</api>
<release>1.1.13</release>
<api>1.1.13</api>
</version>
<stability>
<release>stable</release>
@ -23,7 +23,7 @@
</stability>
<license uri="http://opensource.org/licenses/gpl-license">GPL License</license>
<notes>
execWithTimeout() reads data from stderr and saves it into error msg
IndexedDocument() remembers cmd and mimetype
</notes>
<contents>
<dir baseinstalldir="SeedDMS" name="/">
@ -283,5 +283,21 @@ catch exception in execWithTimeout()
allow conversion commands for mimetypes with wildcards
</notes>
</release>
<release>
<date>2018-01-30</date>
<time>10:58:13</time>
<version>
<release>1.1.12</release>
<api>1.1.12</api>
</version>
<stability>
<release>stable</release>
<api>stable</api>
</stability>
<license uri="http://opensource.org/licenses/gpl-license">GPL License</license>
<notes>
execWithTimeout() reads data from stderr and saves it into error msg
</notes>
</release>
</changelog>
</package>

View File

@ -29,8 +29,21 @@ require_once('Document.php');
*/
class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document {
/**
* @var string
*/
protected $errormsg;
/**
* @var string
*/
protected $mimetype;
/**
* @var string
*/
protected $cmd;
static function execWithTimeout($cmd, $timeout=2) { /* {{{ */
$descriptorspec = array(
0 => array("pipe", "r"),
@ -79,6 +92,8 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document {
*/
public function __construct($dms, $document, $convcmd=null, $nocontent=false, $timeout=5) { /* {{{ */
$this->errormsg = '';
$this->cmd = '';
$this->mimetype = '';
$_convcmd = array(
'application/pdf' => 'pdftotext -enc UTF-8 -nopgbrk %s - |sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'',
'application/postscript' => 'ps2pdf14 %s - | pdftotext -enc UTF-8 -nopgbrk - - | sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'',
@ -139,6 +154,7 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document {
$path = $dms->contentDir . $version->getPath();
$content = '';
$mimetype = $version->getMimeType();
$this->mimetype = $mimetype;
$cmd = '';
$mimeparts = explode('/', $mimetype, 2);
if(isset($_convcmd[$mimetype])) {
@ -149,6 +165,7 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document {
$cmd = sprintf($_convcmd[$mimetype], $path);
}
if($cmd) {
$this->cmd = $cmd;
try {
$content = self::execWithTimeout($cmd, $timeout);
if($content['stdout']) {
@ -166,5 +183,13 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document {
public function getErrorMsg() { /* {{{ */
return $this->errormsg;
} /* }}} */
public function getMimeType() { /* {{{ */
return $this->mimetype;
} /* }}} */
public function getCmd() { /* {{{ */
return $this->cmd;
} /* }}} */
}
?>

View File

@ -11,11 +11,11 @@
<email>uwe@steinmann.cx</email>
<active>yes</active>
</lead>
<date>2018-01-30</date>
<date>2018-04-11</date>
<time>11:00:40</time>
<version>
<release>1.0.9</release>
<api>1.0.9</api>
<release>1.0.10</release>
<api>1.0.10</api>
</version>
<stability>
<release>stable</release>
@ -23,7 +23,7 @@
</stability>
<license uri="http://opensource.org/licenses/gpl-license">GPL License</license>
<notes>
execWithTimeout() reads data from stderr and saves it into error msg
IndexedDocument() remembers cmd and mimetype
</notes>
<contents>
<dir baseinstalldir="SeedDMS" name="/">
@ -210,5 +210,21 @@ catch exception in execWithTimeout()
allow conversion commands for mimetypes with wildcards
</notes>
</release>
<release>
<date>2018-01-30</date>
<time>11:00:40</time>
<version>
<release>1.0.9</release>
<api>1.0.9</api>
</version>
<stability>
<release>stable</release>
<api>stable</api>
</stability>
<license uri="http://opensource.org/licenses/gpl-license">GPL License</license>
<notes>
execWithTimeout() reads data from stderr and saves it into error msg
</notes>
</release>
</changelog>
</package>

View File

@ -805,7 +805,7 @@ switch($command) {
echo json_encode(array('success'=>true, 'message'=>getMLText('splash_document_indexed'), 'data'=>$document->getID()));
} else {
header('Content-Type: application/json');
echo json_encode(array('success'=>false, 'message'=>$error, 'data'=>$document->getID()));
echo json_encode(array('success'=>false, 'message'=>$error, 'data'=>$document->getID(), 'mimetype'=>$idoc->getMimeType(), 'cmd'=>$idoc->getCmd()));
}
} else {
header('Content-Type: application/json');

View File

@ -81,7 +81,7 @@ function check_queue() {
} else {
$('#status_'+data.data).html('<?php printMLText('index_error'); ?>');
noty({
text: data.message,
text: '<p><strong>Docid: ' + data.data + ' (' + data.mimetype + ')</strong></p>' + '<p>Cmd: ' + data.cmd + '</p>' + data.message,
type: 'error',
dismissQueue: true,
layout: 'topRight',