better error reporting when indexing fails

This commit is contained in:
Uwe Steinmann 2018-04-11 15:31:35 +02:00
parent 162353b5e0
commit 9fb899a61a
6 changed files with 89 additions and 10 deletions

View File

@ -29,6 +29,16 @@ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document {
*/ */
protected $errormsg; protected $errormsg;
/**
* @var string
*/
protected $mimetype;
/**
* @var string
*/
protected $cmd;
/** /**
* @param $cmd * @param $cmd
* @param int $timeout * @param int $timeout
@ -88,6 +98,8 @@ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document {
*/ */
public function __construct($dms, $document, $convcmd=null, $nocontent=false, $timeout=5) { /* {{{ */ public function __construct($dms, $document, $convcmd=null, $nocontent=false, $timeout=5) { /* {{{ */
$this->errormsg = ''; $this->errormsg = '';
$this->cmd = '';
$this->mimetype = '';
$_convcmd = array( $_convcmd = array(
'application/pdf' => 'pdftotext -enc UTF-8 -nopgbrk %s - |sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'', 'application/pdf' => 'pdftotext -enc UTF-8 -nopgbrk %s - |sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'',
'application/postscript' => 'ps2pdf14 %s - | pdftotext -enc UTF-8 -nopgbrk - - | sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'', 'application/postscript' => 'ps2pdf14 %s - | pdftotext -enc UTF-8 -nopgbrk - - | sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'',
@ -148,6 +160,7 @@ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document {
$path = $dms->contentDir . $version->getPath(); $path = $dms->contentDir . $version->getPath();
$content = ''; $content = '';
$mimetype = $version->getMimeType(); $mimetype = $version->getMimeType();
$this->mimetype = $mimetype;
$cmd = ''; $cmd = '';
$mimeparts = explode('/', $mimetype, 2); $mimeparts = explode('/', $mimetype, 2);
if(isset($_convcmd[$mimetype])) { if(isset($_convcmd[$mimetype])) {
@ -158,6 +171,7 @@ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document {
$cmd = sprintf($_convcmd[$mimetype], $path); $cmd = sprintf($_convcmd[$mimetype], $path);
} }
if($cmd) { if($cmd) {
$this->cmd = $cmd;
try { try {
$content = self::execWithTimeout($cmd, $timeout); $content = self::execWithTimeout($cmd, $timeout);
if($content['stdout']) { if($content['stdout']) {
@ -175,5 +189,13 @@ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document {
public function getErrorMsg() { /* {{{ */ public function getErrorMsg() { /* {{{ */
return $this->errormsg; return $this->errormsg;
} /* }}} */ } /* }}} */
/**
 * Get the mime type remembered for this indexed document.
 *
 * Intended for error reporting when indexing fails.
 *
 * @return string current value of $this->mimetype; the constructor
 *         initialises it to '' and it is later overwritten with the
 *         result of $version->getMimeType()
 */
public function getMimeType() { /* {{{ */
return $this->mimetype;
} /* }}} */
/**
 * Get the conversion command remembered for this indexed document.
 *
 * Intended for error reporting when indexing fails.
 *
 * @return string current value of $this->cmd; the constructor initialises
 *         it to '' and it is only overwritten when a non-empty conversion
 *         command was built for the document's mime type
 */
public function getCmd() { /* {{{ */
return $this->cmd;
} /* }}} */
} }
?> ?>

View File

@ -11,11 +11,11 @@
<email>uwe@steinmann.cx</email> <email>uwe@steinmann.cx</email>
<active>yes</active> <active>yes</active>
</lead> </lead>
<date>2018-01-30</date> <date>2018-04-11</date>
<time>10:58:13</time> <time>10:58:13</time>
<version> <version>
<release>1.1.12</release> <release>1.1.13</release>
<api>1.1.12</api> <api>1.1.13</api>
</version> </version>
<stability> <stability>
<release>stable</release> <release>stable</release>
@ -23,7 +23,7 @@
</stability> </stability>
<license uri="http://opensource.org/licenses/gpl-license">GPL License</license> <license uri="http://opensource.org/licenses/gpl-license">GPL License</license>
<notes> <notes>
execWithTimeout() reads data from stderr and saves it into error msg IndexedDocument() remembers cmd and mimetype
</notes> </notes>
<contents> <contents>
<dir baseinstalldir="SeedDMS" name="/"> <dir baseinstalldir="SeedDMS" name="/">
@ -283,5 +283,21 @@ catch exception in execWithTimeout()
allow conversion commands for mimetypes with wildcards allow conversion commands for mimetypes with wildcards
</notes> </notes>
</release> </release>
<release>
<date>2018-01-30</date>
<time>10:58:13</time>
<version>
<release>1.1.12</release>
<api>1.1.12</api>
</version>
<stability>
<release>stable</release>
<api>stable</api>
</stability>
<license uri="http://opensource.org/licenses/gpl-license">GPL License</license>
<notes>
execWithTimeout() reads data from stderr and saves it into error msg
</notes>
</release>
</changelog> </changelog>
</package> </package>

View File

@ -29,8 +29,21 @@ require_once('Document.php');
*/ */
class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document { class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document {
/**
* @var string
*/
protected $errormsg; protected $errormsg;
/**
* @var string
*/
protected $mimetype;
/**
* @var string
*/
protected $cmd;
static function execWithTimeout($cmd, $timeout=2) { /* {{{ */ static function execWithTimeout($cmd, $timeout=2) { /* {{{ */
$descriptorspec = array( $descriptorspec = array(
0 => array("pipe", "r"), 0 => array("pipe", "r"),
@ -79,6 +92,8 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document {
*/ */
public function __construct($dms, $document, $convcmd=null, $nocontent=false, $timeout=5) { /* {{{ */ public function __construct($dms, $document, $convcmd=null, $nocontent=false, $timeout=5) { /* {{{ */
$this->errormsg = ''; $this->errormsg = '';
$this->cmd = '';
$this->mimetype = '';
$_convcmd = array( $_convcmd = array(
'application/pdf' => 'pdftotext -enc UTF-8 -nopgbrk %s - |sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'', 'application/pdf' => 'pdftotext -enc UTF-8 -nopgbrk %s - |sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'',
'application/postscript' => 'ps2pdf14 %s - | pdftotext -enc UTF-8 -nopgbrk - - | sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'', 'application/postscript' => 'ps2pdf14 %s - | pdftotext -enc UTF-8 -nopgbrk - - | sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'',
@ -139,6 +154,7 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document {
$path = $dms->contentDir . $version->getPath(); $path = $dms->contentDir . $version->getPath();
$content = ''; $content = '';
$mimetype = $version->getMimeType(); $mimetype = $version->getMimeType();
$this->mimetype = $mimetype;
$cmd = ''; $cmd = '';
$mimeparts = explode('/', $mimetype, 2); $mimeparts = explode('/', $mimetype, 2);
if(isset($_convcmd[$mimetype])) { if(isset($_convcmd[$mimetype])) {
@ -149,6 +165,7 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document {
$cmd = sprintf($_convcmd[$mimetype], $path); $cmd = sprintf($_convcmd[$mimetype], $path);
} }
if($cmd) { if($cmd) {
$this->cmd = $cmd;
try { try {
$content = self::execWithTimeout($cmd, $timeout); $content = self::execWithTimeout($cmd, $timeout);
if($content['stdout']) { if($content['stdout']) {
@ -166,5 +183,13 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document {
public function getErrorMsg() { /* {{{ */ public function getErrorMsg() { /* {{{ */
return $this->errormsg; return $this->errormsg;
} /* }}} */ } /* }}} */
/**
 * Get the mime type remembered for this indexed document.
 *
 * Intended for error reporting when indexing fails.
 *
 * @return string current value of $this->mimetype; the constructor
 *         initialises it to '' and it is later overwritten with the
 *         result of $version->getMimeType()
 */
public function getMimeType() { /* {{{ */
return $this->mimetype;
} /* }}} */
/**
 * Get the conversion command remembered for this indexed document.
 *
 * Intended for error reporting when indexing fails.
 *
 * @return string current value of $this->cmd; the constructor initialises
 *         it to '' and it is only overwritten when a non-empty conversion
 *         command was built for the document's mime type
 */
public function getCmd() { /* {{{ */
return $this->cmd;
} /* }}} */
} }
?> ?>

View File

@ -11,11 +11,11 @@
<email>uwe@steinmann.cx</email> <email>uwe@steinmann.cx</email>
<active>yes</active> <active>yes</active>
</lead> </lead>
<date>2018-01-30</date> <date>2018-04-11</date>
<time>11:00:40</time> <time>11:00:40</time>
<version> <version>
<release>1.0.9</release> <release>1.0.10</release>
<api>1.0.9</api> <api>1.0.10</api>
</version> </version>
<stability> <stability>
<release>stable</release> <release>stable</release>
@ -23,7 +23,7 @@
</stability> </stability>
<license uri="http://opensource.org/licenses/gpl-license">GPL License</license> <license uri="http://opensource.org/licenses/gpl-license">GPL License</license>
<notes> <notes>
execWithTimeout() reads data from stderr and saves it into error msg IndexedDocument() remembers cmd and mimetype
</notes> </notes>
<contents> <contents>
<dir baseinstalldir="SeedDMS" name="/"> <dir baseinstalldir="SeedDMS" name="/">
@ -210,5 +210,21 @@ catch exception in execWithTimeout()
allow conversion commands for mimetypes with wildcards allow conversion commands for mimetypes with wildcards
</notes> </notes>
</release> </release>
<release>
<date>2018-01-30</date>
<time>11:00:40</time>
<version>
<release>1.0.9</release>
<api>1.0.9</api>
</version>
<stability>
<release>stable</release>
<api>stable</api>
</stability>
<license uri="http://opensource.org/licenses/gpl-license">GPL License</license>
<notes>
execWithTimeout() reads data from stderr and saves it into error msg
</notes>
</release>
</changelog> </changelog>
</package> </package>

View File

@ -805,7 +805,7 @@ switch($command) {
echo json_encode(array('success'=>true, 'message'=>getMLText('splash_document_indexed'), 'data'=>$document->getID())); echo json_encode(array('success'=>true, 'message'=>getMLText('splash_document_indexed'), 'data'=>$document->getID()));
} else { } else {
header('Content-Type: application/json'); header('Content-Type: application/json');
echo json_encode(array('success'=>false, 'message'=>$error, 'data'=>$document->getID())); echo json_encode(array('success'=>false, 'message'=>$error, 'data'=>$document->getID(), 'mimetype'=>$idoc->getMimeType(), 'cmd'=>$idoc->getCmd()));
} }
} else { } else {
header('Content-Type: application/json'); header('Content-Type: application/json');

View File

@ -81,7 +81,7 @@ function check_queue() {
} else { } else {
$('#status_'+data.data).html('<?php printMLText('index_error'); ?>'); $('#status_'+data.data).html('<?php printMLText('index_error'); ?>');
noty({ noty({
text: data.message, text: '<p><strong>Docid: ' + data.data + ' (' + data.mimetype + ')</strong></p>' + '<p>Cmd: ' + data.cmd + '</p>' + data.message,
type: 'error', type: 'error',
dismissQueue: true, dismissQueue: true,
layout: 'topRight', layout: 'topRight',