diff --git a/SeedDMS_Lucene/Lucene/IndexedDocument.php b/SeedDMS_Lucene/Lucene/IndexedDocument.php index 32720007a..6e890cbed 100644 --- a/SeedDMS_Lucene/Lucene/IndexedDocument.php +++ b/SeedDMS_Lucene/Lucene/IndexedDocument.php @@ -24,6 +24,8 @@ */ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document { + protected $errormsg; + static function execWithTimeout($cmd, $timeout=2) { /* {{{ */ $descriptorspec = array( 0 => array("pipe", "r"), @@ -37,17 +39,23 @@ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document { if (!is_resource($process)) { throw new Exception("proc_open failed on: " . $cmd); } + stream_set_blocking($pipes[1], 0); + stream_set_blocking($pipes[2], 0); - $output = ''; + $output = $error = ''; $timeleft = $timeout - time(); - $read = array($pipes[1]); + $read = array($pipes[1], $pipes[2]); $write = NULL; $exeptions = NULL; do { - stream_select($read, $write, $exeptions, $timeleft, 200000); - - if (!empty($read)) { + $num_changed_streams = stream_select($read, $write, $exeptions, $timeleft, 200000); + + if ($num_changed_streams === false) { + proc_terminate($process); + throw new Exception("stream select failed on: " . $cmd); + } elseif ($num_changed_streams > 0) { $output .= fread($pipes[1], 8192); + $error .= fread($pipes[2], 8192); } $timeleft = $timeout - time(); } while (!feof($pipes[1]) && $timeleft > 0); @@ -56,7 +64,7 @@ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document { proc_terminate($process); throw new Exception("command timeout on: " . $cmd); } else { - return $output; + return array('stdout'=>$output, 'stderr'=>$error); } } /* }}} */ @@ -64,7 +72,8 @@ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document { * Constructor. Creates our indexable document and adds all * necessary fields to it using the passed in document */ - public function __construct($dms, $document, $convcmd=null, $nocontent=false, $timeout=5) { + public function __construct($dms, $document, $convcmd=null, $nocontent=false, $timeout=5) { /* {{{ */ + $this->errormsg = ''; $_convcmd = array( 'application/pdf' => 'pdftotext -enc UTF-8 -nopgbrk %s - |sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'', 'application/postscript' => 'ps2pdf14 %s - | pdftotext -enc UTF-8 -nopgbrk - - | sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'', @@ -137,13 +146,20 @@ class SeedDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document { if($cmd) { try { $content = self::execWithTimeout($cmd, $timeout); - if($content) { - $this->addField(Zend_Search_Lucene_Field::UnStored('content', $content, 'utf-8')); + if($content['stdout']) { + $this->addField(Zend_Search_Lucene_Field::UnStored('content', $content['stdout'], 'utf-8')); + } + if($content['stderr']) { + $this->errormsg = $content['stderr']; } } catch (Exception $e) { } } - } - } + } + } /* }}} */ + + public function getErrorMsg() { /* {{{ */ + return $this->errormsg; + } /* }}} */ } ?> diff --git a/SeedDMS_SQLiteFTS/SQLiteFTS/IndexedDocument.php b/SeedDMS_SQLiteFTS/SQLiteFTS/IndexedDocument.php index 5a8e1cbfc..cf40b4a95 100644 --- a/SeedDMS_SQLiteFTS/SQLiteFTS/IndexedDocument.php +++ b/SeedDMS_SQLiteFTS/SQLiteFTS/IndexedDocument.php @@ -29,6 +29,8 @@ require_once('Document.php'); */ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document { + protected $errormsg; + static function execWithTimeout($cmd, $timeout=2) { /* {{{ */ $descriptorspec = array( 0 => array("pipe", "r"), @@ -36,23 +38,29 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document { 2 => array("pipe", "w") ); $pipes = array(); - + $timeout += time(); $process = proc_open($cmd, $descriptorspec, $pipes); if (!is_resource($process)) { throw new Exception("proc_open failed on: " . $cmd); } + stream_set_blocking($pipes[1], 0); + stream_set_blocking($pipes[2], 0); - $output = ''; + $output = $error = ''; $timeleft = $timeout - time(); - $read = array($pipes[1]); + $read = array($pipes[1], $pipes[2]); $write = NULL; $exeptions = NULL; do { - stream_select($read, $write, $exeptions, $timeleft, 200000); - - if (!empty($read)) { + $num_changed_streams = stream_select($read, $write, $exeptions, $timeleft, 200000); + + if ($num_changed_streams === false) { + proc_terminate($process); + throw new Exception("stream select failed on: " . $cmd); + } elseif ($num_changed_streams > 0) { $output .= fread($pipes[1], 8192); + $error .= fread($pipes[2], 8192); } $timeleft = $timeout - time(); } while (!feof($pipes[1]) && $timeleft > 0); @@ -61,7 +69,7 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document { proc_terminate($process); throw new Exception("command timeout on: " . $cmd); } else { - return $output; + return array('stdout'=>$output, 'stderr'=>$error); } } /* }}} */ @@ -69,7 +77,8 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document { * Constructor. Creates our indexable document and adds all * necessary fields to it using the passed in document */ - public function __construct($dms, $document, $convcmd=null, $nocontent=false, $timeout=5) { + public function __construct($dms, $document, $convcmd=null, $nocontent=false, $timeout=5) { /* {{{ */ + $this->errormsg = ''; $_convcmd = array( 'application/pdf' => 'pdftotext -enc UTF-8 -nopgbrk %s - |sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'', 'application/postscript' => 'ps2pdf14 %s - | pdftotext -enc UTF-8 -nopgbrk - - | sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'', @@ -142,13 +151,20 @@ class SeedDMS_SQLiteFTS_IndexedDocument extends SeedDMS_SQLiteFTS_Document { if($cmd) { try { $content = self::execWithTimeout($cmd, $timeout); - if($content) { - $this->addField('content', $content, 'unstored'); + if($content['stdout']) { + $this->addField('content', $content['stdout'], 'unstored'); + } + if($content['stderr']) { + $this->errormsg = $content['stderr']; } } catch (Exception $e) { } } } - } + } /* }}} */ + + public function getErrorMsg() { /* {{{ */ + return $this->errormsg; + } /* }}} */ } ?>