From 56cc64d1cffc156691f2724311a684bfe1f6d413 Mon Sep 17 00:00:00 2001
From: Uwe Steinmann
Date: Tue, 7 May 2024 13:51:21 +0200
Subject: [PATCH] add new conversion service from text/html to text/plain

---
 inc/inc.ClassConversionMgr.php               |  1 +
 inc/inc.ClassConversionServiceHtmlToText.php | 86 ++++++++++++++++++++
 inc/inc.ConversionInit.php                   |  2 +
 3 files changed, 89 insertions(+)
 create mode 100644 inc/inc.ClassConversionServiceHtmlToText.php

diff --git a/inc/inc.ClassConversionMgr.php b/inc/inc.ClassConversionMgr.php
index b06430543..28e717c80 100644
--- a/inc/inc.ClassConversionMgr.php
+++ b/inc/inc.ClassConversionMgr.php
@@ -16,6 +16,7 @@ require_once("inc/inc.ClassConversionServiceImageToImage.php");
 require_once("inc/inc.ClassConversionServiceImageToText.php");
 require_once("inc/inc.ClassConversionServicePdfToImage.php");
 require_once("inc/inc.ClassConversionServiceTextToText.php");
+require_once("inc/inc.ClassConversionServiceHtmlToText.php");
 require_once("inc/inc.ClassConversionServiceTextToImage.php");
 
 /**
diff --git a/inc/inc.ClassConversionServiceHtmlToText.php b/inc/inc.ClassConversionServiceHtmlToText.php
new file mode 100644
index 000000000..c9342e564
--- /dev/null
+++ b/inc/inc.ClassConversionServiceHtmlToText.php
@@ -0,0 +1,86 @@
+<?php
+/**
+ * Implementation of conversion service class
+ *
+ * @category   DMS
+ * @package    SeedDMS
+ * @author     Uwe Steinmann
+ * @copyright  Copyright (C) 2021 Uwe Steinmann
+ * @version    Release: @package_version@
+ */
+
+require_once("inc/inc.ClassConversionServiceBase.php");
+
+/**
+ * Implementation of conversion service class for html to text
+ *
+ * @category   DMS
+ * @package    SeedDMS
+ * @author     Uwe Steinmann
+ * @copyright  Copyright (C) 2021 Uwe Steinmann
+ * @version    Release: @package_version@
+ */
+class SeedDMS_ConversionServiceHtmlToText extends SeedDMS_ConversionServiceBase {
+	/**
+	 * Create a service converting between the given mimetypes
+	 *
+	 * @param string $from mimetype of the source document
+	 * @param string $to mimetype of the conversion result
+	 */
+	public function __construct($from, $to) {
+		parent::__construct();
+		$this->from = $from;
+		$this->to = $to;
+	}
+
+	/**
+	 * Get a short description of this service
+	 *
+	 * @return string
+	 */
+	public function getInfo() {
+		return "Strip tags from document contents";
+	}
+
+	/**
+	 * Convert an html file into plain text
+	 *
+	 * @param string $infile name of the html file
+	 * @param string $target name of the file the text is written to. If
+	 *        not set, the text is returned instead.
+	 * @param array $params not used by this service
+	 * @return string|boolean true if the text was written to $target, the
+	 *         text itself if no target was given, false in case of an error
+	 */
+	public function convert($infile, $target = null, $params = array()) {
+		$d = new DOMDocument;
+		/* Html found in the wild is rarely valid. Keep libxml from raising
+		 * a php warning for each markup error and restore the previous
+		 * setting afterwards.
+		 */
+		$olderrors = libxml_use_internal_errors(true);
+		$loaded = $d->loadHTMLFile($infile);
+		libxml_clear_errors();
+		libxml_use_internal_errors($olderrors);
+		if(!$loaded)
+			return false;
+
+		$str = '';
+		$body = $d->getElementsByTagName('body')->item(0);
+		/* item() returns null if the document has no body element */
+		if($body) {
+			foreach($body->childNodes as $childNode) {
+				$str .= $d->saveHTML($childNode);
+			}
+		}
+		/* saveHTML() escapes special chars as entities, which must not show
+		 * up in plain text. Decode them after the tags have been stripped,
+		 * so that an escaped '<' is not mistaken for the start of a tag.
+		 */
+		$text = html_entity_decode(strip_tags($str), ENT_QUOTES | ENT_HTML5, 'UTF-8');
+		if($target)
+			return file_put_contents($target, $text) !== false;
+		return $text;
+	}
+}
+
diff --git a/inc/inc.ConversionInit.php b/inc/inc.ConversionInit.php
index 46c034609..67014251a 100644
--- a/inc/inc.ConversionInit.php
+++ b/inc/inc.ConversionInit.php
@@ -45,6 +45,8 @@ $conversionmgr->addService(new SeedDMS_ConversionServiceTextToText('text/plain',
 $conversionmgr->addService(new SeedDMS_ConversionServiceTextToText('text/markdown', 'text/plain'))->setLogger($logger);
 $conversionmgr->addService(new SeedDMS_ConversionServiceTextToText('text/x-rst', 'text/plain'))->setLogger($logger);
 
+$conversionmgr->addService(new SeedDMS_ConversionServiceHtmlToText('text/html', 'text/plain'))->setLogger($logger);
+
 if(isset($GLOBALS['SEEDDMS_HOOKS']['initConversion'])) {
 	foreach($GLOBALS['SEEDDMS_HOOKS']['initConversion'] as $hookObj) {
 		if (method_exists($hookObj, 'getConversionServices')) {