* @copyright Copyright (C) 2021 Uwe Steinmann
 * @version Release: @package_version@
 */

require_once("inc/inc.ClassConversionServiceBase.php");

// Byte order marks (BOM) used to recognise the Unicode encoding of a text.
define ('UTF32_BIG_ENDIAN_BOM'   , chr(0x00) . chr(0x00) . chr(0xFE) . chr(0xFF));
define ('UTF32_LITTLE_ENDIAN_BOM', chr(0xFF) . chr(0xFE) . chr(0x00) . chr(0x00));
define ('UTF16_BIG_ENDIAN_BOM'   , chr(0xFE) . chr(0xFF));
define ('UTF16_LITTLE_ENDIAN_BOM', chr(0xFF) . chr(0xFE));
define ('UTF8_BOM'               , chr(0xEF) . chr(0xBB) . chr(0xBF));

/**
 * Implementation of conversion service class for text to text
 *
 * Reads a text file, converts it to UTF-8 if a byte order mark identifies
 * its encoding, and otherwise passes the content through unchanged.
 *
 * @category DMS
 * @package SeedDMS
 * @author Uwe Steinmann
 * @copyright Copyright (C) 2021 Uwe Steinmann
 * @version Release: @package_version@
 */
class SeedDMS_ConversionServiceTextToText extends SeedDMS_ConversionServiceBase {

	/**
	 * Create the service for a given pair of source and target type.
	 *
	 * @param mixed $from value stored in $this->from
	 * @param mixed $to value stored in $this->to
	 */
	public function __construct($from, $to) {
		parent::__construct();
		$this->from = $from;
		$this->to = $to;
	}

	/**
	 * Return a short description of this service.
	 *
	 * @return string
	 */
	public function getInfo() {
		return "Pass through document contents";
	}

	/**
	 * Detect the Unicode encoding of a text from its byte order mark.
	 *
	 * @param string $text raw text, possibly starting with a BOM
	 * @return string 'UTF-8', 'UTF-32BE', 'UTF-32LE', 'UTF-16BE' or
	 *         'UTF-16LE'; an empty string if the text has no known BOM
	 */
	protected function detect_utf_encoding($text) {
		$first2 = substr($text, 0, 2);
		$first3 = substr($text, 0, 3);
		// The UTF-32 marks are four bytes long, so four bytes must be read.
		$first4 = substr($text, 0, 4);

		if ($first3 === UTF8_BOM)
			return 'UTF-8';
		// The UTF-32 checks must precede the UTF-16 checks because the
		// UTF-32LE mark starts with the same two bytes as the UTF-16LE mark.
		if ($first4 === UTF32_BIG_ENDIAN_BOM)
			return 'UTF-32BE';
		if ($first4 === UTF32_LITTLE_ENDIAN_BOM)
			return 'UTF-32LE';
		if ($first2 === UTF16_BIG_ENDIAN_BOM)
			return 'UTF-16BE';
		if ($first2 === UTF16_LITTLE_ENDIAN_BOM)
			return 'UTF-16LE';
		return '';
	}

	/**
	 * Convert a text file to UTF-8.
	 *
	 * @param string $infile path of the file to read
	 * @param string|null $target path of the file to write; if empty the
	 *        converted content is returned instead
	 * @param array $params not used by this service
	 * @return boolean|string false if $infile could not be read; with a
	 *         target, true if the file was written and false otherwise;
	 *         without a target, the (converted) content
	 */
	public function convert($infile, $target = null, $params = array()) {
		$content = file_get_contents($infile);
		if ($content === false)
			return false;

		// The BOM based detection is used in favour of mb_detect_encoding().
		$encoding = $this->detect_utf_encoding($content);
		// Without a BOM the source encoding is unknown. An empty encoding
		// name is rejected by mb_convert_encoding(), so the content is
		// passed through as is.
		if ($encoding !== '')
			$content = mb_convert_encoding($content, 'utf8', $encoding);

		if ($target)
			return file_put_contents($target, $content) !== false;

		return $content;
	}
}