reasonableframework/vendor/_dist/spreadsheet-reader/SpreadsheetReader_CSV.php
2019-05-20 18:03:31 +09:00

277 lines
6.9 KiB
PHP

<?php
/**
* Class for parsing CSV files
*
* @author Martins Pilsetnieks
*/
class SpreadsheetReader_CSV implements Iterator, Countable
{
/**
* @var array Options array, pre-populated with the default values.
*/
private $Options = array(
'Delimiter' => ';',
'Enclosure' => '"'
);
private $Encoding = 'UTF-8';
private $BOMLength = 0;
/**
* @var resource File handle
*/
private $Handle = false;
private $Filepath = '';
private $Index = 0;
private $CurrentRow = null;
/**
* @param string Path to file
* @param array Options:
* Enclosure => string CSV enclosure
* Separator => string CSV separator
*/
public function __construct($Filepath, array $Options = null)
{
$this -> Filepath = $Filepath;
if (!is_readable($Filepath))
{
throw new Exception('SpreadsheetReader_CSV: File not readable ('.$Filepath.')');
}
// For safety's sake
@ini_set('auto_detect_line_endings', true);
$this -> Options = array_merge($this -> Options, $Options);
$this -> Handle = fopen($Filepath, 'r');
// Checking the file for byte-order mark to determine encoding
$BOM16 = bin2hex(fread($this -> Handle, 2));
if ($BOM16 == 'fffe')
{
$this -> Encoding = 'UTF-16LE';
//$this -> Encoding = 'UTF-16';
$this -> BOMLength = 2;
}
elseif ($BOM16 == 'feff')
{
$this -> Encoding = 'UTF-16BE';
//$this -> Encoding = 'UTF-16';
$this -> BOMLength = 2;
}
if (!$this -> BOMLength)
{
fseek($this -> Handle, 0);
$BOM32 = bin2hex(fread($this -> Handle, 4));
if ($BOM32 == '0000feff')
{
//$this -> Encoding = 'UTF-32BE';
$this -> Encoding = 'UTF-32';
$this -> BOMLength = 4;
}
elseif ($BOM32 == 'fffe0000')
{
//$this -> Encoding = 'UTF-32LE';
$this -> Encoding = 'UTF-32';
$this -> BOMLength = 4;
}
}
fseek($this -> Handle, 0);
$BOM8 = bin2hex(fread($this -> Handle, 3));
if ($BOM8 == 'efbbbf')
{
$this -> Encoding = 'UTF-8';
$this -> BOMLength = 3;
}
// Seeking the place right after BOM as the start of the real content
if ($this -> BOMLength)
{
fseek($this -> Handle, $this -> BOMLength);
}
// Checking for the delimiter if it should be determined automatically
if (!$this -> Options['Delimiter'])
{
// fgetcsv needs single-byte separators
$Semicolon = ';';
$Tab = "\t";
$Comma = ',';
// Reading the first row and checking if a specific separator character
// has more columns than others (it means that most likely that is the delimiter).
$SemicolonCount = count(fgetcsv($this -> Handle, null, $Semicolon));
fseek($this -> Handle, $this -> BOMLength);
$TabCount = count(fgetcsv($this -> Handle, null, $Tab));
fseek($this -> Handle, $this -> BOMLength);
$CommaCount = count(fgetcsv($this -> Handle, null, $Comma));
fseek($this -> Handle, $this -> BOMLength);
$Delimiter = $Semicolon;
if ($TabCount > $SemicolonCount || $CommaCount > $SemicolonCount)
{
$Delimiter = $CommaCount > $TabCount ? $Comma : $Tab;
}
$this -> Options['Delimiter'] = $Delimiter;
}
}
/**
* Returns information about sheets in the file.
* Because CSV doesn't have any, it's just a single entry.
*
* @return array Sheet data
*/
public function Sheets()
{
return array(0 => basename($this -> Filepath));
}
/**
* Changes sheet to another. Because CSV doesn't have any sheets
* it just rewinds the file so the behaviour is compatible with other
* sheet readers. (If an invalid index is given, it doesn't do anything.)
*
* @param bool Status
*/
public function ChangeSheet($Index)
{
if ($Index == 0)
{
$this -> rewind();
return true;
}
return false;
}
// !Iterator interface methods
/**
* Rewind the Iterator to the first element.
* Similar to the reset() function for arrays in PHP
*/
public function rewind()
{
fseek($this -> Handle, $this -> BOMLength);
$this -> CurrentRow = null;
$this -> Index = 0;
}
/**
* Return the current element.
* Similar to the current() function for arrays in PHP
*
* @return mixed current element from the collection
*/
public function current()
{
if ($this -> Index == 0 && is_null($this -> CurrentRow))
{
$this -> next();
$this -> Index--;
}
return $this -> CurrentRow;
}
/**
* Move forward to next element.
* Similar to the next() function for arrays in PHP
*/
public function next()
{
$this -> CurrentRow = array();
// Finding the place the next line starts for UTF-16 encoded files
// Line breaks could be 0x0D 0x00 0x0A 0x00 and PHP could split lines on the
// first or the second linebreak leaving unnecessary \0 characters that mess up
// the output.
if ($this -> Encoding == 'UTF-16LE' || $this -> Encoding == 'UTF-16BE')
{
while (!feof($this -> Handle))
{
// While bytes are insignificant whitespace, do nothing
$Char = ord(fgetc($this -> Handle));
if (!$Char || $Char == 10 || $Char == 13)
{
continue;
}
else
{
// When significant bytes are found, step back to the last place before them
if ($this -> Encoding == 'UTF-16LE')
{
fseek($this -> Handle, ftell($this -> Handle) - 1);
}
else
{
fseek($this -> Handle, ftell($this -> Handle) - 2);
}
break;
}
}
}
$this -> Index++;
$this -> CurrentRow = fgetcsv($this -> Handle, null, $this -> Options['Delimiter'], $this -> Options['Enclosure']);
if ($this -> CurrentRow)
{
// Converting multi-byte unicode strings
// and trimming enclosure symbols off of them because those aren't recognized
// in the relevan encodings.
if ($this -> Encoding != 'ASCII' && $this -> Encoding != 'UTF-8')
{
$Encoding = $this -> Encoding;
foreach ($this -> CurrentRow as $Key => $Value)
{
$this -> CurrentRow[$Key] = trim(trim(
mb_convert_encoding($Value, 'UTF-8', $this -> Encoding),
$this -> Options['Enclosure']
));
}
}
}
return $this -> CurrentRow;
}
/**
* Return the identifying key of the current element.
* Similar to the key() function for arrays in PHP
*
* @return mixed either an integer or a string
*/
public function key()
{
return $this -> Index;
}
/**
* Check if there is a current element after calls to rewind() or next().
* Used to check if we've iterated to the end of the collection
*
* @return boolean FALSE if there's nothing more to iterate over
*/
public function valid()
{
return ($this -> CurrentRow || !feof($this -> Handle));
}
// !Countable interface method
/**
* Ostensibly should return the count of the contained items but this just returns the number
* of rows read so far. It's not really correct but at least coherent.
*/
public function count()
{
return $this -> Index + 1;
}
}
?>