0) {
                $output.= self::DELIMITER;
            }

            // NOTE(review): this is the tail of a method whose beginning is outside
            // the visible chunk; its code is kept as-is, only re-flowed and commented.
            $h = $b;
            while($h < $input_length) {
                $m = PHP_INT_MAX;

                // Find the minimum code point >= n
                for($i=0; $i<$input_length; $i++) {
                    $chr = self::mb_substr($input,$i,1);
                    $c = self::uniord( $chr );
                    if ($c >= $n && $c < $m) {
                        $m = $c;
                    }
                }

                // Guard the delta update below against integer overflow.
                if (($m - $n) > (PHP_INT_MAX - $delta) / ($h+1)) {
                    throw new Exception("PunycodeException.OVERFLOW");
                }
                $delta = $delta + ($m - $n) * ($h + 1);
                $n = $m;

                for($j=0; $j<$input_length; $j++) {
                    $chr = self::mb_substr($input,$j,1);
                    $c = self::uniord( $chr );
                    if ($c < $n) {
                        $delta++;
                        if (0==$delta) {
                            throw new Exception("PunycodeException.OVERFLOW");
                        }
                    }
                    if ($c == $n) {
                        // Emit $delta as a variable-length integer, one digit per iteration.
                        $q = $delta;
                        for($k= self::BASE;; $k+=self::BASE) {
                            $t=0;
                            if ($k <= $bias) {
                                $t= self::TMIN;
                            } else if ($k >= $bias + self::TMAX) {
                                $t= self::TMAX;
                            } else {
                                $t = $k - $bias;
                            }
                            if ($q < $t) {
                                break;
                            }
                            $output.= chr( self::digit2codepoint($t + ($q - $t) % (self::BASE - $t)) );
                            $q = floor( ($q-$t) / (self::BASE - $t) );//integer division
                        }
                        $output.= chr( self::digit2codepoint($q) );
                        $bias = self::adapt($delta, $h+1, $h==$b);
                        $delta=0;
                        $h++;
                    }
                }
                $delta++;
                $n++;
            }
        } catch (Exception $e) {
            // Best effort: log the failure and hand the input back unchanged.
            error_log("[PUNYCODE] error ".$e->getMessage());
            return $input;
        }
        return $output;
    }

    /**
     * Decodes a Punycode string into a UTF-8 string.
     *
     * Everything before the last delimiter is copied verbatim (it must consist
     * of code points below INITIAL_N); the remainder is read as a sequence of
     * variable-length integers, each of which yields one code point and the
     * position at which to insert it into the output.
     *
     * Errors do not propagate: any Exception raised while decoding is logged
     * with error_log() and the original $input is returned unchanged.
     *
     * @param string $input Punycode text (without any "xn--" style prefix handling
     *                      — none is performed here).
     * @return string The decoded UTF-8 string, or $input itself on failure.
     */
    protected static function decode($input)
    {
        try {
            $n = self::INITIAL_N;
            $i = 0;
            $bias = self::INITIAL_BIAS;
            $output = '';

            // Copy the literal part. rstrpos() yields false when there is no
            // delimiter; "false > 0" is false, so that case lands in the else branch.
            $d = self::rstrpos($input, self::DELIMITER);
            if ($d > 0) {
                for ($j = 0; $j < $d; $j++) {
                    $chr = self::mb_substr($input, $j, 1);
                    $c = self::uniord($chr);
                    if ($c >= self::INITIAL_N) {
                        throw new Exception("PunycodeException.BAD_INPUT");
                    }
                    $output .= $chr;
                }
                $d++; // skip the delimiter itself
            } else {
                $d = 0;
            }

            $input_length = self::mb_strlen($input);
            while ($d < $input_length) {
                $oldi = $i;
                $w = 1;

                // Read one variable-length integer into $i.
                for ($k = self::BASE;; $k += self::BASE) {
                    if ($d == $input_length) {
                        throw new Exception("PunycodeException.BAD_INPUT");
                    }
                    $chr = self::mb_substr($input, $d++, 1);
                    $c = self::uniord($chr);
                    $digit = self::codepoint2digit($c);
                    if ($digit > (PHP_INT_MAX - $i) / $w) {
                        throw new Exception("PunycodeException.OVERFLOW");
                    }
                    $i = $i + $digit * $w;

                    if ($k <= $bias) {
                        $t = self::TMIN;
                    } else if ($k >= $bias + self::TMAX) {
                        $t = self::TMAX;
                    } else {
                        $t = $k - $bias;
                    }
                    if ($digit < $t) {
                        break;
                    }
                    $w = $w * (self::BASE - $t);
                }

                $points = self::mb_strlen($output) + 1;
                $bias = self::adapt($i - $oldi, $points, $oldi == 0);

                // $i encodes both the code point increment (quotient) and the
                // insertion position (remainder). Keep $n an integer so that it
                // can be fed to the bitwise arithmetic in utf8().
                $step = (int) floor($i / $points);
                if ($step > PHP_INT_MAX - $n) {
                    throw new Exception("PunycodeException.OVERFLOW");
                }
                $n = $n + $step;
                $i = $i % $points;

                // utf8() returns '' for surrogates and values above 0x10FFFF;
                // inserting nothing would silently corrupt the result.
                $encoded = self::utf8($n);
                if ($encoded === '') {
                    throw new Exception("PunycodeException.BAD_INPUT");
                }
                $output = self::mb_strinsert($output, $encoded, $i);
                $i++;
            }
        } catch (Exception $e) {
            error_log("[PUNYCODE] error ".$e->getMessage());
            return $input;
        }
        return $output;
    }

    //adapt patched from:
    //https://github.com/takezoh/php-PunycodeEncoder/blob/master/punycode.php
    /**
     * Bias adaptation step used after each encoded/decoded delta.
     *
     * @param int  $delta     The delta that was just processed.
     * @param int  $numpoints Number of code points handled so far (divisor for the delta).
     * @param bool $firsttime True for the first delta: it is then scaled by DAMP instead of 2.
     * @return int The new bias.
     */
    protected static function adapt($delta, $numpoints, $firsttime)
    {
        $delta = (int)($firsttime ? $delta / self::DAMP : $delta / 2);
        $delta += (int)($delta / $numpoints);

        $k = 0;
        while ($delta > (((self::BASE - self::TMIN) * self::TMAX) / 2)) {
            $delta = (int)($delta / (self::BASE - self::TMIN));
            $k += self::BASE;
        }

        return $k + (int)((self::BASE - self::TMIN + 1) * $delta / ($delta + self::SKEW));
    }

    /**
     * Maps a Punycode digit value to the byte value of its character.
     *
     * @param int|float $d Digit value, 0..35.
     * @return int|float ord() of 'a'..'z' for 0..25, of '0'..'9' for 26..35.
     * @throws Exception "PunycodeException.BAD_INPUT" when $d is outside 0..35.
     */
    protected static function digit2codepoint($d)
    {
        if ($d >= 0 && $d < 26) {
            // 0..25 : 'a'..'z'
            return $d + ord('a');
        }
        if ($d >= 26 && $d < 36) {
            // 26..35 : '0'..'9'
            return $d - 26 + ord('0');
        }
        throw new Exception("PunycodeException.BAD_INPUT");
    }

    /**
     * Maps a code point to its Punycode digit value.
     *
     * Letters are accepted in both cases. Anything else — including the
     * false that uniord() returns for undecodable input — is rejected rather
     * than turned into a negative or otherwise bogus digit.
     *
     * @param int|false $c Code point as returned by uniord().
     * @return int 0..25 for letters, 26..35 for '0'..'9'.
     * @throws Exception "PunycodeException.BAD_INPUT" for any other value.
     */
    protected static function codepoint2digit($c)
    {
        if ($c === false) {
            throw new Exception("PunycodeException.BAD_INPUT");
        }
        if ($c >= ord('0') && $c <= ord('9')) {
            // '0'..'9' : 26..35
            return $c - ord('0') + 26;
        }
        if ($c >= ord('a') && $c <= ord('z')) {
            // 'a'..'z' : 0..25
            return $c - ord('a');
        }
        if ($c >= ord('A') && $c <= ord('Z')) {
            // 'A'..'Z' : 0..25
            return $c - ord('A');
        }
        throw new Exception("PunycodeException.BAD_INPUT");
    }

    /**
     * Byte offset of the last occurrence of $needle in $haystack.
     *
     * @param string $haystack
     * @param string $needle
     * @return int|false Zero-based byte offset, or false when $needle does not occur.
     */
    protected static function rstrpos($haystack, $needle)
    {
        return strrpos($haystack, $needle);
    }

    /**
     * Inserts $needle into $haystack in front of the character at $position.
     *
     * Positions are counted in UTF-8 characters, not bytes.
     *
     * @param string $haystack UTF-8 string to insert into.
     * @param string $needle   String to insert.
     * @param int    $position Character offset of the insertion point.
     * @return string
     */
    protected static function mb_strinsert($haystack, $needle, $position)
    {
        // The total length is always enough to reach the end of the string.
        $length = mb_strlen($haystack, 'UTF-8');

        return mb_substr($haystack, 0, $position, 'UTF-8')
            . $needle
            . mb_substr($haystack, $position, $length, 'UTF-8');
    }

    /**
     * mb_substr() with the encoding fixed to UTF-8, whatever the internal encoding is.
     *
     * @param string $str
     * @param int    $start
     * @param int    $length
     * @return string
     */
    protected static function mb_substr($str,$start,$length)
    {
        return mb_substr($str, $start, $length, 'UTF-8');
    }

    /**
     * mb_strlen() with the encoding fixed to UTF-8.
     *
     * @param string $str
     * @return int Number of UTF-8 characters.
     */
    protected static function mb_strlen($str)
    {
        return mb_strlen($str, 'UTF-8');
    }

    /**
     * mb_strtolower() with the encoding fixed to UTF-8.
     *
     * @param string $str
     * @return string
     */
    protected static function mb_strtolower($str)
    {
        return mb_strtolower($str, 'UTF-8');
    }

    /**
     * Cousin of ord() for UTF-8: code point of the first character of $c.
     *
     * Continuation bytes are not validated; only the lead byte decides how many
     * bytes are combined.
     *
     * @param string $c A string starting with one UTF-8 encoded character.
     * @return int|false The code point, or false when $c is empty, is truncated,
     *                   starts with a byte that is not a 1..4 byte lead, or
     *                   encodes a surrogate (0xd800..0xdfff).
     */
    public static function uniord($c)//cousin of ord() but for unicode
    {
        $c = (string) $c;
        $len = strlen($c);
        if ($len === 0) {
            return false;
        }

        $ord0 = ord($c[0]);
        if ($ord0 <= 127) {
            return $ord0;
        }

        if ($len < 2) {
            return false;
        }
        $ord1 = ord($c[1]);
        if ($ord0 >= 192 && $ord0 <= 223) {
            return ($ord0 - 192) * 64 + ($ord1 - 128);
        }
        if ($ord0 == 0xed && ($ord1 & 0xa0) == 0xa0) {
            return false; //code points, 0xd800 to 0xdfff
        }

        if ($len < 3) {
            return false;
        }
        $ord2 = ord($c[2]);
        if ($ord0 >= 224 && $ord0 <= 239) {
            return ($ord0 - 224) * 4096 + ($ord1 - 128) * 64 + ($ord2 - 128);
        }

        if ($len < 4) {
            return false;
        }
        $ord3 = ord($c[3]);
        if ($ord0 >= 240 && $ord0 <= 247) {
            return ($ord0 - 240) * 262144 + ($ord1 - 128) * 4096 + ($ord2 - 128) * 64 + ($ord3 - 128);
        }

        return false;
    }

    /**
     * Cousin of chr() for UTF-8: encodes a code point as a UTF-8 byte sequence.
     *
     * @param int $num Code point.
     * @return string 1..4 bytes, or '' for surrogates (0xd800..0xdfff) and
     *                values above 0x10FFFF.
     */
    public static function utf8($num)//cousin of ascii() but for utf8
    {
        if ($num <= 0x7F) {
            return chr($num);
        }
        if ($num <= 0x7FF) {
            return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
        }
        if (0xd800 <= $num && $num <= 0xdfff) {
            return ''; //invalid block of utf8
        }
        if ($num <= 0xFFFF) {
            return chr(($num >> 12) + 224)
                . chr((($num >> 6) & 63) + 128)
                . chr(($num & 63) + 128);
        }
        if ($num <= 0x10FFFF) {
            return chr(($num >> 18) + 240)
                . chr((($num >> 12) & 63) + 128)
                . chr((($num >> 6) & 63) + 128)
                . chr(($num & 63) + 128);
        }
        return '';
    }

    /**
     * Checks that $string is structurally valid UTF-8 of at most 4 bytes per character.
     *
     * Single-byte characters are limited to tab, LF, CR and 0x20..0x7e; other
     * control bytes and 0x7f make the string invalid. Lead bytes must be followed
     * by the right number of 10bbbbbb continuation bytes, and encoded surrogates
     * (0xd800..0xdfff) are rejected. 5- and 6-byte lead bytes are rejected as well.
     *
     * @param string $string
     * @return bool
     */
    public static function is_valid_utf8($string)
    {
        for ($i = 0, $ix = strlen($string); $i < $ix; $i++) {
            $c = ord($string[$i]);

            if ($c == 0x09 || $c == 0x0a || $c == 0x0d || (0x20 <= $c && $c <= 0x7e)) {
                $n = 0; // 0bbbbbbb
            } else if (($c & 0xE0) == 0xC0) {
                $n = 1; // 110bbbbb
            } else if ($c == 0xed && $i + 1 < $ix && (ord($string[$i + 1]) & 0xa0) == 0xa0) {
                return false; //code points, 0xd800 to 0xdfff
            } else if (($c & 0xF0) == 0xE0) {
                $n = 2; // 1110bbbb
            } else if (($c & 0xF8) == 0xF0) {
                $n = 3; // 11110bbb
            } else {
                return false;
            }

            // n bytes matching 10bbbbbb follow ?
            for ($j = 0; $j < $n; $j++) {
                if ((++$i == $ix) || ((ord($string[$i]) & 0xC0) != 0x80)) {
                    return false;
                }
            }
        }
        return true;
    }
}