File: D:/HostingSpaces/EvLuik/vanluiktegelwerken.nl/wwwroot/cmsimple/utf8.php
<?php
/**
* @file utf8.php
*
* UTF-8 related string functions.
*
* @category CMSimple_XH
* @package XH
* @author Harry Fuecks <hfuecks@gmail.com>
* @author The CMSimple_XH developers <devs@cmsimple-xh.org>
* @copyright 2006-2007 Harry Fuecks
* @copyright 2009-2017 The CMSimple_XH developers <http://cmsimple-xh.org/?The_Team>
* @license http://www.gnu.org/licenses/gpl-3.0.en.html GNU GPLv3
* @link http://cmsimple-xh.org/
*/
/**
 * Counts the Unicode code points of a UTF-8 string.
 *
 * This is a thin wrapper around mb_strlen() with the encoding fixed to
 * UTF-8; how malformed byte sequences are counted is determined by that
 * function.
 *
 * @param string $string A UTF-8 encoded string.
 *
 * @return int The number of code points.
 */
function utf8_strlen($string)
{
    $codePointCount = mb_strlen($string, 'UTF-8');

    return $codePointCount;
}
/**
 * Returns part of a string, given a character offset and optionally a length.
 *
 * Offset and length are measured in Unicode code points, not bytes.
 *
 * @param string   $string A UTF-8 encoded string.
 * @param int      $offset An offset in code points; forwarded to mb_substr().
 * @param int|null $length A length in code points from the offset; when
 *                         omitted (null), everything up to the end of the
 *                         string is returned.
 *
 * @return string
 */
function utf8_substr($string, $offset, $length = null)
{
    if ($length === null) {
        // Before PHP 5.4.8 mb_substr() treated a NULL length like 0 and
        // returned an empty string. The full code point count is always
        // enough to reach the end of the string from any offset.
        $length = mb_strlen($string, 'UTF-8');
    }

    return mb_substr($string, $offset, $length, 'UTF-8');
}
/**
 * Converts a string to lowercase.
 *
 * Note: the concept of a character's "case" only exists in some alphabets
 * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
 * not exist in Chinese script, for example. See Unicode Standard
 * Annex #21: Case Mappings.
 *
 * @param string $string A UTF-8 encoded string.
 *
 * @return string The lowercased string, as produced by mb_strtolower().
 */
function utf8_strtolower($string)
{
    $lowercased = mb_strtolower($string, 'UTF-8');

    return $lowercased;
}
/**
 * Converts a string to uppercase.
 *
 * Note: the concept of a character's "case" only exists in some alphabets
 * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
 * not exist in Chinese script, for example. See Unicode Standard
 * Annex #21: Case Mappings.
 *
 * @param string $string A UTF-8 encoded string.
 *
 * @return string The uppercased string, as produced by mb_strtoupper().
 */
function utf8_strtoupper($string)
{
    $uppercased = mb_strtoupper($string, 'UTF-8');

    return $uppercased;
}
/**
 * Locates the first occurrence of a needle in a haystack (case sensitive).
 *
 * Positions and the offset are measured in Unicode code points.
 *
 * @param string $haystack The string to search in.
 * @param string $needle   The string to search for.
 * @param int    $offset   An offset in code points at which to start.
 *
 * @return int|false The position of the first occurrence, or
 *                   <var>false</var> if the needle is not found.
 */
function utf8_strpos($haystack, $needle, $offset = 0)
{
    $position = mb_strpos($haystack, $needle, $offset, 'UTF-8');

    return $position;
}
/**
 * Locates the first occurrence of a needle in a haystack (case insensitive).
 *
 * Positions and the offset are measured in Unicode code points.
 *
 * @param string $haystack The string to search in.
 * @param string $needle   The string to search for.
 * @param int    $offset   An offset in code points at which to start.
 *
 * @return int|false The position of the first occurrence, or
 *                   <var>false</var> if the needle is not found.
 */
function utf8_stripos($haystack, $needle, $offset = 0)
{
    $position = mb_stripos($haystack, $needle, $offset, 'UTF-8');

    return $position;
}
/**
 * Makes a string's first character uppercase.
 *
 * Only the first code point is passed through mb_strtoupper(); the rest
 * of the string is appended as it is. The string is split with
 * mb_substr() rather than a /u regular expression, because such a
 * pattern fails to match as soon as the input contains an invalid UTF-8
 * sequence, which would discard the whole string.
 *
 * @param string $string A UTF-8 encoded string.
 *
 * @return string
 */
function utf8_ucfirst($string)
{
    $length = mb_strlen($string, 'UTF-8');
    if ($length === 0) {
        return '';
    }

    $first = mb_substr($string, 0, 1, 'UTF-8');
    // An explicit length is passed instead of null, since mb_substr()
    // treated a NULL length like 0 before PHP 5.4.8.
    $rest = mb_substr($string, 1, $length, 'UTF-8');

    return mb_strtoupper($first, 'UTF-8') . $rest;
}
/**
 * Checks whether a string is valid UTF-8 that is supported by the
 * Unicode standard.
 *
 * The empty string is considered valid.
 *
 * @param string $string The string to check.
 *
 * @return boolean
 */
function utf8_is_valid($string)
{
    // With the /u modifier, PCRE refuses to match anything at all if the
    // subject contains an invalid UTF-8 sequence anywhere, even when parts
    // of it are valid. So being able to match just the first character is
    // sufficient proof that the whole string is valid.
    return strlen($string) === 0
        || preg_match('/^.{1}/us', $string) === 1;
}
/**
 * Replaces bad bytes with an alternative string.
 *
 * Every byte that is not part of a well-formed UTF-8 sequence is replaced
 * by <var>$replace</var>; well-formed sequences are copied unchanged. An
 * ASCII replacement character is recommended, so that the result is valid
 * UTF-8.
 *
 * The PCRE pattern to locate bad bytes in a UTF-8 string comes from the
 * W3 FAQ: Multilingual Forms.
 *
 * Note: modified to include full ASCII range including control chars
 *
 * @param string $string  A string to search.
 * @param string $replace A string to replace bad bytes with - use ASCII.
 *
 * @return string
 *
 * @see http://www.w3.org/International/questions/qa-forms-utf-8
 */
function utf8_bad_replace($string, $replace = '?')
{
    // The pattern is deliberately used without the /u modifier, so that it
    // works on bytes. Group 1 wraps the whole alternation; group 2 takes
    // part in a match only when none of the well-formed alternatives fits.
    // The s modifier makes the catch-all dot match any byte, regardless of
    // the newline convention PCRE was built with.
    $pattern = '/([\x00-\x7F]'                 // ASCII (including control chars)
        . '|[\xC2-\xDF][\x80-\xBF]'            // non-overlong 2-byte
        . '|\xE0[\xA0-\xBF][\x80-\xBF]'        // excluding overlongs
        . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' // straight 3-byte
        . '|\xED[\x80-\x9F][\x80-\xBF]'        // excluding surrogates
        . '|\xF0[\x90-\xBF][\x80-\xBF]{2}'     // planes 1-3
        . '|[\xF1-\xF3][\x80-\xBF]{3}'         // planes 4-15
        . '|\xF4[\x80-\x8F][\x80-\xBF]{2}'     // plane 16
        . '|(.{1}))/sS';                       // invalid byte

    // A single pass over the subject; chopping the consumed prefix off the
    // string after every match would copy the remainder again and again.
    $result = preg_replace_callback(
        $pattern,
        function ($matches) use ($replace) {
            $isBadByte = isset($matches[2]) && $matches[2] !== '';
            return $isBadByte ? $replace : $matches[0];
        },
        $string
    );

    // preg_replace_callback() returns null on a PCRE error; do not hand
    // back unfiltered input in that case.
    return $result === null ? '' : $result;
}