File source for ascii.php

Joomla! 1.5 Documentation

Packages

Package: utf8

Developer Network License

Source code for file /phputf8/utils/ascii.php

Documentation is available at ascii.php

<?php
/**
* Tools to help with ASCII in UTF-8
* @version $Id: ascii.php,v 1.1 2006/02/26 13:17:29 harryf Exp $
* @package utf8
* @subpackage ascii
*/
//---------------------------------------------------------------
/**
* UTF-8 lookup table for lower case accented letters
* Maps each lower case accented (non-ASCII) letter, written as a single UTF-8
* character, to its replacement from the ASCII-7 range. Some letters expand to
* two ASCII characters (e.g. 'ß' => 'ss', 'ö' => 'oe', 'þ' => 'th').
* The keys must stay valid UTF-8: this file has to be saved as UTF-8 without
* re-encoding, otherwise the keys stop matching real input and several of
* them collapse into the same (duplicate) array key.
* NOTE(review): common letters such as é, á, ú and ç have no entry in this
* table - confirm against the upstream phputf8 table whether rows were lost.
* @author Andreas Gohr <[email protected]>
* @see utf8_deaccent()
* @see utf8_accents_to_ascii()
* @package utf8
* @subpackage ascii
*/
$GLOBALS['UTF8_LOWER_ACCENTS'] = array(
    'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o',
    'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k',
    'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o',
    'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c',
    'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't',
    'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l',
    'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z',
    'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't',
    'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o',
    'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j',
    'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g',
    'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u',
);
//---------------------------------------------------------------
/**
* UTF-8 lookup table for upper case accented letters
* Maps each upper case accented (non-ASCII) letter, written as a single UTF-8
* character, to its replacement from the ASCII-7 range. Multi-character
* replacements capitalise the first letter only (e.g. 'Ö' => 'Oe').
* The keys must stay valid UTF-8: this file has to be saved as UTF-8 without
* re-encoding, otherwise the keys stop matching real input and several of
* them collapse into the same (duplicate) array key.
* The 'Ss' entry is keyed on the capital sharp s (U+1E9E); the lower case 'ß'
* is not an upper case letter and is left to the lower case table.
* NOTE(review): common letters such as É, Á, Ú and Ç have no entry in this
* table - confirm against the upstream phputf8 table whether rows were lost.
* @author Andreas Gohr <[email protected]>
* @see utf8_deaccent()
* @see utf8_accents_to_ascii()
* @package utf8
* @subpackage ascii
*/
$GLOBALS['UTF8_UPPER_ACCENTS'] = array(
    'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O',
    'ẞ' => 'Ss', 'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K',
    'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O',
    'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C',
    'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T',
    'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L',
    'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z',
    'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T',
    'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O',
    'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J',
    'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G',
    'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae',
);
//--------------------------------------------------------------------
/**
* Tests whether a string contains only 7bit ASCII bytes.
* You might use this to conditionally check whether a string
* needs handling as UTF-8 or not, potentially offering performance
* benefits by using the native PHP equivalent if it's just ASCII e.g.;
*
* <code>
* <?php
* if ( utf8_is_ascii($someString) ) {
* // It's just ASCII - use the native PHP version
* $someString = strtolower($someString);
* } else {
* $someString = utf8_strtolower($someString);
* }
* ?>
* </code>
*
* The empty string contains no non-ASCII byte and therefore counts as ASCII,
* so it takes the cheap native code path in the example above.
*
* @param string
* @return boolean TRUE if every byte is in the range 0x00-0x7F (including
* the empty string), FALSE if a byte above 0x7F is present or the
* regular expression engine reports an error
* @package utf8
* @subpackage ascii
* @see utf8_is_ascii_ctrl
*/
function utf8_is_ascii($str) {
    // Search for any byte outside the ASCII range. preg_match() returns
    // 1 when one is found, 0 when none is found and FALSE on a PCRE error;
    // only a clean "none found" is reported as ASCII.
    return (preg_match('/[^\x00-\x7F]/', $str) === 0);
}
//--------------------------------------------------------------------
/**
* Tests whether a string contains only 7bit ASCII bytes with device
* control codes omitted. The device control codes can be found on the
* second table here: http://www.w3schools.com/tags/ref_ascii.asp
*
* Accepted bytes are tab (0x09), line feed (0x0A), carriage return (0x0D)
* and the printable range 0x20-0x7E. The empty string contains no rejected
* byte and therefore passes the test.
*
* @param string
* @return boolean TRUE if it's all ASCII without device control codes
* (including the empty string), FALSE if any other byte is present or the
* regular expression engine reports an error
* @package utf8
* @subpackage ascii
* @see utf8_is_ascii
*/
function utf8_is_ascii_ctrl($str) {
    // Search for any byte which is outside the ASCII range or is a device
    // control code. preg_match() returns 1 when one is found, 0 when none
    // is found and FALSE on a PCRE error; only a clean "none found" passes.
    return (preg_match('/[^\x09\x0A\x0D\x20-\x7E]/', $str) === 0);
}
//--------------------------------------------------------------------
/**
* Strip out all non-7bit ASCII bytes
* If you need to transmit a string to system which you know can only
* support 7bit ASCII, you could use this function.
* Every byte above 0x7F is removed; all bytes in the range 0x00-0x7F,
* including control codes, are kept in their original order.
* @param string
* @return string with non ASCII bytes removed ('' for empty input)
* @package utf8
* @subpackage ascii
* @see utf8_strip_non_ascii_ctrl
*/
function utf8_strip_non_ascii($str) {
    // A single pass that deletes every run of non-ASCII bytes. This does not
    // touch the output buffer, so it is safe to call from inside output
    // handlers and cannot leak partial output.
    $result = preg_replace('/[^\x00-\x7F]+/', '', $str);
    // preg_replace() returns NULL only when PCRE fails; keep the return
    // type a string in that case.
    return ($result === NULL) ? '' : $result;
}
//--------------------------------------------------------------------
/**
* Strip out all non 7bit ASCII bytes and ASCII device control codes.
* For a list of ASCII device control codes see the 2nd table here:
* http://www.w3schools.com/tags/ref_ascii.asp
*
* Only tab (0x09), line feed (0x0A), carriage return (0x0D) and the
* printable range 0x20-0x7E are kept, in their original order.
*
* @param string
* @return string with non ASCII bytes and device control codes removed
* ('' for empty input)
* @package utf8
* @subpackage ascii
* @see utf8_strip_non_ascii
*/
function utf8_strip_non_ascii_ctrl($str) {
    // A single pass that deletes every run of rejected bytes. This does not
    // touch the output buffer, so it is safe to call from inside output
    // handlers and cannot leak partial output.
    $result = preg_replace('/[^\x09\x0A\x0D\x20-\x7E]+/', '', $str);
    // preg_replace() returns NULL only when PCRE fails; keep the return
    // type a string in that case.
    return ($result === NULL) ? '' : $result;
}
//---------------------------------------------------------------
/**
* Replace accented UTF-8 characters by unaccented ASCII-7 "equivalents".
* The purpose of this function is to replace characters commonly found in Latin
* alphabets with something more or less equivalent from the ASCII range. This can
* be useful for converting a UTF-8 to something ready for a filename, for example.
* Following the use of this function, you would probably also pass the string
* through utf8_strip_non_ascii to clean out any other non-ASCII chars
* Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
* letters. Default is to deaccent both cases ($case = 0)
* The replacements are read from the global lookup tables UTF8_LOWER_ACCENTS
* and UTF8_UPPER_ACCENTS, which must be defined before this function is called.
* @param string UTF-8 string
* @param int (optional) -1 lowercase only, +1 uppercase only, 0 both cases
* @return string accented chars replaced with ascii equivalents
* @author Andreas Gohr <[email protected]>
* @package utf8
* @subpackage ascii
*/
function utf8_accents_to_ascii( $str, $case=0 ){
    if($case <= 0){
        global $UTF8_LOWER_ACCENTS;
        // Write the result back to $str so that it is what gets returned
        // and what the upper case pass below continues from.
        $str = str_replace(
            array_keys($UTF8_LOWER_ACCENTS),
            array_values($UTF8_LOWER_ACCENTS),
            $str
        );
    }
    if($case >= 0){
        global $UTF8_UPPER_ACCENTS;
        $str = str_replace(
            array_keys($UTF8_UPPER_ACCENTS),
            array_values($UTF8_UPPER_ACCENTS),
            $str
        );
    }
    return $str;
}

Documentation generated on Mon, 05 Mar 2007 20:52:29 +0000 by phpDocumentor 1.3.1

Joomla! 1.5 Documentation

Packages

Package: utf8

Other documents

Developer Network License