Support Joomla!

Joomla! 1.5 Documentation

Packages

Package: utf8

Developer Network License

The Joomla! Developer Network content is © copyright 2006 by the individual contributors and can be used in accordance with the Creative Commons License, Attribution-NonCommercial-ShareAlike 2.5.
Source code for file /phputf8/utils/patterns.php

Documentation is available at patterns.php

  1. <?php
  2. /**
  3. * PCRE Regular expressions for UTF-8. Note this file is not actually used by
  4. * the rest of the library but these regular expressions can be useful to have
  5. * available.
  6. @version $Id: patterns.php,v 1.1 2006/02/25 14:20:02 harryf Exp $
  7. @see http://www.w3.org/International/questions/qa-forms-utf-8
  8. @package utf8
  9. @subpackage patterns
  10. */
  11.  
  12. //--------------------------------------------------------------------
  13. /**
  14. * PCRE Pattern to check a UTF-8 string is valid
  15. * Comes from W3 FAQ: Multilingual Forms
  16. * Note: modified to include full ASCII range including control chars
  17. @see http://www.w3.org/International/questions/qa-forms-utf-8
  18. @package utf8
  19. @subpackage patterns
  20. */
  21. $UTF8_VALID '^('.
  22.     '[\x00-\x7F]'.                          # ASCII (including control chars)
  23.         '|[\xC2-\xDF][\x80-\xBF]'.              # non-overlong 2-byte
  24.         '|\xE0[\xA0-\xBF][\x80-\xBF]'.          # excluding overlongs
  25.         '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.   # straight 3-byte
  26.         '|\xED[\x80-\x9F][\x80-\xBF]'.          # excluding surrogates
  27.         '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.       # planes 1-3
  28.         '|[\xF1-\xF3][\x80-\xBF]{3}'.           # planes 4-15
  29.         '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.       # plane 16
  30.         ')*$';
  31.  
  32. //--------------------------------------------------------------------
  33. /**
  34. * PCRE Pattern to match single UTF-8 characters
  35. * Comes from W3 FAQ: Multilingual Forms
  36. * Note: modified to include full ASCII range including control chars
  37. @see http://www.w3.org/International/questions/qa-forms-utf-8
  38. @package utf8
  39. @subpackage patterns
  40. */
  41. $UTF8_MATCH =
  42.     '([\x00-\x7F])'.                          # ASCII (including control chars)
  43.         '|([\xC2-\xDF][\x80-\xBF])'.              # non-overlong 2-byte
  44.         '|(\xE0[\xA0-\xBF][\x80-\xBF])'.          # excluding overlongs
  45.         '|([\xE1-\xEC\xEE\xEF][\x80-\xBF]{2})'.   # straight 3-byte
  46.         '|(\xED[\x80-\x9F][\x80-\xBF])'.          # excluding surrogates
  47.         '|(\xF0[\x90-\xBF][\x80-\xBF]{2})'.       # planes 1-3
  48.         '|([\xF1-\xF3][\x80-\xBF]{3})'.           # planes 4-15
  49.         '|(\xF4[\x80-\x8F][\x80-\xBF]{2})';       # plane 16
  50.  
  51. //--------------------------------------------------------------------
  52. /**
  53. * PCRE Pattern to locate bad bytes in a UTF-8 string
  54. * Comes from W3 FAQ: Multilingual Forms
  55. * Note: modified to include full ASCII range including control chars
  56. @see http://www.w3.org/International/questions/qa-forms-utf-8
  57. @package utf8
  58. @subpackage patterns
  59. */
  60. $UTF8_BAD =
  61.     '([\x00-\x7F]'.                          # ASCII (including control chars)
  62.         '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
  63.         '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # excluding overlongs
  64.         '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
  65.         '|\xED[\x80-\x9F][\x80-\xBF]'.           # excluding surrogates
  66.         '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # planes 1-3
  67.         '|[\xF1-\xF3][\x80-\xBF]{3}'.            # planes 4-15
  68.         '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # plane 16
  69.         '|(.{1}))';                              

Documentation generated on Mon, 05 Mar 2007 21:16:26 +0000 by phpDocumentor 1.3.1