Derek Jones | 98badc1 | 2010-03-02 13:08:02 -0600 | [diff] [blame] | 1 | <?php if ( ! defined('BASEPATH')) exit('No direct script access allowed'); |
| 2 | /** |
| 3 | * CodeIgniter |
| 4 | * |
| 5 | * An open source application development framework for PHP 4.3.2 or newer |
| 6 | * |
| 7 | * @package CodeIgniter |
| 8 | * @author ExpressionEngine Dev Team |
| 9 | * @copyright Copyright (c) 2008 - 2010, EllisLab, Inc. |
| 10 | * @license http://codeigniter.com/user_guide/license.html |
| 11 | * @link http://codeigniter.com |
| 12 | * @since Version 1.0 |
| 13 | * @filesource |
| 14 | */ |
| 15 | |
| 16 | // ------------------------------------------------------------------------ |
| 17 | |
| 18 | /** |
| 19 | * Unicode Class |
| 20 | * |
| 21 | * Detects whether UTF-8 support can be enabled and provides helpers for cleaning and converting strings to UTF-8 |
| 22 | * |
| 23 | * @package CodeIgniter |
| 24 | * @subpackage Libraries |
| 25 | * @category Unicode |
| 26 | * @author ExpressionEngine Dev Team |
| 27 | * @link http://codeigniter.com/user_guide/libraries/unicode.html |
| 28 | */ |
class CI_Unicode {

	/**
	 * Constructor
	 *
	 * Determines if UTF-8 support is to be enabled and publishes the result
	 * through the UTF8_ENABLED constant. When support is enabled it also
	 * defines MB_ENABLED (TRUE if the mbstring extension is loaded) and, if
	 * mbstring is available, switches its internal encoding to UTF-8.
	 *
	 * Note that MB_ENABLED is only defined when UTF-8 support is enabled.
	 *
	 * Kept as a PHP 4 style constructor because the framework header still
	 * declares PHP 4.3.2 compatibility.
	 */
	function CI_Unicode()
	{
		log_message('debug', "Unicode Class Initialized");

		global $CFG;

		// mbstring.func_overload is a bitmask (1 = mail, 2 = string functions,
		// 4 = regex functions), so comparing it against 1 only caught the
		// mail() case and let string function overloading (2, 3, 6, 7) slip
		// through. Any overloading at all now disables UTF-8 support.
		$overload = (int) ini_get('mbstring.func_overload');

		// Character set names are case-insensitive, so "utf-8" in the
		// application config must be treated the same as "UTF-8".
		$charset = strtoupper((string) $CFG->item('charset'));

		if (
			preg_match('/./u', 'é') === 1		// PCRE must support UTF-8
			AND function_exists('iconv')		// iconv must be installed
			AND $overload === 0					// Multibyte function overloading cannot be enabled
			AND $charset === 'UTF-8'			// Application charset must be UTF-8
			)
		{
			log_message('debug', "Unicode Class - UTF-8 Support Enabled");

			define('UTF8_ENABLED', TRUE);

			// set internal encoding for multibyte string functions if necessary
			// and set a flag so we don't have to repeatedly use extension_loaded()
			// or function_exists()
			if (extension_loaded('mbstring'))
			{
				define('MB_ENABLED', TRUE);
				mb_internal_encoding('UTF-8');
			}
			else
			{
				define('MB_ENABLED', FALSE);
			}
		}
		else
		{
			log_message('debug', "Unicode Class - UTF-8 Support Disabled");
			define('UTF8_ENABLED', FALSE);
		}
	}

	// --------------------------------------------------------------------

	/**
	 * Clean UTF-8 strings
	 *
	 * Ensures strings are UTF-8 by dropping any byte sequence that is not
	 * well-formed UTF-8. Pure 7-bit ASCII input is returned untouched.
	 *
	 * iconv() is tried first. Some iconv implementations return FALSE for
	 * input containing illegal bytes even when //IGNORE is requested, so a
	 * failed (or unavailable) iconv falls back to a byte-level filter rather
	 * than handing FALSE back to the caller.
	 *
	 * @access	public
	 * @param	string
	 * @return	string
	 */
	function clean_string($str)
	{
		if ($this->_is_ascii($str) === TRUE)
		{
			return $str;
		}

		$cleaned = function_exists('iconv')
			? @iconv('UTF-8', 'UTF-8//IGNORE', $str)
			: FALSE;

		if ($cleaned === FALSE)
		{
			$cleaned = $this->_strip_invalid_utf8($str);
		}

		return $cleaned;
	}

	// --------------------------------------------------------------------

	/**
	 * Strip invalid UTF-8 byte sequences
	 *
	 * Keeps only well-formed UTF-8 sequences (no overlongs, no surrogates,
	 * nothing above U+10FFFF) and discards every other byte. The pattern is
	 * deliberately run without the /u modifier so that malformed input can
	 * be inspected byte by byte.
	 *
	 * @access	private
	 * @param	string
	 * @return	string
	 */
	function _strip_invalid_utf8($str)
	{
		$well_formed = '/[\x00-\x7F]+'						// ASCII runs
			.'|[\xC2-\xDF][\x80-\xBF]'						// 2-byte sequences
			.'|\xE0[\xA0-\xBF][\x80-\xBF]'					// 3-byte, excluding overlongs
			.'|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'			// 3-byte, straight
			.'|\xED[\x80-\x9F][\x80-\xBF]'					// 3-byte, excluding surrogates
			.'|\xF0[\x90-\xBF][\x80-\xBF]{2}'				// 4-byte, excluding overlongs
			.'|[\xF1-\xF3][\x80-\xBF]{3}'					// 4-byte, straight
			.'|\xF4[\x80-\x8F][\x80-\xBF]{2}/S';			// 4-byte, up to U+10FFFF

		// 0 means nothing valid was found, FALSE means the match itself
		// failed; either way there is nothing safe to keep.
		if ( ! preg_match_all($well_formed, $str, $matches))
		{
			return '';
		}

		return implode('', $matches[0]);
	}

	// --------------------------------------------------------------------

	/**
	 * Remove ASCII control characters
	 *
	 * Removes all ASCII control characters except horizontal tabs,
	 * line feeds, and carriage returns, as all others can cause
	 * problems in XML
	 *
	 * @access	public
	 * @param	string
	 * @return	string
	 */
	function safe_ascii_for_xml($str)
	{
		return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
	}

	// --------------------------------------------------------------------

	/**
	 * Convert to UTF-8
	 *
	 * Attempts to convert a string to UTF-8 using iconv when it is
	 * available, otherwise mbstring.
	 *
	 * @access	public
	 * @param	string
	 * @param	string	- input encoding
	 * @return	string	the converted string, or FALSE when neither iconv nor
	 *					mbstring is available (iconv itself also yields FALSE
	 *					when the conversion fails)
	 */
	function convert_to_utf8($str, $encoding)
	{
		if (function_exists('iconv'))
		{
			return @iconv($encoding, 'UTF-8', $str);
		}

		if (function_exists('mb_convert_encoding'))
		{
			return @mb_convert_encoding($str, 'UTF-8', $encoding);
		}

		return FALSE;
	}

	// --------------------------------------------------------------------

	/**
	 * Is ASCII?
	 *
	 * Tests if a string is standard 7-bit ASCII or not
	 *
	 * @access	private
	 * @param	string
	 * @return	bool	TRUE when no byte above 0x7F is present
	 */
	function _is_ascii($str)
	{
		return (preg_match('/[^\x00-\x7F]/S', $str) == 0);
	}

	// --------------------------------------------------------------------

}
| 162 | // End Unicode Class |
| 163 | |
| 164 | /* End of file Unicode.php */ |
| 165 | /* Location: ./system/core/Unicode.php */ |