Derek Jones | 37f4b9c | 2011-07-01 17:56:50 -0500 | [diff] [blame] | 1 | <?php if ( ! defined('BASEPATH')) exit('No direct script access allowed'); |
Derek Jones | 98badc1 | 2010-03-02 13:08:02 -0600 | [diff] [blame] | 2 | /** |
| 3 | * CodeIgniter |
| 4 | * |
Greg Aker | 741de1c | 2010-11-10 14:52:57 -0600 | [diff] [blame] | 5 | * An open source application development framework for PHP 5.1.6 or newer |
Derek Jones | 98badc1 | 2010-03-02 13:08:02 -0600 | [diff] [blame] | 6 | * |
Derek Jones | f4a4bd8 | 2011-10-20 12:18:42 -0500 | [diff] [blame] | 7 | * NOTICE OF LICENSE |
| 8 | * |
| 9 | * Licensed under the Open Software License version 3.0 |
| 10 | * |
| 11 | * This source file is subject to the Open Software License (OSL 3.0) that is |
| 12 | * bundled with this package in the files license.txt / license.rst. It is |
| 13 | * also available through the world wide web at this URL: |
| 14 | * http://opensource.org/licenses/OSL-3.0 |
| 15 | * If you did not receive a copy of the license and are unable to obtain it |
| 16 | * through the world wide web, please send an email to |
| 17 | * licensing@ellislab.com so we can send you a copy immediately. |
| 18 | * |
Derek Jones | 98badc1 | 2010-03-02 13:08:02 -0600 | [diff] [blame] | 19 | * @package CodeIgniter |
Derek Jones | f4a4bd8 | 2011-10-20 12:18:42 -0500 | [diff] [blame] | 20 | * @author EllisLab Dev Team |
| 21 | * @copyright Copyright (c) 2008 - 2011, EllisLab, Inc. (http://ellislab.com/) |
| 22 | * @license http://opensource.org/licenses/OSL-3.0 Open Software License (OSL 3.0) |
Derek Jones | 98badc1 | 2010-03-02 13:08:02 -0600 | [diff] [blame] | 23 | * @link http://codeigniter.com |
Pascal Kriete | 5b2d2da | 2010-11-04 17:23:40 -0400 | [diff] [blame] | 24 | * @since Version 2.0 |
Derek Jones | 98badc1 | 2010-03-02 13:08:02 -0600 | [diff] [blame] | 25 | * @filesource |
| 26 | */ |
| 27 | |
| 28 | // ------------------------------------------------------------------------ |
| 29 | |
| 30 | /** |
Pascal Kriete | aaec1e4 | 2011-01-20 00:01:21 -0500 | [diff] [blame] | 31 | * Utf8 Class |
Derek Jones | 98badc1 | 2010-03-02 13:08:02 -0600 | [diff] [blame] | 32 | * |
Pascal Kriete | aaec1e4 | 2011-01-20 00:01:21 -0500 | [diff] [blame] | 33 | * Provides support for UTF-8 environments |
Derek Jones | 98badc1 | 2010-03-02 13:08:02 -0600 | [diff] [blame] | 34 | * |
| 35 | * @package CodeIgniter |
| 36 | * @subpackage Libraries |
Pascal Kriete | aaec1e4 | 2011-01-20 00:01:21 -0500 | [diff] [blame] | 37 | * @category UTF-8 |
Derek Jones | f4a4bd8 | 2011-10-20 12:18:42 -0500 | [diff] [blame] | 38 | * @author EllisLab Dev Team |
Pascal Kriete | aaec1e4 | 2011-01-20 00:01:21 -0500 | [diff] [blame] | 39 | * @link http://codeigniter.com/user_guide/libraries/utf8.html |
Derek Jones | 98badc1 | 2010-03-02 13:08:02 -0600 | [diff] [blame] | 40 | */ |
class CI_Utf8 {

	/**
	 * Constructor
	 *
	 * Determines if UTF-8 support is to be enabled and publishes the result
	 * through the UTF8_ENABLED constant. When support is enabled, MB_ENABLED
	 * is also defined to record whether the mbstring extension is loaded;
	 * note that MB_ENABLED is NOT defined when UTF-8 support is disabled.
	 *
	 * Reads the application charset from the global $CFG object.
	 */
	public function __construct()
	{
		log_message('debug', "Utf8 Class Initialized");

		global $CFG;

		// ini_get() hands back a string (or FALSE for an unknown directive),
		// so the value has to be cast before a strict integer comparison.
		// Comparing the raw value with "!== 1" is always TRUE and would make
		// this requirement a no-op.
		$func_overload = (int) ini_get('mbstring.func_overload');

		if (
			@preg_match('/./u', 'é') === 1			// PCRE must support UTF-8
			&& function_exists('iconv')				// iconv must be installed
			&& $func_overload !== 1					// Multibyte string function overloading cannot be enabled
			&& $CFG->item('charset') == 'UTF-8'		// Application charset must be UTF-8
			)
		{
			log_message('debug', "UTF-8 Support Enabled");

			define('UTF8_ENABLED', TRUE);

			// set internal encoding for multibyte string functions if necessary
			// and set a flag so we don't have to repeatedly use extension_loaded()
			// or function_exists()
			if (extension_loaded('mbstring'))
			{
				define('MB_ENABLED', TRUE);
				mb_internal_encoding('UTF-8');
			}
			else
			{
				define('MB_ENABLED', FALSE);
			}
		}
		else
		{
			log_message('debug', "UTF-8 Support Disabled");
			define('UTF8_ENABLED', FALSE);
		}
	}

	// --------------------------------------------------------------------

	/**
	 * Clean UTF-8 strings
	 *
	 * Ensures strings are UTF-8. Pure 7-bit ASCII input is returned
	 * untouched; anything else is passed through iconv() from UTF-8 to
	 * UTF-8 with //IGNORE.
	 *
	 * @param	string
	 * @return	string	the iconv() result is returned as-is, so this may be
	 *			FALSE if the (error-suppressed) conversion fails
	 */
	public function clean_string($str)
	{
		if ($this->_is_ascii($str) === FALSE)
		{
			// Errors are suppressed on purpose: malformed input is expected here
			$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
		}

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Remove ASCII control characters
	 *
	 * Removes all ASCII control characters except horizontal tabs,
	 * line feeds, and carriage returns, as all others can cause
	 * problems in XML
	 *
	 * The work is delegated to remove_invisible_characters(), which is
	 * defined outside this class.
	 *
	 * @param	string
	 * @return	string
	 */
	public function safe_ascii_for_xml($str)
	{
		return remove_invisible_characters($str, FALSE);
	}

	// --------------------------------------------------------------------

	/**
	 * Convert to UTF-8
	 *
	 * Attempts to convert a string to UTF-8, using iconv() when available
	 * and falling back to mb_convert_encoding() otherwise.
	 *
	 * @param	string
	 * @param	string	- input encoding
	 * @return	string	the converted string; FALSE when neither conversion
	 *			function exists (a failed, error-suppressed conversion
	 *			is returned as-is and may also be FALSE)
	 */
	public function convert_to_utf8($str, $encoding)
	{
		if (function_exists('iconv'))
		{
			$str = @iconv($encoding, 'UTF-8', $str);
		}
		elseif (function_exists('mb_convert_encoding'))
		{
			$str = @mb_convert_encoding($str, 'UTF-8', $encoding);
		}
		else
		{
			// No conversion facility available
			return FALSE;
		}

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Is ASCII?
	 *
	 * Tests if a string is standard 7-bit ASCII or not
	 *
	 * @param	string
	 * @return	bool	TRUE when no byte outside \x00-\x7F is found
	 */
	protected function _is_ascii($str)
	{
		return (preg_match('/[^\x00-\x7F]/S', $str) === 0);
	}

	// --------------------------------------------------------------------

}
Pascal Kriete | aaec1e4 | 2011-01-20 00:01:21 -0500 | [diff] [blame] | 169 | // End Utf8 Class |
Derek Jones | 98badc1 | 2010-03-02 13:08:02 -0600 | [diff] [blame] | 170 | |
Pascal Kriete | aaec1e4 | 2011-01-20 00:01:21 -0500 | [diff] [blame] | 171 | /* End of file Utf8.php */ |
| 172 | /* Location: ./system/core/Utf8.php */ |