blob: 7abe4e43b83971a956b65fffc15755a1fc31afba [file] [log] [blame]
Derek Jones37f4b9c2011-07-01 17:56:50 -05001<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
Derek Jones98badc12010-03-02 13:08:02 -06002/**
3 * CodeIgniter
4 *
Greg Aker741de1c2010-11-10 14:52:57 -06005 * An open source application development framework for PHP 5.1.6 or newer
Derek Jones98badc12010-03-02 13:08:02 -06006 *
Derek Jonesf4a4bd82011-10-20 12:18:42 -05007 * NOTICE OF LICENSE
8 *
9 * Licensed under the Open Software License version 3.0
10 *
11 * This source file is subject to the Open Software License (OSL 3.0) that is
12 * bundled with this package in the files license.txt / license.rst. It is
13 * also available through the world wide web at this URL:
14 * http://opensource.org/licenses/OSL-3.0
15 * If you did not receive a copy of the license and are unable to obtain it
16 * through the world wide web, please send an email to
17 * licensing@ellislab.com so we can send you a copy immediately.
18 *
Derek Jones98badc12010-03-02 13:08:02 -060019 * @package CodeIgniter
Derek Jonesf4a4bd82011-10-20 12:18:42 -050020 * @author EllisLab Dev Team
21 * @copyright Copyright (c) 2008 - 2011, EllisLab, Inc. (http://ellislab.com/)
22 * @license http://opensource.org/licenses/OSL-3.0 Open Software License (OSL 3.0)
Derek Jones98badc12010-03-02 13:08:02 -060023 * @link http://codeigniter.com
Pascal Kriete5b2d2da2010-11-04 17:23:40 -040024 * @since Version 2.0
Derek Jones98badc12010-03-02 13:08:02 -060025 * @filesource
26 */
27
28// ------------------------------------------------------------------------
29
/**
 * Utf8 Class
 *
 * Provides support for UTF-8 environments: decides at construction time
 * whether UTF-8 handling can be enabled and offers helpers to clean,
 * convert and inspect strings.
 *
 * @package		CodeIgniter
 * @subpackage	Libraries
 * @category	UTF-8
 * @author		EllisLab Dev Team
 * @link		http://codeigniter.com/user_guide/libraries/utf8.html
 */
class CI_Utf8 {

	/**
	 * Constructor
	 *
	 * Determines if UTF-8 support is to be enabled and defines the
	 * UTF8_ENABLED constant accordingly. When support is enabled it also
	 * defines MB_ENABLED and, if the mbstring extension is loaded, sets the
	 * mbstring internal encoding to UTF-8.
	 */
	public function __construct()
	{
		log_message('debug', "Utf8 Class Initialized");

		global $CFG;

		if (
			// PCRE must support UTF-8. The probe character is spelled out as
			// bytes so the check does not depend on this file's own encoding.
			preg_match('/./u', "\xC3\xA9") === 1
			// iconv must be installed
			AND function_exists('iconv')
			// Multibyte string function overloading cannot be enabled.
			// NOTE(review): mbstring.func_overload is a bitmask in the PHP
			// manual (1 = mail, 2 = string functions, 4 = regex); comparing
			// against 1 only looks too narrow - confirm the intended values.
			AND ini_get('mbstring.func_overload') != 1
			// Application charset must be UTF-8 (compared case-insensitively
			// so that a configured 'utf-8' is accepted as well)
			AND strtoupper((string) $CFG->item('charset')) === 'UTF-8'
			)
		{
			log_message('debug', "UTF-8 Support Enabled");

			$this->_define_once('UTF8_ENABLED', TRUE);

			// set internal encoding for multibyte string functions if necessary
			// and set a flag so we don't have to repeatedly use extension_loaded()
			// or function_exists()
			if (extension_loaded('mbstring'))
			{
				$this->_define_once('MB_ENABLED', TRUE);
				mb_internal_encoding('UTF-8');
			}
			else
			{
				$this->_define_once('MB_ENABLED', FALSE);
			}
		}
		else
		{
			log_message('debug', "UTF-8 Support Disabled");
			$this->_define_once('UTF8_ENABLED', FALSE);
		}
	}

	// --------------------------------------------------------------------

	/**
	 * Clean UTF-8 strings
	 *
	 * Ensures strings are UTF-8 by dropping every byte sequence that is not
	 * well-formed UTF-8. Pure ASCII input is returned untouched.
	 *
	 * The work is delegated to mbstring when available, then to iconv, and
	 * finally to a PCRE based filter. iconv alone is not sufficient: with
	 * some iconv implementations '//IGNORE' still makes iconv() return FALSE
	 * on illegal input, which previously leaked out as the return value.
	 *
	 * @access	public
	 * @param	string
	 * @return	string	the cleaned string (FALSE only if every strategy failed)
	 */
	public function clean_string($str)
	{
		if ($this->_is_ascii($str) !== FALSE)
		{
			return $str;
		}

		if (function_exists('mb_convert_encoding'))
		{
			// Drop (rather than replace with '?') invalid sequences, and put
			// the caller's substitute character back afterwards.
			$substitute = mb_substitute_character();
			mb_substitute_character('none');
			$str = mb_convert_encoding($str, 'UTF-8', 'UTF-8');
			mb_substitute_character($substitute);

			return $str;
		}

		if (function_exists('iconv'))
		{
			$converted = @iconv('UTF-8', 'UTF-8//IGNORE', $str);

			if ($converted !== FALSE)
			{
				return $converted;
			}
		}

		return $this->_strip_invalid_utf8($str);
	}

	// --------------------------------------------------------------------

	/**
	 * Remove ASCII control characters
	 *
	 * Removes all ASCII control characters except horizontal tabs,
	 * line feeds, and carriage returns, as all others can cause
	 * problems in XML. Delegates to remove_invisible_characters()
	 * with its second argument set to FALSE.
	 *
	 * @access	public
	 * @param	string
	 * @return	string
	 */
	public function safe_ascii_for_xml($str)
	{
		return remove_invisible_characters($str, FALSE);
	}

	// --------------------------------------------------------------------

	/**
	 * Convert to UTF-8
	 *
	 * Attempts to convert a string to UTF-8 using iconv, or mbstring when
	 * iconv is not available.
	 *
	 * @access	public
	 * @param	string
	 * @param	string	- input encoding
	 * @return	string	the converted string, or FALSE when the conversion
	 *					failed or neither iconv nor mbstring is available
	 */
	public function convert_to_utf8($str, $encoding)
	{
		if (function_exists('iconv'))
		{
			$str = @iconv($encoding, 'UTF-8', $str);
		}
		elseif (function_exists('mb_convert_encoding'))
		{
			$str = @mb_convert_encoding($str, 'UTF-8', $encoding);
		}
		else
		{
			return FALSE;
		}

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Is ASCII?
	 *
	 * Tests if a string is standard 7-bit ASCII or not
	 *
	 * @access	public
	 * @param	string
	 * @return	bool	TRUE when no byte outside 0x00-0x7F is present
	 */
	public function _is_ascii($str)
	{
		// Strict comparison: a PCRE failure (FALSE) must not be mistaken for
		// "no non-ASCII byte found".
		return (preg_match('/[^\x00-\x7F]/S', $str) === 0);
	}

	// --------------------------------------------------------------------

	/**
	 * Define a constant unless it already exists
	 *
	 * Keeps a second instantiation of this class from triggering a
	 * "constant already defined" error.
	 *
	 * @access	protected
	 * @param	string	constant name
	 * @param	mixed	constant value
	 * @return	void
	 */
	protected function _define_once($name, $value)
	{
		if ( ! defined($name))
		{
			define($name, $value);
		}
	}

	// --------------------------------------------------------------------

	/**
	 * Strip invalid UTF-8 using PCRE only
	 *
	 * Last-resort filter used by clean_string(): keeps runs of well-formed
	 * UTF-8 sequences (first alternative, captured) and drops any other
	 * single byte (second alternative).
	 *
	 * @access	protected
	 * @param	string
	 * @return	string	the filtered string, or FALSE on a PCRE error
	 */
	protected function _strip_invalid_utf8($str)
	{
		$pattern = '/((?:[\x00-\x7F]'
			.'|[\xC2-\xDF][\x80-\xBF]'
			.'|\xE0[\xA0-\xBF][\x80-\xBF]'
			.'|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'
			.'|\xED[\x80-\x9F][\x80-\xBF]'
			.'|\xF0[\x90-\xBF][\x80-\xBF]{2}'
			.'|[\xF1-\xF3][\x80-\xBF]{3}'
			.'|\xF4[\x80-\x8F][\x80-\xBF]{2}'
			.'){1,100})|./s';

		$clean = preg_replace($pattern, '$1', $str);

		return ($clean === NULL) ? FALSE : $clean;
	}

	// --------------------------------------------------------------------

}
Pascal Krieteaaec1e42011-01-20 00:01:21 -0500174// End Utf8 Class
Derek Jones98badc12010-03-02 13:08:02 -0600175
Pascal Krieteaaec1e42011-01-20 00:01:21 -0500176/* End of file Utf8.php */
177/* Location: ./system/core/Utf8.php */