<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
/**
 * CodeIgniter
 *
 * An open source application development framework for PHP 5.1.6 or newer
 *
 * @package		CodeIgniter
 * @author		ExpressionEngine Dev Team
 * @copyright	Copyright (c) 2008 - 2011, EllisLab, Inc.
 * @license		http://codeigniter.com/user_guide/license.html
 * @link		http://codeigniter.com
 * @since		Version 2.0
 * @filesource
 */

// ------------------------------------------------------------------------

/**
 * Utf8 Class
 *
 * Provides support for UTF-8 environments: decides at construction time
 * whether UTF-8 handling can be enabled, and offers helpers to clean and
 * convert strings.
 *
 * @package		CodeIgniter
 * @subpackage	Libraries
 * @category	UTF-8
 * @author		ExpressionEngine Dev Team
 * @link		http://codeigniter.com/user_guide/libraries/utf8.html
 */
class CI_Utf8 {

	/**
	 * Constructor
	 *
	 * Determines if UTF-8 support is to be enabled and publishes the
	 * decision through two constants:
	 *
	 *  - UTF8_ENABLED: TRUE when every requirement below is met.
	 *  - MB_ENABLED:   TRUE when UTF-8 support is enabled AND the mbstring
	 *                  extension is loaded; FALSE in every other case, so
	 *                  the flag is always defined once this class has run.
	 *
	 */
	function __construct()
	{
		log_message('debug', "Utf8 Class Initialized");

		global $CFG;

		// NOTE(review): the PHP manual describes mbstring.func_overload as a
		// bitmask (1 = mail, 2 = string functions, 4 = regex), yet the check
		// below only rejects the exact value 1 - confirm whether other values
		// should disable UTF-8 support as well.
		if (
			preg_match('/./u', 'é') === 1								// PCRE must support UTF-8
			AND function_exists('iconv')								// iconv must be installed
			AND ini_get('mbstring.func_overload') != 1					// Multibyte string function overloading cannot be enabled
			AND strtoupper((string) $CFG->item('charset')) === 'UTF-8'	// Application charset must be UTF-8 (any letter case)
			)
		{
			log_message('debug', "UTF-8 Support Enabled");

			define('UTF8_ENABLED', TRUE);

			// set internal encoding for multibyte string functions if necessary
			// and set a flag so we don't have to repeatedly use extension_loaded()
			// or function_exists()
			if (extension_loaded('mbstring'))
			{
				define('MB_ENABLED', TRUE);
				mb_internal_encoding('UTF-8');
			}
			else
			{
				define('MB_ENABLED', FALSE);
			}
		}
		else
		{
			log_message('debug', "UTF-8 Support Disabled");
			define('UTF8_ENABLED', FALSE);

			// Keep the flag defined on this path too, so code reading
			// MB_ENABLED never touches an undefined constant.
			define('MB_ENABLED', FALSE);
		}
	}

	// --------------------------------------------------------------------

	/**
	 * Clean UTF-8 strings
	 *
	 * Ensures strings are UTF-8 by discarding byte sequences that are not
	 * well-formed. Pure 7-bit ASCII input is returned untouched.
	 *
	 * iconv() is tried first. It returns FALSE on some platforms when it
	 * meets malformed input, even with //IGNORE, so that result is never
	 * handed back to the caller; a PCRE based filter is used instead.
	 *
	 * @access	public
	 * @param	string
	 * @return	string
	 */
	function clean_string($str)
	{
		if ($this->_is_ascii($str))
		{
			return $str;
		}

		if (function_exists('iconv'))
		{
			$cleaned = @iconv('UTF-8', 'UTF-8//IGNORE', $str);

			if ($cleaned !== FALSE)
			{
				return $cleaned;
			}
		}

		return $this->_strip_invalid_utf8($str);
	}

	// --------------------------------------------------------------------

	/**
	 * Strip malformed UTF-8
	 *
	 * Keeps only the well-formed UTF-8 byte sequences of a string (per the
	 * RFC 3629 table: no overlong forms, no surrogates, nothing above
	 * U+10FFFF) and drops every other byte.
	 *
	 * @access	private
	 * @param	string
	 * @return	string
	 */
	function _strip_invalid_utf8($str)
	{
		// Byte-wise pattern (no /u modifier on purpose: the input may be invalid)
		$well_formed = '/[\x00-\x7F]+'
			.'|[\xC2-\xDF][\x80-\xBF]'
			.'|\xE0[\xA0-\xBF][\x80-\xBF]'
			.'|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'
			.'|\xED[\x80-\x9F][\x80-\xBF]'
			.'|\xF0[\x90-\xBF][\x80-\xBF]{2}'
			.'|[\xF1-\xF3][\x80-\xBF]{3}'
			.'|\xF4[\x80-\x8F][\x80-\xBF]{2}'
			.'/S';

		$matches = array();
		preg_match_all($well_formed, (string) $str, $matches);

		return empty($matches[0]) ? '' : implode('', $matches[0]);
	}

	// --------------------------------------------------------------------

	/**
	 * Remove ASCII control characters
	 *
	 * Removes all ASCII control characters except horizontal tabs,
	 * line feeds, and carriage returns, as all others can cause
	 * problems in XML. The work is delegated to
	 * remove_invisible_characters(), which is defined outside this class.
	 *
	 * @access	public
	 * @param	string
	 * @return	string
	 */
	function safe_ascii_for_xml($str)
	{
		return remove_invisible_characters($str, FALSE);
	}

	// --------------------------------------------------------------------

	/**
	 * Convert to UTF-8
	 *
	 * Attempts to convert a string to UTF-8, using iconv() when it is
	 * available and mb_convert_encoding() otherwise.
	 *
	 * @access	public
	 * @param	string
	 * @param	string	- input encoding
	 * @return	string	the converted string, or FALSE when neither function
	 *					exists or the conversion function itself returns FALSE
	 */
	function convert_to_utf8($str, $encoding)
	{
		if (function_exists('iconv'))
		{
			return @iconv($encoding, 'UTF-8', $str);
		}

		if (function_exists('mb_convert_encoding'))
		{
			return @mb_convert_encoding($str, 'UTF-8', $encoding);
		}

		return FALSE;
	}

	// --------------------------------------------------------------------

	/**
	 * Is ASCII?
	 *
	 * Tests if a string is standard 7-bit ASCII or not
	 *
	 * @access	public
	 * @param	string
	 * @return	bool
	 */
	function _is_ascii($str)
	{
		// Any byte outside 0x00-0x7F makes the string non-ASCII
		return (preg_match('/[^\x00-\x7F]/S', $str) == 0);
	}

	// --------------------------------------------------------------------

}
// End Utf8 Class

/* End of file Utf8.php */
/* Location: ./system/core/Utf8.php */