<?php defined('BASEPATH') OR exit('No direct script access allowed');
/**
 * CodeIgniter
 *
 * An open source application development framework for PHP 5.1.6 or newer
 *
 * NOTICE OF LICENSE
 *
 * Licensed under the Open Software License version 3.0
 *
 * This source file is subject to the Open Software License (OSL 3.0) that is
 * bundled with this package in the files license.txt / license.rst. It is
 * also available through the world wide web at this URL:
 * http://opensource.org/licenses/OSL-3.0
 * If you did not receive a copy of the license and are unable to obtain it
 * through the world wide web, please send an email to
 * licensing@ellislab.com so we can send you a copy immediately.
 *
 * @package		CodeIgniter
 * @author		EllisLab Dev Team
 * @copyright	Copyright (c) 2008 - 2012, EllisLab, Inc. (http://ellislab.com/)
 * @license		http://opensource.org/licenses/OSL-3.0 Open Software License (OSL 3.0)
 * @link		http://codeigniter.com
 * @since		Version 2.0
 * @filesource
 */

// ------------------------------------------------------------------------

/**
 * Utf8 Class
 *
 * Provides support for UTF-8 environments. On construction it decides
 * whether UTF-8 handling can be switched on and publishes the outcome
 * through the UTF8_ENABLED and MB_ENABLED constants; it also offers
 * helpers for cleaning strings and converting them to UTF-8.
 *
 * @package		CodeIgniter
 * @subpackage	Libraries
 * @category	UTF-8
 * @author		EllisLab Dev Team
 * @link		http://codeigniter.com/user_guide/libraries/utf8.html
 */
class CI_Utf8 {

	/**
	 * Constructor
	 *
	 * Determines if UTF-8 support is to be enabled and defines:
	 *
	 *  - UTF8_ENABLED: TRUE only when PCRE handles UTF-8, iconv is
	 *    available, mbstring function overloading is off and the
	 *    configured application charset is UTF-8.
	 *  - MB_ENABLED: whether the mbstring extension is loaded. It is
	 *    defined in every case so that code reading it never hits an
	 *    undefined constant when UTF-8 support ends up disabled.
	 *
	 * @return	void
	 */
	public function __construct()
	{
		log_message('debug', 'Utf8 Class Initialized');

		global $CFG;

		// Looked up once, so nobody has to call extension_loaded() again
		$mb_loaded = extension_loaded('mbstring');

		// mbstring.func_overload is a bitmask (1 = mail, 2 = string
		// functions, 4 = regex functions); any non-zero value means some
		// overloading is active. ini_get() yields FALSE when the setting
		// does not exist, which casts to 0.
		$overloaded = ((int) ini_get('mbstring.func_overload') !== 0);

		// Charset names are case-insensitive, so 'utf-8' must qualify too
		$charset = strtoupper((string) $CFG->item('charset'));

		if (
			@preg_match('/./u', 'é') === 1		// PCRE must support UTF-8
			&& function_exists('iconv')			// iconv must be installed
			&& $overloaded === FALSE			// Multibyte string function overloading cannot be enabled
			&& $charset === 'UTF-8'				// Application charset must be UTF-8
			)
		{
			define('UTF8_ENABLED', TRUE);
			log_message('debug', 'UTF-8 Support Enabled');

			// set internal encoding for multibyte string functions if necessary
			if ($mb_loaded)
			{
				mb_internal_encoding('UTF-8');
			}
		}
		else
		{
			define('UTF8_ENABLED', FALSE);
			log_message('debug', 'UTF-8 Support Disabled');
		}

		// Always publish the mbstring flag, whatever the outcome above
		defined('MB_ENABLED') OR define('MB_ENABLED', $mb_loaded);
	}

	// --------------------------------------------------------------------

	/**
	 * Clean UTF-8 strings
	 *
	 * Ensures strings are UTF-8. Pure 7-bit ASCII input is returned
	 * untouched. Otherwise the string is passed through iconv with
	 * //IGNORE; if iconv is missing or fails (it can return FALSE on
	 * invalid input), mb_convert_encoding() is tried as a fallback, which
	 * handles invalid bytes according to mb_substitute_character().
	 *
	 * @param	string
	 * @return	string	the cleaned string, or FALSE when no available
	 *					converter could process the input
	 */
	public function clean_string($str)
	{
		if ($this->_is_ascii($str))
		{
			return $str;
		}

		$clean = FALSE;

		if (function_exists('iconv'))
		{
			$clean = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
		}

		if ($clean === FALSE && function_exists('mb_convert_encoding'))
		{
			$clean = @mb_convert_encoding($str, 'UTF-8', 'UTF-8');
		}

		return $clean;
	}

	// --------------------------------------------------------------------

	/**
	 * Remove ASCII control characters
	 *
	 * Removes all ASCII control characters except horizontal tabs,
	 * line feeds, and carriage returns, as all others can cause
	 * problems in XML. The work is delegated to the framework's
	 * remove_invisible_characters() function, called with FALSE as its
	 * second argument.
	 *
	 * @param	string
	 * @return	string
	 */
	public function safe_ascii_for_xml($str)
	{
		return remove_invisible_characters($str, FALSE);
	}

	// --------------------------------------------------------------------

	/**
	 * Convert to UTF-8
	 *
	 * Attempts to convert a string to UTF-8, using iconv when it is
	 * available and mbstring otherwise.
	 *
	 * @param	string
	 * @param	string	- input encoding
	 * @return	string	the converted string, or FALSE when neither
	 *					extension is available or iconv fails
	 */
	public function convert_to_utf8($str, $encoding)
	{
		if (function_exists('iconv'))
		{
			return @iconv($encoding, 'UTF-8', $str);
		}

		if (function_exists('mb_convert_encoding'))
		{
			return @mb_convert_encoding($str, 'UTF-8', $encoding);
		}

		return FALSE;
	}

	// --------------------------------------------------------------------

	/**
	 * Is ASCII?
	 *
	 * Tests if a string is standard 7-bit ASCII or not
	 *
	 * @param	string
	 * @return	bool
	 */
	protected function _is_ascii($str)
	{
		// Zero matches of "any byte outside 0x00-0x7F" means pure ASCII
		return (preg_match('/[^\x00-\x7F]/S', $str) === 0);
	}

}

/* End of file Utf8.php */
/* Location: ./system/core/Utf8.php */