blob: bc7afed910baf3805229cab9c8adba75803476db [file] [log] [blame]
<?php defined('BASEPATH') OR exit('No direct script access allowed');
Derek Jones98badc12010-03-02 13:08:02 -06002/**
3 * CodeIgniter
4 *
Phil Sturgeon07c1ac82012-03-09 17:03:37 +00005 * An open source application development framework for PHP 5.2.4 or newer
Derek Jones98badc12010-03-02 13:08:02 -06006 *
Derek Jonesf4a4bd82011-10-20 12:18:42 -05007 * NOTICE OF LICENSE
Andrey Andreevc123e112012-01-08 00:17:34 +02008 *
Derek Jonesf4a4bd82011-10-20 12:18:42 -05009 * Licensed under the Open Software License version 3.0
Andrey Andreevc123e112012-01-08 00:17:34 +020010 *
Derek Jonesf4a4bd82011-10-20 12:18:42 -050011 * This source file is subject to the Open Software License (OSL 3.0) that is
12 * bundled with this package in the files license.txt / license.rst. It is
13 * also available through the world wide web at this URL:
14 * http://opensource.org/licenses/OSL-3.0
15 * If you did not receive a copy of the license and are unable to obtain it
16 * through the world wide web, please send an email to
17 * licensing@ellislab.com so we can send you a copy immediately.
18 *
Derek Jones98badc12010-03-02 13:08:02 -060019 * @package CodeIgniter
Derek Jonesf4a4bd82011-10-20 12:18:42 -050020 * @author EllisLab Dev Team
Greg Aker0defe5d2012-01-01 18:46:41 -060021 * @copyright Copyright (c) 2008 - 2012, EllisLab, Inc. (http://ellislab.com/)
Derek Jonesf4a4bd82011-10-20 12:18:42 -050022 * @license http://opensource.org/licenses/OSL-3.0 Open Software License (OSL 3.0)
Derek Jones98badc12010-03-02 13:08:02 -060023 * @link http://codeigniter.com
Pascal Kriete5b2d2da2010-11-04 17:23:40 -040024 * @since Version 2.0
Derek Jones98badc12010-03-02 13:08:02 -060025 * @filesource
26 */
27
/**
 * Utf8 Class
 *
 * Provides support for UTF-8 environments: detects at construction time
 * whether UTF-8 handling can be enabled and offers helpers for cleaning
 * and converting strings.
 *
 * @package		CodeIgniter
 * @subpackage	Libraries
 * @category	UTF-8
 * @author		EllisLab Dev Team
 * @link		http://codeigniter.com/user_guide/libraries/utf8.html
 */
class CI_Utf8 {

	/**
	 * Class constructor
	 *
	 * Determines if UTF-8 support is to be enabled and publishes the
	 * result through the MB_ENABLED and UTF8_ENABLED constants.
	 *
	 * @return	void
	 */
	public function __construct()
	{
		log_message('debug', 'Utf8 Class Initialized');

		$charset = strtoupper(config_item('charset'));

		// Set a flag so we don't have to repeatedly use extension_loaded()
		// or function_exists(). The defined() guard keeps a second
		// instantiation from raising a "constant already defined" error.
		$mb_loaded = extension_loaded('mbstring');

		if ( ! defined('MB_ENABLED'))
		{
			define('MB_ENABLED', $mb_loaded);
		}

		// Set internal encoding for multibyte string functions if available
		if ($mb_loaded === TRUE)
		{
			mb_internal_encoding($charset);
		}

		if ( ! defined('UTF8_ENABLED'))
		{
			define('UTF8_ENABLED', (
				@preg_match('/./u', 'é') === 1		// PCRE must support UTF-8
				&& function_exists('iconv')		// iconv must be installed
				&& MB_ENABLED === TRUE			// mbstring must be enabled
				&& $charset === 'UTF-8'			// Application charset must be UTF-8
			));
		}

		log_message('debug', (UTF8_ENABLED === TRUE) ? 'UTF-8 Support Enabled' : 'UTF-8 Support Disabled');
	}

	// --------------------------------------------------------------------

	/**
	 * Clean UTF-8 strings
	 *
	 * Ensures strings contain only valid UTF-8 characters. Pure 7-bit
	 * ASCII input is returned untouched. Otherwise invalid byte sequences
	 * are dropped, using mbstring when it is enabled and iconv as the
	 * fallback. If neither extension is available the string is returned
	 * as given.
	 *
	 * @uses	CI_Utf8::_is_ascii()	Decide whether a conversion is needed
	 *
	 * @param	string	$str	String to clean
	 * @return	string
	 */
	public function clean_string($str)
	{
		if ($this->_is_ascii($str) === FALSE)
		{
			if (MB_ENABLED === TRUE)
			{
				// Temporarily switch the substitute character to 'none' so that
				// invalid sequences are removed instead of replaced with '?',
				// then restore whatever the application had configured.
				$substitute = mb_substitute_character();
				mb_substitute_character('none');
				$str = mb_convert_encoding($str, 'UTF-8', 'UTF-8');
				mb_substitute_character($substitute);
			}
			elseif (function_exists('iconv'))
			{
				// NOTE: on some PHP/libiconv builds this returns FALSE when the
				// input contains illegal sequences, hence mbstring is preferred.
				$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
			}
		}

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Remove ASCII control characters
	 *
	 * Removes all ASCII control characters except horizontal tabs,
	 * line feeds, and carriage returns, as all others can cause
	 * problems in XML.
	 *
	 * @param	string	$str	String to clean
	 * @return	string
	 */
	public function safe_ascii_for_xml($str)
	{
		return remove_invisible_characters($str, FALSE);
	}

	// --------------------------------------------------------------------

	/**
	 * Convert to UTF-8
	 *
	 * Attempts to convert a string to UTF-8, using iconv when it exists
	 * and mbstring otherwise.
	 *
	 * @param	string	$str		Input string
	 * @param	string	$encoding	Input encoding
	 * @return	string|bool	$str encoded in UTF-8 or FALSE on failure
	 */
	public function convert_to_utf8($str, $encoding)
	{
		if (function_exists('iconv'))
		{
			return @iconv($encoding, 'UTF-8', $str);
		}
		elseif (MB_ENABLED === TRUE)
		{
			return @mb_convert_encoding($str, 'UTF-8', $encoding);
		}

		// No conversion facility is available
		return FALSE;
	}

	// --------------------------------------------------------------------

	/**
	 * Is ASCII?
	 *
	 * Tests if a string is standard 7-bit ASCII or not.
	 *
	 * @param	string	$str	String to check
	 * @return	bool
	 */
	protected function _is_ascii($str)
	{
		// Any byte outside 0x00-0x7F means the string is not plain ASCII
		return (preg_match('/[^\x00-\x7F]/S', $str) === 0);
	}

}
Derek Jones98badc12010-03-02 13:08:02 -0600165
Pascal Krieteaaec1e42011-01-20 00:01:21 -0500166/* End of file Utf8.php */
Timothy Warren40403d22012-04-19 16:38:50 -0400167/* Location: ./system/core/Utf8.php */