<?php
/**
 * CodeIgniter
 *
 * An open source application development framework for PHP
 *
 * This content is released under the MIT License (MIT)
 *
 * Copyright (c) 2014 - 2018, British Columbia Institute of Technology
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 * @package	CodeIgniter
 * @author	EllisLab Dev Team
 * @copyright	Copyright (c) 2008 - 2014, EllisLab, Inc. (https://ellislab.com/)
 * @copyright	Copyright (c) 2014 - 2018, British Columbia Institute of Technology (http://bcit.ca/)
 * @license	http://opensource.org/licenses/MIT	MIT License
 * @link	https://codeigniter.com
 * @since	Version 2.0.0
 * @filesource
 */
defined('BASEPATH') OR exit('No direct script access allowed');

/**
 * Utf8 Class
 *
 * Provides support for UTF-8 environments
 *
 * @package		CodeIgniter
 * @subpackage	Libraries
 * @category	UTF-8
 * @author		EllisLab Dev Team
 * @link		https://codeigniter.com/user_guide/libraries/utf8.html
 */
class CI_Utf8 {

	/**
	 * Class constructor
	 *
	 * Determines if UTF-8 support is to be enabled and publishes the
	 * result through the UTF8_ENABLED constant.
	 *
	 * UTF-8 support is enabled only when all of the following hold:
	 *  - PCRE exposes PREG_BAD_UTF8_ERROR (i.e. it supports UTF-8)
	 *  - iconv or mbstring is available (ICONV_ENABLED / MB_ENABLED)
	 *  - the configured application charset is UTF-8
	 *
	 * @return	void
	 */
	public function __construct()
	{
		// A constant can only be defined once. Guard the definition so that
		// creating a second instance (or a value defined earlier by the
		// application) does not raise an "already defined" notice/warning.
		if ( ! defined('UTF8_ENABLED'))
		{
			define(
				'UTF8_ENABLED',
				defined('PREG_BAD_UTF8_ERROR')						// PCRE must support UTF-8
				&& (ICONV_ENABLED === TRUE OR MB_ENABLED === TRUE)	// iconv or mbstring must be installed
				// Cast so a missing (NULL) config item is not passed to strtoupper()
				&& strtoupper((string) config_item('charset')) === 'UTF-8'	// Application charset must be UTF-8
			);
		}

		// Report the state that is actually in effect
		log_message('debug', UTF8_ENABLED ? 'UTF-8 Support Enabled' : 'UTF-8 Support Disabled');

		log_message('info', 'Utf8 Class Initialized');
	}

	// --------------------------------------------------------------------

	/**
	 * Clean UTF-8 strings
	 *
	 * Ensures strings contain only valid UTF-8 characters.
	 *
	 * Pure 7-bit ASCII input is returned untouched. Anything else is
	 * re-encoded from UTF-8 to UTF-8 using mbstring when available,
	 * otherwise using iconv with the //IGNORE flag. If neither extension
	 * is available the input is returned unchanged.
	 *
	 * NOTE: iconv() returns FALSE on failure, and its error is suppressed
	 * here, so the iconv code path may yield FALSE instead of a string.
	 *
	 * @param	string	$str	String to clean
	 * @return	string
	 */
	public function clean_string($str)
	{
		if ($this->is_ascii($str) === FALSE)
		{
			if (MB_ENABLED)
			{
				$str = mb_convert_encoding($str, 'UTF-8', 'UTF-8');
			}
			elseif (ICONV_ENABLED)
			{
				$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
			}
		}

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Remove ASCII control characters
	 *
	 * Removes all ASCII control characters except horizontal tabs,
	 * line feeds, and carriage returns, as all others can cause
	 * problems in XML.
	 *
	 * This is a thin wrapper that delegates to
	 * remove_invisible_characters() with FALSE as the second argument.
	 *
	 * @param	string	$str	String to clean
	 * @return	string
	 */
	public function safe_ascii_for_xml($str)
	{
		return remove_invisible_characters($str, FALSE);
	}

	// --------------------------------------------------------------------

	/**
	 * Convert to UTF-8
	 *
	 * Attempts to convert a string to UTF-8, preferring mbstring and
	 * falling back to iconv (with its errors suppressed).
	 *
	 * @param	string	$str		Input string
	 * @param	string	$encoding	Input encoding
	 * @return	string	$str encoded in UTF-8 or FALSE on failure
	 *			(including when neither mbstring nor iconv is available)
	 */
	public function convert_to_utf8($str, $encoding)
	{
		if (MB_ENABLED)
		{
			return mb_convert_encoding($str, 'UTF-8', $encoding);
		}
		elseif (ICONV_ENABLED)
		{
			return @iconv($encoding, 'UTF-8', $str);
		}

		// No conversion extension available
		return FALSE;
	}

	// --------------------------------------------------------------------

	/**
	 * Is ASCII?
	 *
	 * Tests if a string is standard 7-bit ASCII or not.
	 *
	 * @param	string	$str	String to check
	 * @return	bool	TRUE when no byte outside \x00-\x7F is found
	 */
	public function is_ascii($str)
	{
		// Zero matches of "any byte outside the 7-bit range" means pure ASCII
		return (preg_match('/[^\x00-\x7F]/S', $str) === 0);
	}

}