blob: 98352db6f1dce3559f29f7aacba6c80ae8261111 [file] [log] [blame]
Andrey Andreevc5536aa2012-11-01 17:33:58 +02001<?php
/**
 * CodeIgniter
 *
 * An open source application development framework for PHP 5.2.4 or newer
 *
 * NOTICE OF LICENSE
 *
 * Licensed under the Open Software License version 3.0
 *
 * This source file is subject to the Open Software License (OSL 3.0) that is
 * bundled with this package in the files license.txt / license.rst. It is
 * also available through the world wide web at this URL:
 * http://opensource.org/licenses/OSL-3.0
 * If you did not receive a copy of the license and are unable to obtain it
 * through the world wide web, please send an email to
 * licensing@ellislab.com so we can send you a copy immediately.
 *
 * @package		CodeIgniter
 * @author		EllisLab Dev Team
 * @copyright	Copyright (c) 2008 - 2014, EllisLab, Inc. (http://ellislab.com/)
 * @license		http://opensource.org/licenses/OSL-3.0 Open Software License (OSL 3.0)
 * @link		http://codeigniter.com
 * @since		Version 2.0
 * @filesource
 */
// Refuse to run when this file is requested directly instead of being
// loaded by the framework (which defines BASEPATH).
if ( ! defined('BASEPATH'))
{
	exit('No direct script access allowed');
}
Derek Jones98badc12010-03-02 13:08:02 -060028
/**
 * Utf8 Class
 *
 * Provides support for UTF-8 environments
 *
 * @package		CodeIgniter
 * @subpackage	Libraries
 * @category	UTF-8
 * @author		EllisLab Dev Team
 * @link		http://codeigniter.com/user_guide/libraries/utf8.html
 */
class CI_Utf8 {

	/**
	 * Class constructor
	 *
	 * Determines if UTF-8 support is to be enabled and publishes the
	 * outcome through the UTF8_ENABLED constant. Support is enabled only
	 * when all of the following hold:
	 *
	 *  - the PREG_BAD_UTF8_ERROR constant is defined,
	 *  - ICONV_ENABLED or MB_ENABLED is TRUE,
	 *  - the configured 'charset' item equals 'UTF-8' (case-insensitive).
	 *
	 * The decision and the class initialization are both logged at
	 * 'debug' level.
	 *
	 * @return	void
	 */
	public function __construct()
	{
		if (
			defined('PREG_BAD_UTF8_ERROR')				// PCRE must support UTF-8
			&& (ICONV_ENABLED === TRUE OR MB_ENABLED === TRUE)	// iconv or mbstring must be installed
			&& strnatcasecmp(config_item('charset'), 'UTF-8') === 0	// Application charset must be UTF-8
		)
		{
			define('UTF8_ENABLED', TRUE);
			log_message('debug', 'UTF-8 Support Enabled');
		}
		else
		{
			define('UTF8_ENABLED', FALSE);
			log_message('debug', 'UTF-8 Support Disabled');
		}

		log_message('debug', 'Utf8 Class Initialized');
	}

	// --------------------------------------------------------------------

	/**
	 * Clean UTF-8 strings
	 *
	 * Ensures strings contain only valid UTF-8 characters.
	 *
	 * Strings that are pure 7-bit ASCII are returned untouched. Anything
	 * else is passed through iconv (invalid sequences discarded via
	 * //IGNORE) when ICONV_ENABLED is set. If iconv fails and returns
	 * FALSE, mbstring is used as a fallback when MB_ENABLED is set, so
	 * that a single malformed byte does not turn the whole result into
	 * FALSE. Note that mbstring replaces invalid sequences with its
	 * substitute character (by default '?') instead of discarding them.
	 *
	 * When neither extension is flagged as available the input is
	 * returned unchanged. FALSE can only be returned when iconv fails
	 * and mbstring is not available.
	 *
	 * @param	string	$str	String to clean
	 * @return	string
	 */
	public function clean_string($str)
	{
		// 7-bit ASCII is always valid UTF-8; nothing to clean.
		if ($this->is_ascii($str))
		{
			return $str;
		}

		if (ICONV_ENABLED)
		{
			$cleaned = @iconv('UTF-8', 'UTF-8//IGNORE', $str);

			// Keep the iconv result unless it failed AND mbstring can take over.
			if ($cleaned !== FALSE OR ! MB_ENABLED)
			{
				return $cleaned;
			}
		}

		if (MB_ENABLED)
		{
			return mb_convert_encoding($str, 'UTF-8', 'UTF-8');
		}

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Remove ASCII control characters
	 *
	 * Removes all ASCII control characters except horizontal tabs,
	 * line feeds, and carriage returns, as all others can cause
	 * problems in XML.
	 *
	 * The work is delegated to remove_invisible_characters(), called
	 * with FALSE as its second argument.
	 *
	 * @param	string	$str	String to clean
	 * @return	string
	 */
	public function safe_ascii_for_xml($str)
	{
		return remove_invisible_characters($str, FALSE);
	}

	// --------------------------------------------------------------------

	/**
	 * Convert to UTF-8
	 *
	 * Attempts to convert a string to UTF-8, using iconv when
	 * ICONV_ENABLED is set and mbstring when MB_ENABLED is TRUE.
	 * Conversion warnings are suppressed; the raw result of the
	 * conversion function is returned.
	 *
	 * @param	string	$str		Input string
	 * @param	string	$encoding	Input encoding
	 * @return	string	$str encoded in UTF-8 or FALSE on failure
	 */
	public function convert_to_utf8($str, $encoding)
	{
		if (ICONV_ENABLED)
		{
			return @iconv($encoding, 'UTF-8', $str);
		}
		elseif (MB_ENABLED === TRUE)
		{
			return @mb_convert_encoding($str, 'UTF-8', $encoding);
		}

		// No conversion extension available.
		return FALSE;
	}

	// --------------------------------------------------------------------

	/**
	 * Is ASCII?
	 *
	 * Tests if a string is standard 7-bit ASCII or not, i.e. whether it
	 * contains no byte outside the 0x00-0x7F range.
	 *
	 * @param	string	$str	String to check
	 * @return	bool
	 */
	public function is_ascii($str)
	{
		return (preg_match('/[^\x00-\x7F]/S', $str) === 0);
	}

}
Derek Jones98badc12010-03-02 13:08:02 -0600154
/* End of file Utf8.php */
/* Location: ./system/core/Utf8.php */