blob: ade77491d5d80430ed614aaae4f1d0cac5ed154f [file] [log] [blame]
Andrey Andreevc5536aa2012-11-01 17:33:58 +02001<?php
Derek Jonese701d762010-03-02 18:17:01 -06002/**
3 * CodeIgniter
4 *
Andrey Andreevfe9309d2015-01-09 17:48:58 +02005 * An open source application development framework for PHP
Derek Jonese701d762010-03-02 18:17:01 -06006 *
Andrey Andreevbdb96ca2014-10-28 00:13:31 +02007 * This content is released under the MIT License (MIT)
Andrey Andreevbb488dc2012-01-07 23:35:16 +02008 *
Andrey Andreevfe9309d2015-01-09 17:48:58 +02009 * Copyright (c) 2014 - 2015, British Columbia Institute of Technology
Andrey Andreevbb488dc2012-01-07 23:35:16 +020010 *
Andrey Andreevbdb96ca2014-10-28 00:13:31 +020011 * Permission is hereby granted, free of charge, to any person obtaining a copy
12 * of this software and associated documentation files (the "Software"), to deal
13 * in the Software without restriction, including without limitation the rights
14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 * copies of the Software, and to permit persons to whom the Software is
16 * furnished to do so, subject to the following conditions:
Derek Jonesf4a4bd82011-10-20 12:18:42 -050017 *
Andrey Andreevbdb96ca2014-10-28 00:13:31 +020018 * The above copyright notice and this permission notice shall be included in
19 * all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27 * THE SOFTWARE.
28 *
29 * @package CodeIgniter
30 * @author EllisLab Dev Team
darwinel871754a2014-02-11 17:34:57 +010031 * @copyright Copyright (c) 2008 - 2014, EllisLab, Inc. (http://ellislab.com/)
Andrey Andreevfe9309d2015-01-09 17:48:58 +020032 * @copyright Copyright (c) 2014 - 2015, British Columbia Institute of Technology (http://bcit.ca/)
Andrey Andreevbdb96ca2014-10-28 00:13:31 +020033 * @license http://opensource.org/licenses/MIT MIT License
34 * @link http://codeigniter.com
35 * @since Version 1.0.0
Derek Jonese701d762010-03-02 18:17:01 -060036 * @filesource
37 */
Andrey Andreevc5536aa2012-11-01 17:33:58 +020038defined('BASEPATH') OR exit('No direct script access allowed');
Derek Jonese701d762010-03-02 18:17:01 -060039
Derek Jonese701d762010-03-02 18:17:01 -060040/**
41 * Security Class
42 *
43 * @package CodeIgniter
44 * @subpackage Libraries
45 * @category Security
Derek Jonesf4a4bd82011-10-20 12:18:42 -050046 * @author EllisLab Dev Team
Pascal Krietec9c045a2011-04-05 14:50:41 -040047 * @link http://codeigniter.com/user_guide/libraries/security.html
Derek Jonese701d762010-03-02 18:17:01 -060048 */
49class CI_Security {
Barry Mienydd671972010-10-04 16:33:58 +020050
/**
 * List of sanitize filename strings
 *
 * Literal and URL-encoded character sequences that are considered
 * unsafe inside a file name.
 *
 * NOTE(review): presumably consumed by a filename sanitizing method that
 * is outside the visible part of this file - confirm against the caller.
 *
 * @var	array
 */
public $filename_bad_chars = array(
	'../', '<!--', '-->', '<', '>',
	"'", '"', '&', '$', '#',
	'{', '}', '[', ']', '=',
	';', '?', '%20', '%22',
	'%3c',		// <
	'%253c',	// < (double URL-encoded, %25 is the percent sign)
	'%3e',		// >
	'%0e',		// decodes to byte 0x0E, not ">" as previously commented - NOTE(review): possibly a typo for another code, confirm before changing
	'%28',		// (
	'%29',		// )
	'%2528',	// ( (double URL-encoded)
	'%26',		// &
	'%24',		// $
	'%3f',		// ?
	'%3b',		// ;
	'%3d'		// =
);
74
/**
 * Character set
 *
 * Will be overridden by the constructor, which assigns the
 * upper-cased 'charset' configuration item.
 *
 * @var	string
 */
public $charset = 'UTF-8';

/**
 * XSS Hash
 *
 * Random Hash for protecting URLs.
 * Generated lazily by xss_hash(); NULL until first requested.
 *
 * @var	string
 */
protected $_xss_hash;

/**
 * CSRF Hash
 *
 * Random hash for Cross Site Request Forgery protection cookie
 *
 * @var	string
 */
protected $_csrf_hash;

/**
 * CSRF Expire time
 *
 * Expiration time for Cross Site Request Forgery protection cookie.
 * Defaults to two hours (in seconds).
 * Replaced by the 'csrf_expire' configuration item when CSRF
 * protection is enabled and that item is not NULL.
 *
 * @var	int
 */
protected $_csrf_expire = 7200;

/**
 * CSRF Token name
 *
 * Token name for Cross Site Request Forgery protection cookie.
 * Replaced by the 'csrf_token_name' configuration item when CSRF
 * protection is enabled and that item is not NULL.
 *
 * @var	string
 */
protected $_csrf_token_name = 'ci_csrf_token';

/**
 * CSRF Cookie name
 *
 * Cookie name for Cross Site Request Forgery protection cookie.
 * Replaced by the 'csrf_cookie_name' configuration item when CSRF
 * protection is enabled; the constructor also prepends the
 * 'cookie_prefix' configuration item, if one is set.
 *
 * @var	string
 */
protected $_csrf_cookie_name = 'ci_csrf_token';
Andrey Andreev3d113bd2011-10-05 00:03:20 +0300129
/**
 * List of never allowed strings
 *
 * Maps each forbidden literal (key) to the text that replaces it (value).
 *
 * NOTE(review): presumably applied by _do_never_allowed(), which is
 * outside the visible part of this file - confirm.
 *
 * @var	array
 */
protected $_never_allowed_str = array(
	'document.cookie'	=> '[removed]',
	'document.write'	=> '[removed]',
	'.parentNode'		=> '[removed]',
	'.innerHTML'		=> '[removed]',
	'-moz-binding'		=> '[removed]',
	'<!--'			=> '&lt;!--',
	'-->'			=> '--&gt;',
	'<![CDATA['		=> '&lt;![CDATA[',
	'<comment>'		=> '&lt;comment&gt;'
);
Derek Jonese701d762010-03-02 18:17:01 -0600146
/**
 * List of never allowed regex replacements
 *
 * Each entry is a pattern fragment without delimiters.
 *
 * NOTE(review): presumably wrapped in delimiters and applied by
 * _do_never_allowed(), which is outside the visible part of this
 * file - confirm.
 *
 * NOTE(review): in the last entry, "\1" appears inside character
 * classes ([^\1]). PCRE does not treat that as a back-reference, so
 * it probably does not mean "anything but the opening quote" -
 * confirm before relying on it.
 *
 * @var	array
 */
protected $_never_allowed_regex = array(
	'javascript\s*:',
	'(document|(document\.)?window)\.(location|on\w*)',
	'expression\s*(\(|&\#40;)', // CSS and IE
	'vbscript\s*:', // IE, surprise!
	'wscript\s*:', // IE
	'jscript\s*:', // IE
	'vbs\s*:', // IE
	'Redirect\s+30\d',
	"([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
);
Andrey Andreev92ebfb62012-05-17 12:49:24 +0300163
/**
 * Class constructor
 *
 * Loads the CSRF settings (only when CSRF protection is enabled),
 * picks up the configured character set and logs the initialization.
 *
 * @return	void
 */
public function __construct()
{
	// CSRF related settings are only read when the protection is switched on
	if (config_item('csrf_protection'))
	{
		$csrf_items = array('csrf_expire', 'csrf_token_name', 'csrf_cookie_name');

		foreach ($csrf_items as $item)
		{
			$configured = config_item($item);

			// A NULL config value keeps the class default
			if ($configured !== NULL)
			{
				$property = '_'.$item;
				$this->$property = $configured;
			}
		}

		// Prepend the application specific cookie prefix, if any
		$prefix = config_item('cookie_prefix');
		if ($prefix)
		{
			$this->_csrf_cookie_name = $prefix.$this->_csrf_cookie_name;
		}

		// Make sure a CSRF hash is available
		$this->_csrf_set_hash();
	}

	$this->charset = strtoupper(config_item('charset'));

	log_message('info', 'Security Class Initialized');
}
197
198 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200199
/**
 * CSRF Verify
 *
 * For non-POST requests only the CSRF cookie is (re)sent. For POST
 * requests the submitted token must be present both in $_POST and in
 * $_COOKIE, both values must be strings, and they must be equal;
 * otherwise csrf_show_error() is called.
 *
 * URIs matching one of the 'csrf_exclude_uris' patterns skip the check.
 *
 * @return	CI_Security|bool	$this, or whatever csrf_set_cookie()
 *					returns for non-POST requests
 */
public function csrf_verify()
{
	// REQUEST_METHOD may be missing (e.g. CLI); treat that as "not a POST"
	$method = isset($_SERVER['REQUEST_METHOD'])
		? strtoupper($_SERVER['REQUEST_METHOD'])
		: '';

	// If it's not a POST request we will set the CSRF cookie
	if ($method !== 'POST')
	{
		return $this->csrf_set_cookie();
	}

	// Check if URI has been whitelisted from CSRF checks
	if ($exclude_uris = config_item('csrf_exclude_uris'))
	{
		$uri = load_class('URI', 'core');
		foreach ($exclude_uris as $excluded)
		{
			// Entries are regular expressions supplied by the configuration
			if (preg_match('#^'.$excluded.'$#i'.(UTF8_ENABLED ? 'u' : ''), $uri->uri_string()))
			{
				return $this;
			}
		}
	}

	// Both tokens must exist and must be plain strings; array input
	// (e.g. token[]=x) must never be accepted as a valid token.
	$valid = isset($_POST[$this->_csrf_token_name], $_COOKIE[$this->_csrf_cookie_name])
		&& is_string($_POST[$this->_csrf_token_name])
		&& is_string($_COOKIE[$this->_csrf_cookie_name]);

	if ($valid)
	{
		// Prefer a timing-safe comparison where the runtime provides one
		$valid = function_exists('hash_equals')
			? hash_equals($_COOKIE[$this->_csrf_cookie_name], $_POST[$this->_csrf_token_name])
			: ($_POST[$this->_csrf_token_name] === $_COOKIE[$this->_csrf_cookie_name]);
	}

	if ( ! $valid)
	{
		// NOTE(review): show_error() presumably terminates the request - confirm
		$this->csrf_show_error();
	}

	// We kill this since we're done and we don't want to pollute the _POST array
	unset($_POST[$this->_csrf_token_name]);

	// Regenerate on every submission?
	if (config_item('csrf_regenerate'))
	{
		// Nothing should last forever
		unset($_COOKIE[$this->_csrf_cookie_name]);
		$this->_csrf_hash = NULL;
	}

	$this->_csrf_set_hash();
	$this->csrf_set_cookie();

	log_message('info', 'CSRF token verified');
	return $this;
}
Barry Mienydd671972010-10-04 16:33:58 +0200250
Derek Jonese701d762010-03-02 18:17:01 -0600251 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200252
/**
 * CSRF Set Cookie
 *
 * Sends the CSRF cookie holding the current CSRF hash. Nothing is sent
 * when secure cookies are configured but the request is not HTTPS.
 *
 * @codeCoverageIgnore
 * @return	CI_Security|bool	$this once the cookie was handed to setcookie(),
 *					FALSE when a secure cookie is required on a non-HTTPS request
 */
public function csrf_set_cookie()
{
	$secure_only = (bool) config_item('cookie_secure');

	// A "secure" cookie must not be sent over a plain connection
	if ($secure_only && ! is_https())
	{
		return FALSE;
	}

	$expires_at = time() + $this->_csrf_expire;

	setcookie(
		$this->_csrf_cookie_name,
		$this->_csrf_hash,
		$expires_at,
		config_item('cookie_path'),
		config_item('cookie_domain'),
		$secure_only,
		config_item('cookie_httponly')
	);
	log_message('info', 'CSRF cookie sent');

	return $this;
}
282
283 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200284
/**
 * Show CSRF Error
 *
 * Reports a failed CSRF check through show_error() with HTTP status 403.
 *
 * @return	void
 */
public function csrf_show_error()
{
	$message = 'The action you have requested is not allowed.';
	$status_code = 403;

	show_error($message, $status_code);
}
294
295 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200296
/**
 * Get CSRF Hash
 *
 * Accessor for the protected CSRF hash.
 *
 * @see		CI_Security::$_csrf_hash
 * @return	string	CSRF hash
 */
public function get_csrf_hash()
{
	$csrf_hash = $this->_csrf_hash;

	return $csrf_hash;
}
307
308 // --------------------------------------------------------------------
309
/**
 * Get CSRF Token Name
 *
 * Accessor for the protected CSRF token name.
 *
 * @see		CI_Security::$_csrf_token_name
 * @return	string	CSRF token name
 */
public function get_csrf_token_name()
{
	$token_name = $this->_csrf_token_name;

	return $token_name;
}
320
321 // --------------------------------------------------------------------
322
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting Hacks can be
 * prevented. This method does a fair amount of work but
 * it is extremely thorough, designed to prevent even the
 * most obscure XSS attempts. Nothing is ever 100% foolproof,
 * of course, but I haven't been able to get anything past
 * the filter.
 *
 * Note: Should only be used to deal with data upon submission.
 *	 It's not something that should be used for general
 *	 runtime processing.
 *
 * Note: When an array is passed, every element is cleaned recursively
 *	 and the $is_image flag is NOT forwarded to the elements.
 *
 * @link	http://channel.bitflux.ch/wiki/XSS_Prevention
 * 		Based in part on some code and ideas from Bitflux.
 *
 * @link	http://ha.ckers.org/xss.html
 * 		To help develop this script I used this great list of
 *		vulnerabilities along with a few other hacks I've
 *		harvested from examining vulnerabilities in other programs.
 *
 * @param	string|string[]	$str		Input data
 * @param 	bool		$is_image	Whether the input is an image
 * @return	string|string[]|bool	Cleaned string (or array of cleaned values);
 *					when $is_image is TRUE, a boolean telling
 *					whether the data was left unchanged by the filter
 */
public function xss_clean($str, $is_image = FALSE)
{
	// Is the string an array?
	if (is_array($str))
	{
		// each() was removed in PHP 8 and depended on the array's internal
		// pointer; iterating over the keys visits every element exactly once.
		foreach (array_keys($str) as $key)
		{
			$str[$key] = $this->xss_clean($str[$key]);
		}

		return $str;
	}

	// Remove Invisible Characters
	$str = remove_invisible_characters($str);

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Use rawurldecode() so it does not remove plus signs
	 *
	 * The loop repeats so that multiply-encoded input (e.g. %2541)
	 * is decoded until no percent-escape is left.
	 */
	do
	{
		$str = rawurldecode($str);
	}
	while (preg_match('/%[0-9a-f]{2,}/i', $str));

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 */
	$str = preg_replace_callback("/[^a-z0-9>]+[a-z0-9]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
	$str = preg_replace_callback('/<\w+.*/si', array($this, '_decode_entity'), $str);

	// Remove Invisible Characters Again!
	$str = remove_invisible_characters($str);

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like this: ja<TAB>vascript
	 * NOTE: we deal with spaces between characters later.
	 * NOTE: preg_replace was found to be amazingly slow here on
	 * large blocks of data, so we use str_replace.
	 */
	$str = str_replace("\t", ' ', $str);

	// Capture converted string for later comparison
	$converted_string = $str;

	// Remove Strings that are never allowed
	$str = $this->_do_never_allowed($str);

	/*
	 * Makes PHP tags safe
	 *
	 * Note: XML tags are inadvertently replaced too:
	 *
	 * <?xml
	 *
	 * But it doesn't seem to pose a problem.
	 */
	if ($is_image === TRUE)
	{
		// Images have a tendency to have the PHP short opening and
		// closing tags every so often so we skip those and only
		// do the long opening tags.
		$str = preg_replace('/<\?(php)/i', '&lt;?\\1', $str);
	}
	else
	{
		// The closing tag is written as two concatenated pieces on purpose
		$str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
	}

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like:  j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 */
	$words = array(
		'javascript', 'expression', 'vbscript', 'jscript', 'wscript',
		'vbs', 'script', 'base64', 'applet', 'alert', 'document',
		'write', 'cookie', 'window', 'confirm', 'prompt'
	);

	foreach ($words as $word)
	{
		$word = implode('\s*', str_split($word)).'\s*';

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto"
		// (substr() drops the trailing '\s*' that was appended above)
		$str = preg_replace_callback('#('.substr($word, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 * We used to do some version comparisons and use of stripos(),
	 * but it is dog slow compared to these simplified non-capturing
	 * preg_match(), especially if the pattern exists in the string
	 *
	 * Note: It was reported that not only space characters, but all in
	 * the following pattern can be parsed as separators between a tag name
	 * and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
	 * ... however, remove_invisible_characters() above already strips the
	 * hex-encoded ones, so we'll skip them below.
	 *
	 * The loop runs until a pass leaves the string unchanged.
	 */
	do
	{
		$original = $str;

		if (preg_match('/<a/i', $str))
		{
			$str = preg_replace_callback('#<a[^a-z0-9>]+([^>]*?)(?:>|$)#si', array($this, '_js_link_removal'), $str);
		}

		if (preg_match('/<img/i', $str))
		{
			$str = preg_replace_callback('#<img[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
		}

		if (preg_match('/script|xss/i', $str))
		{
			$str = preg_replace('#</*(?:script|xss).*?>#si', '[removed]', $str);
		}
	}
	while ($original !== $str);

	unset($original);

	// Remove evil attributes such as style, onclick and xmlns
	$str = $this->_remove_evil_attributes($str, $is_image);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 */
	$pattern = '#'
		.'<((/*\s*)([a-z0-9]+)(?=[^a-z0-9])' // tag start and name, followed by a non-tag character
		// optional attributes
		.'([\s\042\047/=]+' // non-attribute characters, excluding > (tag close) for obvious reasons
		.'[^\s\042\047>/=]+' // attribute characters
		// optional attribute-value
		.'(\s*=\s*' // attribute-value separator
		.'(\042[^\042]*\042|\047[^\047]*\047|[^\s\042\047=><`]*)' // single, double or non-quoted value
		.')?' // end optional attribute-value group
		.')*' // end optional attributes group
		.'[^>]*)>#isS';

	// Note: It would be nice to optimize this for speed, BUT
	//       only matching the naughty elements here results in
	//       false positives and in turn - vulnerabilities!
	do
	{
		$old_str = $str;
		$str = preg_replace_callback($pattern, array($this, '_sanitize_naughty_html'), $str);
	}
	while ($old_str !== $str);
	unset($old_str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed. Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:	eval&#40;'some code'&#41;
	 */
	$str = preg_replace('#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
				'\\1\\2&#40;\\3&#41;',
				$str);

	// Final clean up
	// This adds a bit of extra precaution in case
	// something got through the above filters
	$str = $this->_do_never_allowed($str);

	/*
	 * Images are Handled in a Special Way
	 * - Essentially, we want to know that after all of the character
	 * conversion is done whether any unwanted, likely XSS, code was found.
	 * If not, we return TRUE, as the image is clean.
	 * However, if the string post-conversion does not match the
	 * string post-removal of XSS, then it fails, as there was unwanted XSS
	 * code found and removed/changed during processing.
	 */
	if ($is_image === TRUE)
	{
		return ($str === $converted_string);
	}

	return $str;
}
558
559 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200560
/**
 * XSS Hash
 *
 * Returns the XSS hash, creating it on first use. The hash is built
 * from 16 random bytes (hex-encoded); when no random bytes can be
 * obtained, an md5() of a unique id is used instead.
 *
 * @see		CI_Security::$_xss_hash
 * @return	string	XSS hash
 */
public function xss_hash()
{
	// Already generated during this request - reuse it
	if ($this->_xss_hash !== NULL)
	{
		return $this->_xss_hash;
	}

	$bytes = $this->get_random_bytes(16);

	if ($bytes === FALSE)
	{
		// No random source was available
		$this->_xss_hash = md5(uniqid(mt_rand(), TRUE));
	}
	else
	{
		$this->_xss_hash = bin2hex($bytes);
	}

	return $this->_xss_hash;
}
581
582 // --------------------------------------------------------------------
583
/**
 * Get random bytes
 *
 * Tries the available random sources in order: random_bytes(),
 * mcrypt, /dev/urandom and finally OpenSSL.
 *
 * @param	int	$length	Output length; must be a positive integer
 *				(or a string made only of digits)
 * @return	string|bool	Raw random bytes, or FALSE when $length is
 *				invalid or no random source could deliver
 */
public function get_random_bytes($length)
{
	if (empty($length) OR ! ctype_digit((string) $length))
	{
		return FALSE;
	}

	// Preferred source where the runtime provides it (PHP 7+ or a polyfill)
	if (function_exists('random_bytes'))
	{
		try
		{
			// The cast avoids a TypeError for numeric strings
			return random_bytes((int) $length);
		}
		catch (Exception $e)
		{
			// If random_bytes() can't do the job, the fallbacks below
			// would not be any more trustworthy.
			log_message('error', $e->getMessage());
			return FALSE;
		}
	}

	// Unfortunately, none of the following PRNGs is guaranteed to exist ...
	if (defined('MCRYPT_DEV_URANDOM') && ($output = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM)) !== FALSE)
	{
		return $output;
	}

	if (is_readable('/dev/urandom') && ($fp = fopen('/dev/urandom', 'rb')) !== FALSE)
	{
		// Try not to waste entropy ...
		is_php('5.4') && stream_set_chunk_size($fp, $length);
		$output = fread($fp, $length);
		fclose($fp);
		if ($output !== FALSE)
		{
			return $output;
		}
	}

	if (function_exists('openssl_random_pseudo_bytes'))
	{
		return openssl_random_pseudo_bytes($length);
	}

	return FALSE;
}
623
624 // --------------------------------------------------------------------
625
/**
 * HTML Entities Decode
 *
 * A replacement for html_entity_decode()
 *
 * The reason we are not using html_entity_decode() by itself is because
 * while it is not technically correct to leave out the semicolon
 * at the end of an entity most browsers will still interpret the entity
 * correctly. html_entity_decode() does not convert entities without
 * semicolons, so we are left with our own little solution here. Bummer.
 *
 * Decoding is repeated until a pass leaves the string unchanged, so
 * entities that only appear after a previous pass are decoded as well.
 *
 * Note: the named-entity table is kept in a static variable and is built
 * only once, using the character set in effect on the first call that
 * needs it.
 *
 * @link	http://php.net/html-entity-decode
 *
 * @param	string	$str		Input
 * @param	string	$charset	Character set; $this->charset is used when omitted
 * @return	string
 */
public function entity_decode($str, $charset = NULL)
{
	// Without an ampersand there is nothing to decode
	if (strpos($str, '&') === FALSE)
	{
		return $str;
	}

	static $_entities;

	isset($charset) OR $charset = $this->charset;
	$flag = is_php('5.4')
		? ENT_COMPAT | ENT_HTML5
		: ENT_COMPAT;

	do
	{
		$str_compare = $str;

		// Decode standard entities, avoiding false positives
		// (only names that are NOT followed by a semicolon match here)
		if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
		{
			if ( ! isset($_entities))
			{
				$_entities = array_map(
					'strtolower',
					is_php('5.3.4')
						? get_html_translation_table(HTML_ENTITIES, $flag, $charset)
						: get_html_translation_table(HTML_ENTITIES, $flag)
				);

				// If we're not on PHP 5.4+, add the possibly dangerous HTML 5
				// entities to the array manually
				if ($flag === ENT_COMPAT)
				{
					$_entities[':'] = '&colon;';
					$_entities['('] = '&lpar;';
					$_entities[')'] = '&rpar;';
					$_entities["\n"] = '&newline;';
					$_entities["\t"] = '&tab;';
				}
			}

			$replace = array();
			$matches = array_unique(array_map('strtolower', $matches[0]));

			// Iterate by value: nothing is written back into $matches, and a
			// by-reference loop would leave a dangling reference behind
			foreach ($matches as $match)
			{
				// The table stores entities with their semicolon, the match has none
				if (($char = array_search($match.';', $_entities, TRUE)) !== FALSE)
				{
					$replace[$match] = $char;
				}
			}

			$str = str_ireplace(array_keys($replace), array_values($replace), $str);
		}

		// Decode numeric & UTF16 two byte entities
		// (a missing trailing semicolon is appended first)
		$str = html_entity_decode(
			preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
			$flag,
			$charset
		);
	}
	while ($str_compare !== $str);

	return $str;
}
Barry Mienydd671972010-10-04 16:33:58 +0200708
Derek Jonesa0911472010-03-30 10:33:09 -0500709 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200710
/**
 * Sanitize Filename
 *
 * Strips invisible characters and every sequence listed in
 * $this->filename_bad_chars from the given name, then removes
 * backslash escapes from the result.
 *
 * @param	string	$str		Input file name
 * @param	bool	$relative_path	Whether to preserve paths
 * @return	string
 */
public function sanitize_filename($str, $relative_path = FALSE)
{
	$blacklist = $this->filename_bad_chars;

	// Path separators are only tolerated when paths are to be preserved
	if ( ! $relative_path)
	{
		$blacklist[] = './';
		$blacklist[] = '/';
	}

	$clean = remove_invisible_characters($str, FALSE);

	// Keep stripping until a pass changes nothing, so that sequences
	// formed by an earlier removal are taken out too
	while (TRUE)
	{
		$stripped = str_replace($blacklist, '', $clean);

		if ($stripped === $clean)
		{
			break;
		}

		$clean = $stripped;
	}

	return stripslashes($clean);
}
739
Pascal Krietec9c045a2011-04-05 14:50:41 -0400740 // ----------------------------------------------------------------
741
/**
 * Strip Image Tags
 *
 * Replaces every <img> tag that has a src attribute with the value of
 * that attribute. Quoted and unquoted values are both supported and the
 * tag and attribute names are matched case-insensitively.
 *
 * @param	string	$str
 * @return	string
 */
public function strip_image_tags($str)
{
	return preg_replace(
		array(
			// Quoted value: the closing quote must be the same as the opening one
			'#<img[\s/]+.*?src\s*=\s*(["\'])(.+?)\1.*?\>#i',
			// Unquoted value: everything up to whitespace, a quote or a tag delimiter
			'#<img[\s/]+.*?src\s*=\s*([^\s"\'=<>`]+).*?\>#i'
		),
		array('\\2', '\\1'),
		$str
	);
}
752
753 // ----------------------------------------------------------------
754
/**
 * Compact Exploded Words
 *
 * Callback method for xss_clean() to remove whitespace from
 * things like 'j a v a s c r i p t'.
 *
 * All whitespace is removed from $matches[1]; $matches[2] is appended
 * to the result unchanged.
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$matches
 * @return	string
 */
protected function _compact_exploded_words($matches)
{
	$compacted = preg_replace('/\s+/s', '', $matches[1]);

	return $compacted.$matches[2];
}
769
770 // --------------------------------------------------------------------
David Behler07b53422011-08-15 00:25:06 +0200771
/**
 * Remove Evil HTML Attributes (like event handlers and style)
 *
 * It removes the evil attribute and either:
 *
 *  - Everything up until a space. For example, everything between the pipes:
 *
 *	<code>
 *		<a |style=document.write('hello');alert('world');| class=link>
 *	</code>
 *
 *  - Everything inside the quotes. For example, everything between the pipes:
 *
 *	<code>
 *		<a |style="document.write('hello'); alert('world');"| class="link">
 *	</code>
 *
 * Each removed attribute is replaced with ' [removed]'. The replacement
 * is repeated until no evil attribute is found any more.
 *
 * @param	string	$str		The string to check
 * @param	bool	$is_image	Whether the input is an image
 * @return	string	The string with the evil attributes removed
 */
protected function _remove_evil_attributes($str, $is_image)
{
	// These are regular expression fragments, not literal names ('on\w*' covers every on* handler)
	$evil_attributes = array('on\w*', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime');

	if ($is_image === TRUE)
	{
		/*
		 * Adobe Photoshop puts XML metadata into JFIF images,
		 * including namespacing, so we have to allow this for images.
		 */
		unset($evil_attributes[array_search('xmlns', $evil_attributes)]);
	}

	// \042 is the double quote and \047 the single quote
	$pattern = '#(' // catch everything in the tag preceding the evil attribute
		.'<[a-z0-9]+(?=[^>a-z0-9])' // tag start and name, followed by a non-tag character
		// optional attributes
		.'([\s\042\047/=]+' // non-attribute characters, excluding > (tag close) for obvious reasons
		.'[^\s\042\047>/=]+' // attribute characters
		// optional attribute-value
		.'(\s*=\s*' // attribute-value separator
		.'(\042[^\042]*\042|\047[^\047]*\047|[^\s\042\047=><`]*)' // single, double or non-quoted value
		.')?' // end optional attribute-value group
		.')*' // end optional attributes group
		.')' // end catching evil attribute prefix
		// evil attribute starts here
		.'([\s\042\047/=]+' // non-attribute characters (we'll replace that with a single space), again excluding '>'
		.'('.implode('|', $evil_attributes).')'
		.'\s*=\s*' // attribute-value separator
		// attribute value; single, double or non-quotes. The quoted forms must stop
		// at the first closing quote, hence the escaped \042 / \047 inside the classes
		.'(\042[^\042]+\042|\047[^\047]+\047|[^\s\042\047=><`]+)'
		.')' // end evil attribute
		.'#isS';

	// One pass removes at most one evil attribute per tag, so repeat until nothing matches
	do
	{
		$count = 0;
		$str = preg_replace($pattern, '$1 [removed]', $str, -1, $count);
	}
	while ($count);

	return $str;
}
David Behler07b53422011-08-15 00:25:06 +0200833
Pascal Krietec9c045a2011-04-05 14:50:41 -0400834 // --------------------------------------------------------------------
835
/**
 * Sanitize Naughty HTML
 *
 * Callback method for xss_clean() to remove naughty HTML elements.
 *
 * The lower-cased $matches[3] is looked up in a fixed list of names.
 * When it is listed, $matches[1] is returned wrapped in '&lt;' and
 * '&gt;'; otherwise the whole match ($matches[0]) is returned untouched.
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$matches
 * @return	string
 */
protected function _sanitize_naughty_html($matches)
{
	static $blacklisted = array(
		'alert', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior', 'bgsound',
		'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame', 'head', 'html', 'ilayer',
		'iframe', 'input', 'button', 'select', 'isindex', 'layer', 'link', 'meta', 'keygen', 'object',
		'plaintext', 'style', 'script', 'textarea', 'title', 'math', 'video', 'svg', 'xml', 'xss'
	);

	$element = strtolower($matches[3]);

	// Naughty element: neutralise its angle brackets
	if (in_array($element, $blacklisted, TRUE))
	{
		return '&lt;'.$matches[1].'&gt;';
	}

	// Anything else goes back exactly as it came in
	return $matches[0];
}
863
864 // --------------------------------------------------------------------
865
/**
 * JS Link Removal
 *
 * Callback method for xss_clean() to sanitize links.
 *
 * This limits the PCRE backtracks, making it more performance friendly
 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on link-heavy strings.
 *
 * $match[1] is stripped of angle brackets, passed through
 * _filter_attributes() and cleared of suspicious href values; the
 * result then takes the place of $match[1] inside $match[0].
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$match
 * @return	string
 */
protected function _js_link_removal($match)
{
	$without_brackets = str_replace(array('<', '>'), '', $match[1]);
	$attributes = $this->_filter_attributes($without_brackets);

	// Drop everything from 'href=' up to and including the first dangerous token
	$cleaned = preg_replace(
		'#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|data\s*:)#si',
		'',
		$attributes
	);

	return str_replace($match[1], $cleaned, $match[0]);
}
888
889 // --------------------------------------------------------------------
890
/**
 * JS Image Removal
 *
 * Callback method for xss_clean() to sanitize image tags.
 *
 * This limits the PCRE backtracks, making it more performance friendly
 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on image tag heavy strings.
 *
 * $match[1] is stripped of angle brackets, passed through
 * _filter_attributes() and cleared of suspicious src values; the
 * result then takes the place of $match[1] inside $match[0].
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$match
 * @return	string
 */
protected function _js_img_removal($match)
{
	$without_brackets = str_replace(array('<', '>'), '', $match[1]);
	$attributes = $this->_filter_attributes($without_brackets);

	// Drop everything from 'src=' up to and including the first dangerous token
	$cleaned = preg_replace(
		'#src=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si',
		'',
		$attributes
	);

	return str_replace($match[1], $cleaned, $match[0]);
}
913
914 // --------------------------------------------------------------------
915
/**
 * Attribute Conversion
 *
 * Rewrites '>' and '<' inside the matched text ($match[0]) as
 * '&gt;' and '&lt;' and doubles every backslash.
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$match
 * @return	string
 */
protected function _convert_attribute($match)
{
	$search  = array('>', '<', '\\');
	$replace = array('&gt;', '&lt;', '\\\\');

	return str_replace($search, $replace, $match[0]);
}
927
928 // --------------------------------------------------------------------
929
/**
 * Filter Attributes
 *
 * Filters tag attributes for consistency and safety.
 *
 * Only attributes whose value is enclosed in single or double quotes
 * are kept; each one has its C-style comments removed and the
 * results are concatenated in the order they were found.
 *
 * @used-by	CI_Security::_js_img_removal()
 * @used-by	CI_Security::_js_link_removal()
 * @param	string	$str
 * @return	string
 */
protected function _filter_attributes($str)
{
	// No quoted attribute at all: there is nothing to keep
	if ( ! preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $found))
	{
		return '';
	}

	$filtered = '';

	foreach ($found[0] as $attribute)
	{
		// Strip /* ... */ comments from inside the attribute
		$filtered .= preg_replace('#/\*.*?\*/#s', '', $attribute);
	}

	return $filtered;
}
953
954 // --------------------------------------------------------------------
955
/**
 * HTML Entity Decode Callback
 *
 * Decodes the entities in $match[0]. Ampersands that introduce a
 * 'name=value' pair are swapped for the XSS hash beforehand and
 * restored afterwards, so that they are not taken for entities.
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$match
 * @return	string
 */
protected function _decode_entity($match)
{
	$marker = $this->xss_hash();

	// Protect GET variables in URLs
	// 901119URL5918AMP18930PROTECT8198
	$protected = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i', $marker.'\\1=\\2', $match[0]);

	// Decode ...
	$decoded = $this->entity_decode($protected, $this->charset);

	// ... then un-protect URL GET vars
	return str_replace($marker, '&', $decoded);
}
976
977 // --------------------------------------------------------------------
David Behler07b53422011-08-15 00:25:06 +0200978
/**
 * Do Never Allowed
 *
 * Applies the literal replacements from $this->_never_allowed_str
 * (key => replacement), then substitutes '[removed]' for every match
 * of the patterns in $this->_never_allowed_regex.
 *
 * @used-by	CI_Security::xss_clean()
 * @param	string
 * @return	string
 */
protected function _do_never_allowed($str)
{
	$search  = array_keys($this->_never_allowed_str);
	$replace = array_values($this->_never_allowed_str);
	$str = str_replace($search, $replace, $str);

	// The patterns are stored without delimiters and modifiers
	foreach ($this->_never_allowed_regex as $pattern)
	{
		$str = preg_replace('#'.$pattern.'#is', '[removed]', $str);
	}

	return $str;
}
997
998 // --------------------------------------------------------------------
999
/**
 * Set CSRF Hash and Cookie
 *
 * Returns the CSRF hash, initialising it first when it is not set yet:
 * a well-formed value from the CSRF cookie is adopted if there is one,
 * otherwise a new hash is generated.
 *
 * @return	string
 */
protected function _csrf_set_hash()
{
	// Already initialised
	if ($this->_csrf_hash !== NULL)
	{
		return $this->_csrf_hash;
	}

	$cookie = $this->_csrf_cookie_name;

	// If the cookie exists we will use its value.
	// We don't necessarily want to regenerate it with
	// each page load since a page could contain embedded
	// sub-pages causing this feature to fail
	if (isset($_COOKIE[$cookie]) && is_string($_COOKIE[$cookie])
		&& preg_match('#^[0-9a-f]{32}$#iS', $_COOKIE[$cookie]) === 1)
	{
		return $this->_csrf_hash = $_COOKIE[$cookie];
	}

	$bytes = $this->get_random_bytes(16);

	if ($bytes === FALSE)
	{
		// No random byte source was usable; derive a value from uniqid() instead
		$this->_csrf_hash = md5(uniqid(mt_rand(), TRUE));
	}
	else
	{
		$this->_csrf_hash = bin2hex($bytes);
	}

	return $this->_csrf_hash;
}
1027
Derek Jonese701d762010-03-02 18:17:01 -06001028}