blob: cffdb9ad960391b1db1db73d67fea5159de10adf [file] [log] [blame]
Andrey Andreevc5536aa2012-11-01 17:33:58 +02001<?php
Derek Jonese701d762010-03-02 18:17:01 -06002/**
3 * CodeIgniter
4 *
Phil Sturgeon07c1ac82012-03-09 17:03:37 +00005 * An open source application development framework for PHP 5.2.4 or newer
Derek Jonese701d762010-03-02 18:17:01 -06006 *
Derek Jonesf4a4bd82011-10-20 12:18:42 -05007 * NOTICE OF LICENSE
Andrey Andreevbb488dc2012-01-07 23:35:16 +02008 *
Derek Jonesf4a4bd82011-10-20 12:18:42 -05009 * Licensed under the Open Software License version 3.0
Andrey Andreevbb488dc2012-01-07 23:35:16 +020010 *
Derek Jonesf4a4bd82011-10-20 12:18:42 -050011 * This source file is subject to the Open Software License (OSL 3.0) that is
12 * bundled with this package in the files license.txt / license.rst. It is
13 * also available through the world wide web at this URL:
14 * http://opensource.org/licenses/OSL-3.0
15 * If you did not receive a copy of the license and are unable to obtain it
16 * through the world wide web, please send an email to
17 * licensing@ellislab.com so we can send you a copy immediately.
18 *
Derek Jonese701d762010-03-02 18:17:01 -060019 * @package CodeIgniter
Derek Jonesf4a4bd82011-10-20 12:18:42 -050020 * @author EllisLab Dev Team
darwinel871754a2014-02-11 17:34:57 +010021 * @copyright Copyright (c) 2008 - 2014, EllisLab, Inc. (http://ellislab.com/)
Derek Jonesf4a4bd82011-10-20 12:18:42 -050022 * @license http://opensource.org/licenses/OSL-3.0 Open Software License (OSL 3.0)
Derek Jonese701d762010-03-02 18:17:01 -060023 * @link http://codeigniter.com
24 * @since Version 1.0
25 * @filesource
26 */
Andrey Andreevc5536aa2012-11-01 17:33:58 +020027defined('BASEPATH') OR exit('No direct script access allowed');
Derek Jonese701d762010-03-02 18:17:01 -060028
Derek Jonese701d762010-03-02 18:17:01 -060029/**
30 * Security Class
31 *
32 * @package CodeIgniter
33 * @subpackage Libraries
34 * @category Security
Derek Jonesf4a4bd82011-10-20 12:18:42 -050035 * @author EllisLab Dev Team
Pascal Krietec9c045a2011-04-05 14:50:41 -040036 * @link http://codeigniter.com/user_guide/libraries/security.html
Derek Jonese701d762010-03-02 18:17:01 -060037 */
38class CI_Security {
Barry Mienydd671972010-10-04 16:33:58 +020039
David Behler07b53422011-08-15 00:25:06 +020040 /**
Hunter Wua8d6d3b2013-08-03 23:17:45 +080041 * List of sanitize filename strings
42 *
43 * @var array
44 */
Hunter Wu4495cc72013-08-04 12:31:52 +080045 public $filename_bad_chars = array(
Hunter Wua8d6d3b2013-08-03 23:17:45 +080046 '../', '<!--', '-->', '<', '>',
47 "'", '"', '&', '$', '#',
48 '{', '}', '[', ']', '=',
49 ';', '?', '%20', '%22',
50 '%3c', // <
51 '%253c', // < (double-encoded: %25 is "%", so this decodes to %3c)
52 '%3e', // >
53 '%0e', // 0x0E, the "Shift Out" control character - NOTE(review): the old comment said ">", but ">" is %3e (listed above); confirm intent
54 '%28', // (
55 '%29', // )
56 '%2528', // ( (double-encoded: %25 is "%", so this decodes to %28)
57 '%26', // &
58 '%24', // $
59 '%3f', // ?
60 '%3b', // ;
61 '%3d' // =
62 );
63
64 /**
Andrey Andreev487d1ae2014-05-23 14:41:32 +030065 * Character set
Andrey Andreevc67c3fb2014-01-22 13:26:00 +020066 *
Andrey Andreev487d1ae2014-05-23 14:41:32 +030067 * Will be overridden by the constructor.
68 *
69 * @var string
Andrey Andreevc67c3fb2014-01-22 13:26:00 +020070 */
Andrey Andreev487d1ae2014-05-23 14:41:32 +030071 public $charset = 'UTF-8';
Andrey Andreevc67c3fb2014-01-22 13:26:00 +020072
73 /**
Andrey Andreev64354102012-10-28 14:16:02 +020074 * XSS Hash
David Behler07b53422011-08-15 00:25:06 +020075 *
Andrey Andreev64354102012-10-28 14:16:02 +020076 * Random Hash for protecting URLs.
77 *
78 * @var string
David Behler07b53422011-08-15 00:25:06 +020079 */
Andrey Andreev487ccc92014-08-27 16:26:23 +030080 protected $_xss_hash;
Andrey Andreev3d113bd2011-10-05 00:03:20 +030081
David Behler07b53422011-08-15 00:25:06 +020082 /**
Andrey Andreev64354102012-10-28 14:16:02 +020083 * CSRF Hash
David Behler07b53422011-08-15 00:25:06 +020084 *
Andrey Andreev64354102012-10-28 14:16:02 +020085 * Random hash for Cross Site Request Forgery protection cookie
86 *
87 * @var string
David Behler07b53422011-08-15 00:25:06 +020088 */
Andrey Andreev487ccc92014-08-27 16:26:23 +030089 protected $_csrf_hash;
Andrey Andreev3d113bd2011-10-05 00:03:20 +030090
David Behler07b53422011-08-15 00:25:06 +020091 /**
Andrey Andreev64354102012-10-28 14:16:02 +020092 * CSRF Expire time
David Behler07b53422011-08-15 00:25:06 +020093 *
Andrey Andreev64354102012-10-28 14:16:02 +020094 * Expiration time for Cross Site Request Forgery protection cookie.
95 * Defaults to two hours (in seconds).
96 *
97 * @var int
David Behler07b53422011-08-15 00:25:06 +020098 */
Timothy Warren48a7fbb2012-04-23 11:58:16 -040099 protected $_csrf_expire = 7200;
Andrey Andreev3d113bd2011-10-05 00:03:20 +0300100
David Behler07b53422011-08-15 00:25:06 +0200101 /**
Andrey Andreev64354102012-10-28 14:16:02 +0200102 * CSRF Token name
David Behler07b53422011-08-15 00:25:06 +0200103 *
Andrey Andreev64354102012-10-28 14:16:02 +0200104 * Token name for Cross Site Request Forgery protection cookie.
105 *
106 * @var string
David Behler07b53422011-08-15 00:25:06 +0200107 */
Timothy Warren48a7fbb2012-04-23 11:58:16 -0400108 protected $_csrf_token_name = 'ci_csrf_token';
Andrey Andreev3d113bd2011-10-05 00:03:20 +0300109
David Behler07b53422011-08-15 00:25:06 +0200110 /**
Andrey Andreev64354102012-10-28 14:16:02 +0200111 * CSRF Cookie name
David Behler07b53422011-08-15 00:25:06 +0200112 *
Andrey Andreev64354102012-10-28 14:16:02 +0200113 * Cookie name for Cross Site Request Forgery protection cookie.
114 *
115 * @var string
David Behler07b53422011-08-15 00:25:06 +0200116 */
Timothy Warren48a7fbb2012-04-23 11:58:16 -0400117 protected $_csrf_cookie_name = 'ci_csrf_token';
Andrey Andreev3d113bd2011-10-05 00:03:20 +0300118
David Behler07b53422011-08-15 00:25:06 +0200119 /**
120 * List of never allowed strings
121 *
Andrey Andreev64354102012-10-28 14:16:02 +0200122 * @var array
David Behler07b53422011-08-15 00:25:06 +0200123 */
Timothy Warren48a7fbb2012-04-23 11:58:16 -0400124 protected $_never_allowed_str = array(
Timothy Warren40403d22012-04-19 16:38:50 -0400125 'document.cookie' => '[removed]',
126 'document.write' => '[removed]',
127 '.parentNode' => '[removed]',
128 '.innerHTML' => '[removed]',
Timothy Warren40403d22012-04-19 16:38:50 -0400129 '-moz-binding' => '[removed]',
130 '<!--' => '&lt;!--',
131 '-->' => '--&gt;',
132 '<![CDATA[' => '&lt;![CDATA[',
133 '<comment>' => '&lt;comment&gt;'
134 );
Derek Jonese701d762010-03-02 18:17:01 -0600135
David Behler07b53422011-08-15 00:25:06 +0200136 /**
Andrey Andreev64354102012-10-28 14:16:02 +0200137 * List of never allowed regex replacements
David Behler07b53422011-08-15 00:25:06 +0200138 *
Andrey Andreev64354102012-10-28 14:16:02 +0200139 * @var array
David Behler07b53422011-08-15 00:25:06 +0200140 */
Pascal Krietec9c045a2011-04-05 14:50:41 -0400141 protected $_never_allowed_regex = array(
Timothy Warren40403d22012-04-19 16:38:50 -0400142 'javascript\s*:',
Andrey Andreev1bbc5642014-01-07 12:45:27 +0200143 '(document|(document\.)?window)\.(location|on\w*)',
Timothy Warren40403d22012-04-19 16:38:50 -0400144 'expression\s*(\(|&\#40;)', // CSS and IE
145 'vbscript\s*:', // IE, surprise!
Andrey Andreeva30a7172014-02-10 09:17:25 +0200146 'wscript\s*:', // IE
Andrey Andreevf7f9dca2014-02-10 12:41:00 +0200147 'jscript\s*:', // IE
Andrey Andreeva30a7172014-02-10 09:17:25 +0200148 'vbs\s*:', // IE
Andrey Andreev43568062014-01-21 23:52:31 +0200149 'Redirect\s+30\d',
Wes Bakerd3481352012-05-07 16:49:33 -0400150 "([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
Timothy Warren40403d22012-04-19 16:38:50 -0400151 );
Andrey Andreev92ebfb62012-05-17 12:49:24 +0300152
Timothy Warrenad475052012-04-19 13:21:06 -0400153 /**
Andrey Andreev64354102012-10-28 14:16:02 +0200154 * Class constructor
Andrey Andreev92ebfb62012-05-17 12:49:24 +0300155 *
156 * @return void
Timothy Warrenad475052012-04-19 13:21:06 -0400157 */
public function __construct()
{
	// The CSRF settings are only loaded when CSRF protection is switched on
	if (config_item('csrf_protection'))
	{
		// Copy every configured CSRF option onto the matching "_"-prefixed
		// property. config_item() is defined outside this class; both FALSE
		// and NULL are treated as "not configured" here so that a missing
		// item can never wipe out one of the built-in defaults.
		foreach (array('csrf_expire', 'csrf_token_name', 'csrf_cookie_name') as $key)
		{
			$val = config_item($key);

			if ($val !== FALSE && $val !== NULL)
			{
				$this->{'_'.$key} = $val;
			}
		}

		// Prepend the application specific cookie prefix
		if ($cookie_prefix = config_item('cookie_prefix'))
		{
			$this->_csrf_cookie_name = $cookie_prefix.$this->_csrf_cookie_name;
		}

		// Set the CSRF hash
		$this->_csrf_set_hash();
	}

	// Only replace the declared default charset when a usable one is
	// configured; an unset item would otherwise leave an empty string.
	$charset = config_item('charset');

	if (is_string($charset) && $charset !== '')
	{
		$this->charset = strtoupper($charset);
	}

	log_message('debug', 'Security Class Initialized');
}
186
187 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200188
Derek Jonese701d762010-03-02 18:17:01 -0600189 /**
Andrey Andreev64354102012-10-28 14:16:02 +0200190 * CSRF Verify
Derek Jonese701d762010-03-02 18:17:01 -0600191 *
Andrew Podner4296a652012-12-17 07:51:15 -0500192 * @return CI_Security
Derek Jonese701d762010-03-02 18:17:01 -0600193 */
public function csrf_verify()
{
	// Verifies the CSRF token of a POST request against the CSRF cookie.
	// Returns $this on success (or for a whitelisted URI); for non-POST
	// requests it returns whatever csrf_set_cookie() returns.

	// If it's not a POST request we will only set the CSRF cookie.
	// A missing REQUEST_METHOD is handled like a non-POST request
	// instead of raising an "undefined index" notice.
	if ( ! isset($_SERVER['REQUEST_METHOD']) OR strtoupper($_SERVER['REQUEST_METHOD']) !== 'POST')
	{
		return $this->csrf_set_cookie();
	}

	// Check if URI has been whitelisted from CSRF checks
	if ($exclude_uris = config_item('csrf_exclude_uris'))
	{
		$uri = load_class('URI', 'core');

		// Neither the URI string nor the modifiers change while looping
		$uri_string = $uri->uri_string();
		$modifiers = 'i'.(UTF8_ENABLED ? 'u' : '');

		// Every entry is used as a regular expression anchored at both ends
		foreach ($exclude_uris as $excluded)
		{
			if (preg_match('#^'.$excluded.'$#'.$modifiers, $uri_string))
			{
				return $this;
			}
		}
	}

	// Do the tokens exist in both the _POST and _COOKIE arrays,
	// and are both of them plain strings (not arrays)?
	$valid = isset($_POST[$this->_csrf_token_name], $_COOKIE[$this->_csrf_cookie_name])
		&& is_string($_POST[$this->_csrf_token_name])
		&& is_string($_COOKIE[$this->_csrf_cookie_name]);

	// Do the tokens match? Compare in constant time when PHP offers
	// hash_equals(), otherwise fall back to a strict comparison.
	if ($valid)
	{
		$valid = function_exists('hash_equals')
			? hash_equals($_COOKIE[$this->_csrf_cookie_name], $_POST[$this->_csrf_token_name])
			: ($_POST[$this->_csrf_token_name] === $_COOKIE[$this->_csrf_cookie_name]);
	}

	if ( ! $valid)
	{
		$this->csrf_show_error();
	}

	// We kill this since we're done and we don't want to pollute the _POST array
	unset($_POST[$this->_csrf_token_name]);

	// Regenerate on every submission?
	if (config_item('csrf_regenerate'))
	{
		// Nothing should last forever
		unset($_COOKIE[$this->_csrf_cookie_name]);
		$this->_csrf_hash = NULL;
	}

	$this->_csrf_set_hash();
	$this->csrf_set_cookie();

	log_message('debug', 'CSRF token verified');
	return $this;
}
Barry Mienydd671972010-10-04 16:33:58 +0200239
Derek Jonese701d762010-03-02 18:17:01 -0600240 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200241
Derek Jonese701d762010-03-02 18:17:01 -0600242 /**
Andrey Andreev64354102012-10-28 14:16:02 +0200243 * CSRF Set Cookie
Derek Jonese701d762010-03-02 18:17:01 -0600244 *
Taufan Aditya6c7526c2012-05-27 13:51:27 +0700245 * @codeCoverageIgnore
Andrew Podner4296a652012-12-17 07:51:15 -0500246 * @return CI_Security
Derek Jonese701d762010-03-02 18:17:01 -0600247 */
public function csrf_set_cookie()
{
	// Sends the CSRF cookie. Returns FALSE without sending anything when
	// the cookie is configured as "secure" but is_https() reports a
	// non-HTTPS request; returns $this otherwise.
	$expire = time() + $this->_csrf_expire;
	$secure_cookie = (bool) config_item('cookie_secure');

	if ($secure_cookie && ! is_https())
	{
		return FALSE;
	}

	// The config values are cast to the types setcookie() declares, so an
	// unset item (NULL) is passed as '' / FALSE instead of NULL.
	setcookie(
		$this->_csrf_cookie_name,
		(string) $this->_csrf_hash,
		$expire,
		(string) config_item('cookie_path'),
		(string) config_item('cookie_domain'),
		$secure_cookie,
		(bool) config_item('cookie_httponly')
	);
	log_message('debug', 'CSRF cookie sent');

	return $this;
}
271
272 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200273
Derek Jonese701d762010-03-02 18:17:01 -0600274 /**
275 * Show CSRF Error
276 *
Pascal Krietec9c045a2011-04-05 14:50:41 -0400277 * @return void
Derek Jonese701d762010-03-02 18:17:01 -0600278 */
public function csrf_show_error()
{
	// Hands a fixed message and the status code 403 to show_error()
	$message = 'The action you have requested is not allowed.';
	$status_code = 403;

	show_error($message, $status_code);
}
283
284 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200285
Derek Jonese701d762010-03-02 18:17:01 -0600286 /**
David Behler07b53422011-08-15 00:25:06 +0200287 * Get CSRF Hash
Pascal Krietec9c045a2011-04-05 14:50:41 -0400288 *
Andrey Andreev64354102012-10-28 14:16:02 +0200289 * @see CI_Security::$_csrf_hash
290 * @return string CSRF hash
Pascal Krietec9c045a2011-04-05 14:50:41 -0400291 */
public function get_csrf_hash()
{
	// Read-only access to the protected CSRF hash property
	$csrf_hash = $this->_csrf_hash;

	return $csrf_hash;
}
296
297 // --------------------------------------------------------------------
298
299 /**
300 * Get CSRF Token Name
301 *
Andrey Andreev64354102012-10-28 14:16:02 +0200302 * @see CI_Security::$_csrf_token_name
303 * @return string CSRF token name
Pascal Krietec9c045a2011-04-05 14:50:41 -0400304 */
public function get_csrf_token_name()
{
	// Read-only access to the protected CSRF token name property
	$token_name = $this->_csrf_token_name;

	return $token_name;
}
309
310 // --------------------------------------------------------------------
311
312 /**
Derek Jonese701d762010-03-02 18:17:01 -0600313 * XSS Clean
314 *
315 * Sanitizes data so that Cross Site Scripting Hacks can be
Andrey Andreev64354102012-10-28 14:16:02 +0200316 * prevented. This method does a fair amount of work but
Derek Jonese701d762010-03-02 18:17:01 -0600317 * it is extremely thorough, designed to prevent even the
Derek Jones37f4b9c2011-07-01 17:56:50 -0500318 * most obscure XSS attempts. Nothing is ever 100% foolproof,
Derek Jonese701d762010-03-02 18:17:01 -0600319 * of course, but I haven't been able to get anything past
320 * the filter.
321 *
Andrey Andreev64354102012-10-28 14:16:02 +0200322 * Note: Should only be used to deal with data upon submission.
323 * It's not something that should be used for general
324 * runtime processing.
Derek Jonese701d762010-03-02 18:17:01 -0600325 *
Andrey Andreev64354102012-10-28 14:16:02 +0200326 * @link http://channel.bitflux.ch/wiki/XSS_Prevention
327 * Based in part on some code and ideas from Bitflux.
Derek Jonese701d762010-03-02 18:17:01 -0600328 *
Andrey Andreev64354102012-10-28 14:16:02 +0200329 * @link http://ha.ckers.org/xss.html
330 * To help develop this script I used this great list of
331 * vulnerabilities along with a few other hacks I've
332 * harvested from examining vulnerabilities in other programs.
Derek Jonese701d762010-03-02 18:17:01 -0600333 *
Andrey Andreev64354102012-10-28 14:16:02 +0200334 * @param string|string[] $str Input data
335 * @param bool $is_image Whether the input is an image
Derek Jonese701d762010-03-02 18:17:01 -0600336 * @return string|string[]|bool Cleaned string or array; bool when a string is checked with $is_image = TRUE
337 */
public function xss_clean($str, $is_image = FALSE)
{
	// What is returned depends on the input:
	//  - an array:                  the array with every element cleaned
	//  - a string, $is_image TRUE:  bool, TRUE when cleaning changed nothing
	//  - any other string:          the cleaned string

	// Is the string an array?
	if (is_array($str))
	{
		// Walk over every key. The former each() based loop started at the
		// array's internal pointer (so it could skip leading elements), and
		// each() itself is no longer available as of PHP 8.0.
		// NOTE: $is_image is deliberately not forwarded, as before.
		foreach (array_keys($str) as $key)
		{
			$str[$key] = $this->xss_clean($str[$key]);
		}

		return $str;
	}

	// Remove Invisible Characters
	$str = remove_invisible_characters($str);

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Use rawurldecode() so it does not remove plus signs
	 *
	 * The loop repeats until no %XX sequence is left, which also
	 * unwraps input that was encoded more than once.
	 */
	do
	{
		$str = rawurldecode($str);
	}
	while (preg_match('/%[0-9a-f]{2,}/i', $str));

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 */
	$str = preg_replace_callback("/[^a-z0-9>]+[a-z0-9]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
	$str = preg_replace_callback('/<\w+.*/si', array($this, '_decode_entity'), $str);

	// Remove Invisible Characters Again!
	$str = remove_invisible_characters($str);

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like this: ja[TAB]vascript
	 * NOTE: we deal with spaces between characters later.
	 * NOTE: preg_replace was found to be amazingly slow here on
	 * large blocks of data, so we use str_replace.
	 */
	$str = str_replace("\t", ' ', $str);

	// Capture converted string for later comparison
	$converted_string = $str;

	// Remove Strings that are never allowed
	$str = $this->_do_never_allowed($str);

	/*
	 * Makes PHP tags safe
	 *
	 * Note: XML tags are inadvertently replaced too:
	 *
	 * <?xml
	 *
	 * But it doesn't seem to pose a problem.
	 */
	if ($is_image === TRUE)
	{
		// Images have a tendency to have the PHP short opening and
		// closing tags every so often so we skip those and only
		// do the long opening tags.
		$str = preg_replace('/<\?(php)/i', '&lt;?\\1', $str);
	}
	else
	{
		$str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
	}

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like: j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 */
	$words = array(
		'javascript', 'expression', 'vbscript', 'jscript', 'wscript',
		'vbs', 'script', 'base64', 'applet', 'alert', 'document',
		'write', 'cookie', 'window', 'confirm', 'prompt'
	);

	foreach ($words as $word)
	{
		// Allow any amount of whitespace after every single letter
		$word = implode('\s*', str_split($word)).'\s*';

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto"
		// (substr() drops the trailing '\s*' that was appended above)
		$str = preg_replace_callback('#('.substr($word, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 * We used to do some version comparisons and use of stripos(),
	 * but it is dog slow compared to these simplified non-capturing
	 * preg_match(), especially if the pattern exists in the string
	 *
	 * Note: It was reported that not only space characters, but all in
	 * the following pattern can be parsed as separators between a tag name
	 * and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
	 * ... however, remove_invisible_characters() above already strips the
	 * hex-encoded ones, so we'll skip them below.
	 *
	 * The loop runs until a full pass leaves the string unchanged.
	 */
	do
	{
		$original = $str;

		if (preg_match('/<a/i', $str))
		{
			$str = preg_replace_callback('#<a[^a-z0-9>]+([^>]*?)(?:>|$)#si', array($this, '_js_link_removal'), $str);
		}

		if (preg_match('/<img/i', $str))
		{
			$str = preg_replace_callback('#<img[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
		}

		if (preg_match('/script|xss/i', $str))
		{
			$str = preg_replace('#</*(?:script|xss).*?>#si', '[removed]', $str);
		}
	}
	while ($original !== $str);

	unset($original);

	// Remove evil attributes such as style, onclick and xmlns
	$str = $this->_remove_evil_attributes($str, $is_image);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 */
	$naughty = 'alert|prompt|confirm|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|button|select|isindex|layer|link|meta|keygen|object|plaintext|style|script|textarea|title|math|video|svg|xml|xss';
	$str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed. Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:	eval&#40;'some code'&#41;
	 */
	$str = preg_replace('#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
				'\\1\\2&#40;\\3&#41;',
				$str);

	// Final clean up
	// This adds a bit of extra precaution in case
	// something got through the above filters
	$str = $this->_do_never_allowed($str);

	/*
	 * Images are Handled in a Special Way
	 * - Essentially, we want to know that after all of the character
	 * conversion is done whether any unwanted, likely XSS, code was found.
	 * If not, we return TRUE, as the image is clean.
	 * However, if the string post-conversion does not match the
	 * string post-removal of XSS, then it fails, as there was unwanted XSS
	 * code found and removed/changed during processing.
	 */
	if ($is_image === TRUE)
	{
		return ($str === $converted_string);
	}

	log_message('debug', 'XSS Filtering completed');
	return $str;
}
528
529 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200530
Derek Jonese701d762010-03-02 18:17:01 -0600531 /**
Andrey Andreev64354102012-10-28 14:16:02 +0200532 * XSS Hash
Derek Jonese701d762010-03-02 18:17:01 -0600533 *
Andrey Andreev64354102012-10-28 14:16:02 +0200534 * Generates the XSS hash if needed and returns it.
535 *
536 * @see CI_Security::$_xss_hash
537 * @return string XSS hash
Derek Jonese701d762010-03-02 18:17:01 -0600538 */
public function xss_hash()
{
	// A hash that was generated earlier is simply handed back
	if ($this->_xss_hash !== NULL)
	{
		return $this->_xss_hash;
	}

	$random = $this->get_random_bytes(16);

	if ($random === FALSE)
	{
		// No random bytes were delivered: derive the hash from uniqid() instead
		$this->_xss_hash = md5(uniqid(mt_rand(), TRUE));
	}
	else
	{
		// Hex representation of the 16 bytes that were requested
		$this->_xss_hash = bin2hex($random);
	}

	return $this->_xss_hash;
}
551
552 // --------------------------------------------------------------------
553
554 /**
Andrey Andreev487ccc92014-08-27 16:26:23 +0300555 * Get random bytes
556 *
557 * @param int $length Output length
558 * @return string|bool Random bytes, or FALSE on failure
559 */
public function get_random_bytes($length)
{
	// Returns $length random bytes as a binary string, or FALSE when the
	// length is invalid or no random source could deliver the bytes.

	// Only non-zero values made up solely of digits are accepted
	if (empty($length) OR ! ctype_digit((string) $length))
	{
		return FALSE;
	}

	$length = (int) $length;

	// Where PHP ships random_bytes(), use it ahead of the legacy sources
	if (function_exists('random_bytes'))
	{
		try
		{
			return random_bytes($length);
		}
		catch (Exception $e)
		{
			// Record the failure and carry on with the legacy sources below
			log_message('error', $e->getMessage());
		}
	}

	// Unfortunately, none of the following PRNGs is guaranteed to exist ...
	if (defined('MCRYPT_DEV_URANDOM') && ($output = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM)) !== FALSE)
	{
		return $output;
	}

	if (is_readable('/dev/urandom') && ($fp = fopen('/dev/urandom', 'rb')) !== FALSE)
	{
		// Try not to waste entropy ...
		is_php('5.4') && stream_set_chunk_size($fp, $length);
		$output = fread($fp, $length);
		fclose($fp);

		// Accept the result only if the read delivered every requested
		// byte; a string offset test is used because it counts bytes.
		if ($output !== FALSE && isset($output[$length - 1]))
		{
			return $output;
		}
	}

	if (function_exists('openssl_random_pseudo_bytes'))
	{
		return openssl_random_pseudo_bytes($length);
	}

	return FALSE;
}
593
594 // --------------------------------------------------------------------
595
596 /**
Derek Jonesa0911472010-03-30 10:33:09 -0500597 * HTML Entities Decode
598 *
Andrey Andreev64354102012-10-28 14:16:02 +0200599 * A replacement for html_entity_decode()
Derek Jonesa0911472010-03-30 10:33:09 -0500600 *
Pascal Krietec38e3b62011-11-14 13:55:00 -0500601 * The reason we are not using html_entity_decode() by itself is because
602 * while it is not technically correct to leave out the semicolon
603 * at the end of an entity most browsers will still interpret the entity
Andrey Andreevbb488dc2012-01-07 23:35:16 +0200604 * correctly. html_entity_decode() does not convert entities without
Pascal Krietec38e3b62011-11-14 13:55:00 -0500605 * semicolons, so we are left with our own little solution here. Bummer.
Derek Jonesa0911472010-03-30 10:33:09 -0500606 *
Andrey Andreev64354102012-10-28 14:16:02 +0200607 * @link http://php.net/html-entity-decode
608 *
609 * @param string $str Input
610 * @param string $charset Character set
Derek Jonesa0911472010-03-30 10:33:09 -0500611 * @return string
612 */
public function entity_decode($str, $charset = NULL)
{
	// Without an ampersand there cannot be any entity to decode
	if (strpos($str, '&') === FALSE)
	{
		return $str;
	}

	// Lowercased translation table (character => entity), built on first use.
	// NOTE: being static, it keeps the flags/charset of the first call that
	// had to build it.
	static $_entities;

	isset($charset) OR $charset = $this->charset;
	$flag = is_php('5.4')
		? ENT_COMPAT | ENT_HTML5
		: ENT_COMPAT;

	// Keep decoding until a pass leaves the string untouched, so that
	// nested encodings (e.g. "&amp;lt;") are fully resolved.
	do
	{
		$str_compare = $str;

		// Decode standard entities that lack the trailing semicolon,
		// avoiding false positives
		if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
		{
			if ( ! isset($_entities))
			{
				$_entities = array_map('strtolower', get_html_translation_table(HTML_ENTITIES, $flag, $charset));

				// If we're not on PHP 5.4+, add the possibly dangerous HTML 5
				// entities to the array manually
				if ($flag === ENT_COMPAT)
				{
					$_entities[':'] = '&colon;';
					$_entities['('] = '&lpar;';
					// The semicolon is required: lookups below always append ';'
					$_entities[')'] = '&rpar;';
					$_entities["\n"] = '&newline;';
					$_entities["\t"] = '&tab;';
				}
			}

			$replace = array();

			// array_unique() preserves keys and therefore leaves gaps,
			// so iterate over the values instead of counting indexes.
			foreach (array_unique(array_map('strtolower', $matches[0])) as $entity)
			{
				if (($char = array_search($entity.';', $_entities, TRUE)) !== FALSE)
				{
					$replace[$entity] = $char;
				}
			}

			$str = str_ireplace(array_keys($replace), array_values($replace), $str);
		}

		// Decode numeric & UTF16 two byte entities.
		// Both the hexadecimal and the decimal alternative must follow "&#";
		// otherwise plain numbers in the text would get a ';' appended.
		$str = html_entity_decode(
			preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|0*\d{2,4}(?![0-9;])))/iS', '$1;', $str),
			$flag,
			$charset
		);
	}
	while ($str_compare !== $str);

	return $str;
}
Barry Mienydd671972010-10-04 16:33:58 +0200673
Derek Jonesa0911472010-03-30 10:33:09 -0500674 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200675
Derek Jonesa0911472010-03-30 10:33:09 -0500676 /**
Andrey Andreev64354102012-10-28 14:16:02 +0200677 * Sanitize Filename
Derek Jonese701d762010-03-02 18:17:01 -0600678 *
Andrey Andreev64354102012-10-28 14:16:02 +0200679 * @param string $str Input file name
680 * @param bool $relative_path Whether to preserve paths
Derek Jonese701d762010-03-02 18:17:01 -0600681 * @return string
682 */
public function sanitize_filename($str, $relative_path = FALSE)
{
	// Directory separators are only tolerated when paths must be preserved
	$forbidden = $relative_path
		? $this->filename_bad_chars
		: array_merge($this->filename_bad_chars, array('./', '/'));

	$str = remove_invisible_characters($str, FALSE);

	// Strip repeatedly: removing one sequence may assemble another
	// (e.g. ".../" collapsing into "./"), so stop only once a pass
	// changes nothing.
	for (;;)
	{
		$stripped = str_replace($forbidden, '', $str);

		if ($stripped === $str)
		{
			break;
		}

		$str = $stripped;
	}

	return stripslashes($str);
}
704
Pascal Krietec9c045a2011-04-05 14:50:41 -0400705 // ----------------------------------------------------------------
706
707 /**
Andrey Andreev1a24a9d2012-06-27 00:52:47 +0300708 * Strip Image Tags
709 *
Andrey Andreev64354102012-10-28 14:16:02 +0200710 * @param string $str
Andrey Andreev1a24a9d2012-06-27 00:52:47 +0300711 * @return string
712 */
public function strip_image_tags($str)
{
	// Replaces every <img> tag with the bare value of its src attribute.
	return preg_replace(
		array(
			// Quoted src: the closing quote must be the same character as
			// the opening one; tag/attribute names match case-insensitively.
			'#<img[\s/]+.*?src\s*=\s*(["\'])(.+?)\\1.*?\>#i',
			// Unquoted src: capture the whole value up to the next
			// whitespace or delimiter instead of just its first character.
			'#<img[\s/]+.*?src\s*=\s*([^\s"\'=<>`]+).*?\>#i'
		),
		array('\\2', '\\1'),
		$str
	);
}
717
718 // ----------------------------------------------------------------
719
720 /**
Pascal Krietec9c045a2011-04-05 14:50:41 -0400721 * Compact Exploded Words
722 *
Andrey Andreev64354102012-10-28 14:16:02 +0200723 * Callback method for xss_clean() to remove whitespace from
724 * things like 'j a v a s c r i p t'.
Pascal Krietec9c045a2011-04-05 14:50:41 -0400725 *
Andrey Andreev64354102012-10-28 14:16:02 +0200726 * @used-by CI_Security::xss_clean()
727 * @param array $matches
Timothy Warrenad475052012-04-19 13:21:06 -0400728 * @return string
Pascal Krietec9c045a2011-04-05 14:50:41 -0400729 */
protected function _compact_exploded_words($matches)
{
	// $matches[1] is the spaced-out word, $matches[2] whatever followed it;
	// only the word itself loses its whitespace.
	$compacted = preg_replace('/\s+/s', '', $matches[1]);

	return $compacted.$matches[2];
}
734
735 // --------------------------------------------------------------------
David Behler07b53422011-08-15 00:25:06 +0200736
Timothy Warrenad475052012-04-19 13:21:06 -0400737 /**
738 * Remove Evil HTML Attributes (like event handlers and style)
Pascal Krietec9c045a2011-04-05 14:50:41 -0400739 *
740 * It removes the evil attribute and either:
Pascal Krietec9c045a2011-04-05 14:50:41 -0400741 *
Andrey Andreev64354102012-10-28 14:16:02 +0200742 * - Everything up until a space. For example, everything between the pipes:
743 *
744 * <code>
745 * <a |style=document.write('hello');alert('world');| class=link>
746 * </code>
747 *
748 * - Everything inside the quotes. For example, everything between the pipes:
749 *
750 * <code>
751 * <a |style="document.write('hello'); alert('world');"| class="link">
752 * </code>
753 *
754 * @param string $str The string to check
755 * @param bool $is_image Whether the input is an image
756 * @return string The string with the evil attributes removed
Pascal Krietec9c045a2011-04-05 14:50:41 -0400757 */
protected function _remove_evil_attributes($str, $is_image)
{
	$evil_attributes = array('on\w*', 'style', 'xmlns', 'formaction', 'form', 'xlink:href');

	if ($is_image === TRUE)
	{
		/*
		 * Adobe Photoshop puts XML metadata into JFIF images,
		 * including namespacing, so we have to allow this for images.
		 */
		$evil_attributes = array_diff($evil_attributes, array('xmlns'));
	}

	$names = implode('|', $evil_attributes);

	$finders = array(
		// illegal attribute strings with quotes (042 and 047 are octal quotes)
		'/(?<!\w)('.$names.')\s*=\s*(\042|\047)([^\\2]*?)(\\2)/is',
		// illegal attribute strings without quotes
		'/(?<!\w)('.$names.')\s*=\s*([^\s>]*)/is'
	);

	// Repeat until a pass removes nothing
	do
	{
		$count = 0;
		$attribs = array();

		// Collect every occurrence, escaped for literal use in the pattern below
		foreach ($finders as $finder)
		{
			preg_match_all($finder, $str, $found, PREG_SET_ORDER);

			foreach ($found as $attr)
			{
				$attribs[] = preg_quote($attr[0], '/');
			}
		}

		// replace illegal attribute strings that are inside an html tag
		if (count($attribs) > 0)
		{
			$str = preg_replace('/(<?)(\/?[^><]+?)([^A-Za-z<>\-])(.*?)('.implode('|', $attribs).')(.*?)([\s><]?)([><]*)/i', '$1$2 $4$6$7$8', $str, -1, $count);
		}
	}
	while ($count);

	return $str;
}
David Behler07b53422011-08-15 00:25:06 +0200801
Pascal Krietec9c045a2011-04-05 14:50:41 -0400802 // --------------------------------------------------------------------
803
804 /**
805 * Sanitize Naughty HTML
806 *
Andrey Andreev64354102012-10-28 14:16:02 +0200807 * Callback method for xss_clean() to remove naughty HTML elements.
Pascal Krietec9c045a2011-04-05 14:50:41 -0400808 *
Andrey Andreev64354102012-10-28 14:16:02 +0200809 * @used-by CI_Security::xss_clean()
810 * @param array $matches
Pascal Krietec9c045a2011-04-05 14:50:41 -0400811 * @return string
812 */
protected function _sanitize_naughty_html($matches)
{
	// The opening brace is always emitted in encoded form
	$opening = '&lt;'.$matches[1].$matches[2].$matches[3];

	// Encode any captured opening or closing brace as well,
	// to prevent recursive vectors
	$trailing = strtr($matches[4], array('>' => '&gt;', '<' => '&lt;'));

	return $opening.$trailing;
}
819
820 // --------------------------------------------------------------------
821
822 /**
823 * JS Link Removal
824 *
Andrey Andreev64354102012-10-28 14:16:02 +0200825 * Callback method for xss_clean() to sanitize links.
826 *
Pascal Krietec9c045a2011-04-05 14:50:41 -0400827 * This limits the PCRE backtracks, making it more performance friendly
828 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
Andrey Andreev64354102012-10-28 14:16:02 +0200829 * PHP 5.2+ on link-heavy strings.
Pascal Krietec9c045a2011-04-05 14:50:41 -0400830 *
Andrey Andreev64354102012-10-28 14:16:02 +0200831 * @used-by CI_Security::xss_clean()
832 * @param array $match
Pascal Krietec9c045a2011-04-05 14:50:41 -0400833 * @return string
834 */
protected function _js_link_removal($match)
{
	// $match[1] holds the tag's attribute string, $match[0] the whole tag.
	// Keep only the quoted attributes, with any angle brackets dropped first.
	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]));

	// Cut everything from "href=" up to and including the first dangerous token
	$cleaned = preg_replace(
		'#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|data\s*:)#si',
		'',
		$attributes
	);

	// Swap the original attribute string for the cleaned one inside the tag
	return str_replace($match[1], $cleaned, $match[0]);
}
844
845 // --------------------------------------------------------------------
846
847 /**
848 * JS Image Removal
849 *
Andrey Andreev64354102012-10-28 14:16:02 +0200850 * Callback method for xss_clean() to sanitize image tags.
851 *
Pascal Krietec9c045a2011-04-05 14:50:41 -0400852 * This limits the PCRE backtracks, making it more performance friendly
853 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
Andrey Andreev64354102012-10-28 14:16:02 +0200854 * PHP 5.2+ on image tag heavy strings.
Pascal Krietec9c045a2011-04-05 14:50:41 -0400855 *
Andrey Andreev64354102012-10-28 14:16:02 +0200856 * @used-by CI_Security::xss_clean()
857 * @param array $match
Pascal Krietec9c045a2011-04-05 14:50:41 -0400858 * @return string
859 */
protected function _js_img_removal($match)
{
	// $match[1] holds the tag's attribute string, $match[0] the whole tag.
	// Keep only the quoted attributes, with any angle brackets dropped first.
	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]));

	// Cut everything from "src=" up to and including the first dangerous token
	$cleaned = preg_replace(
		'#src=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si',
		'',
		$attributes
	);

	// Swap the original attribute string for the cleaned one inside the tag
	return str_replace($match[1], $cleaned, $match[0]);
}
869
870 // --------------------------------------------------------------------
871
872 /**
873 * Attribute Conversion
874 *
Andrey Andreev64354102012-10-28 14:16:02 +0200875 * @used-by CI_Security::xss_clean()
876 * @param array $match
Pascal Krietec9c045a2011-04-05 14:50:41 -0400877 * @return string
878 */
protected function _convert_attribute($match)
{
	// Encode angle brackets and double every backslash in the matched text
	$escapes = array(
		'>'  => '&gt;',
		'<'  => '&lt;',
		'\\' => '\\\\'
	);

	return strtr($match[0], $escapes);
}
883
884 // --------------------------------------------------------------------
885
886 /**
887 * Filter Attributes
888 *
Andrey Andreev64354102012-10-28 14:16:02 +0200889 * Filters tag attributes for consistency and safety.
Pascal Krietec9c045a2011-04-05 14:50:41 -0400890 *
Andrey Andreev64354102012-10-28 14:16:02 +0200891 * @used-by CI_Security::_js_img_removal()
892 * @used-by CI_Security::_js_link_removal()
893 * @param string $str
Pascal Krietec9c045a2011-04-05 14:50:41 -0400894 * @return string
895 */
protected function _filter_attributes($str)
{
	// Only attributes whose value is quoted are kept; anything else is dropped
	if ( ! preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $found))
	{
		return '';
	}

	$filtered = '';

	foreach ($found[0] as $attribute)
	{
		// Remove /* ... */ comments from each retained attribute
		$filtered .= preg_replace('#/\*.*?\*/#s', '', $attribute);
	}

	return $filtered;
}
909
910 // --------------------------------------------------------------------
911
912 /**
913 * HTML Entity Decode Callback
914 *
Andrey Andreev64354102012-10-28 14:16:02 +0200915 * @used-by CI_Security::xss_clean()
916 * @param array $match
Pascal Krietec9c045a2011-04-05 14:50:41 -0400917 * @return string
918 */
protected function _decode_entity($match)
{
	// Protect GET variables in URLs by swapping their leading "&" for
	// the XSS hash, e.g. 901119URL5918AMP18930PROTECT8198
	$protected = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i', $this->xss_hash().'\\1=\\2', $match[0]);

	$marker = $this->xss_hash();

	// Decode the entities ...
	$decoded = $this->entity_decode($protected, $this->charset);

	// ... then un-protect the URL GET vars
	return str_replace($marker, '&', $decoded);
}
932
933 // --------------------------------------------------------------------
David Behler07b53422011-08-15 00:25:06 +0200934
/**
 * Do Never Allowed
 *
 * @used-by	CI_Security::xss_clean()
 * @param	string	$str
 * @return	string
 */
protected function _do_never_allowed($str)
{
	// Literal substitutions first: each key is replaced by its value
	$needles = array_keys($this->_never_allowed_str);
	$str = str_replace($needles, $this->_never_allowed_str, $str);

	// Then every regular expression match becomes "[removed]"
	foreach ($this->_never_allowed_regex as $expression)
	{
		$pattern = '#'.$expression.'#is';
		$str = preg_replace($pattern, '[removed]', $str);
	}

	return $str;
}
953
954 // --------------------------------------------------------------------
955
956 /**
Andrey Andreev64354102012-10-28 14:16:02 +0200957 * Set CSRF Hash and Cookie
Pascal Krietec9c045a2011-04-05 14:50:41 -0400958 *
959 * @return string
960 */
protected function _csrf_set_hash()
{
	// Returns the CSRF hash, generating and storing it on first use.
	if ($this->_csrf_hash === NULL)
	{
		// If the cookie exists we will use its value.
		// We don't necessarily want to regenerate it with
		// each page load since a page could contain embedded
		// sub-pages causing this feature to fail.
		//
		// The cookie is client-controlled, so it is only accepted when it
		// is a string (a "name[]=x" cookie arrives as an array) made of
		// exactly 32 hex digits; the D modifier stops "$" from tolerating
		// a trailing newline.
		if (isset($_COOKIE[$this->_csrf_cookie_name])
			&& is_string($_COOKIE[$this->_csrf_cookie_name])
			&& preg_match('#^[0-9a-f]{32}$#iDS', $_COOKIE[$this->_csrf_cookie_name]) === 1)
		{
			return $this->_csrf_hash = $_COOKIE[$this->_csrf_cookie_name];
		}

		// Fall back to md5(uniqid()) only when no random byte source
		// is available
		$rand = $this->get_random_bytes(16);
		$this->_csrf_hash = ($rand === FALSE)
			? md5(uniqid(mt_rand(), TRUE))
			: bin2hex($rand);
	}

	return $this->_csrf_hash;
}
983
Derek Jonese701d762010-03-02 18:17:01 -0600984}
Derek Jonese701d762010-03-02 18:17:01 -0600985
986/* End of file Security.php */
Andrey Andreev6c520962014-08-18 12:24:42 +0300987/* Location: ./system/core/Security.php */