blob: 2bf0f628417d579355c853269cb6615f41b12dfa [file] [log] [blame]
Andrey Andreevc5536aa2012-11-01 17:33:58 +02001<?php
Derek Jonese701d762010-03-02 18:17:01 -06002/**
3 * CodeIgniter
4 *
Andrey Andreevfe9309d2015-01-09 17:48:58 +02005 * An open source application development framework for PHP
Derek Jonese701d762010-03-02 18:17:01 -06006 *
Andrey Andreevbdb96ca2014-10-28 00:13:31 +02007 * This content is released under the MIT License (MIT)
Andrey Andreevbb488dc2012-01-07 23:35:16 +02008 *
Andrey Andreevfe9309d2015-01-09 17:48:58 +02009 * Copyright (c) 2014 - 2015, British Columbia Institute of Technology
Andrey Andreevbb488dc2012-01-07 23:35:16 +020010 *
Andrey Andreevbdb96ca2014-10-28 00:13:31 +020011 * Permission is hereby granted, free of charge, to any person obtaining a copy
12 * of this software and associated documentation files (the "Software"), to deal
13 * in the Software without restriction, including without limitation the rights
14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 * copies of the Software, and to permit persons to whom the Software is
16 * furnished to do so, subject to the following conditions:
Derek Jonesf4a4bd82011-10-20 12:18:42 -050017 *
Andrey Andreevbdb96ca2014-10-28 00:13:31 +020018 * The above copyright notice and this permission notice shall be included in
19 * all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27 * THE SOFTWARE.
28 *
29 * @package CodeIgniter
30 * @author EllisLab Dev Team
darwinel871754a2014-02-11 17:34:57 +010031 * @copyright Copyright (c) 2008 - 2014, EllisLab, Inc. (http://ellislab.com/)
Andrey Andreevfe9309d2015-01-09 17:48:58 +020032 * @copyright Copyright (c) 2014 - 2015, British Columbia Institute of Technology (http://bcit.ca/)
Andrey Andreevbdb96ca2014-10-28 00:13:31 +020033 * @license http://opensource.org/licenses/MIT MIT License
34 * @link http://codeigniter.com
35 * @since Version 1.0.0
Derek Jonese701d762010-03-02 18:17:01 -060036 * @filesource
37 */
// Stop immediately when the BASEPATH constant has not been defined before this file is loaded.
if ( ! defined('BASEPATH'))
{
	exit('No direct script access allowed');
}
Derek Jonese701d762010-03-02 18:17:01 -060039
Derek Jonese701d762010-03-02 18:17:01 -060040/**
41 * Security Class
42 *
43 * @package CodeIgniter
44 * @subpackage Libraries
45 * @category Security
Derek Jonesf4a4bd82011-10-20 12:18:42 -050046 * @author EllisLab Dev Team
Pascal Krietec9c045a2011-04-05 14:50:41 -040047 * @link http://codeigniter.com/user_guide/libraries/security.html
Derek Jonese701d762010-03-02 18:17:01 -060048 */
49class CI_Security {
Barry Mienydd671972010-10-04 16:33:58 +020050
/**
 * List of strings treated as bad characters in file names
 *
 * Holds literal sequences as well as URL-encoded (and double URL-encoded)
 * forms. The code that consumes this list is outside this section.
 *
 * @var	array
 */
public $filename_bad_chars = array(
	'../',
	'<!--',
	'-->',
	'<',
	'>',
	"'",
	'"',
	'&',
	'$',
	'#',
	'{',
	'}',
	'[',
	']',
	'=',
	';',
	'?',
	'%20',		// URL-encoded space
	'%22',		// URL-encoded "
	'%3c',		// URL-encoded <
	'%253c',	// double URL-encoded <
	'%3e',		// URL-encoded >
	'%0e',		// URL-encoded byte 0x0E (a control character, not ">")
	'%28',		// URL-encoded (
	'%29',		// URL-encoded )
	'%2528',	// double URL-encoded (
	'%26',		// URL-encoded &
	'%24',		// URL-encoded $
	'%3f',		// URL-encoded ?
	'%3b',		// URL-encoded ;
	'%3d'		// URL-encoded =
);
74
/**
 * Character set
 *
 * Starts out as UTF-8; the constructor overwrites it with the upper-cased
 * 'charset' configuration value.
 *
 * @var	string
 */
public $charset = 'UTF-8';

/**
 * XSS Hash
 *
 * Random hash for protecting URLs. Stays NULL until xss_hash() generates it.
 *
 * @var	string
 */
protected $_xss_hash;

/**
 * CSRF Hash
 *
 * Random hash stored in the Cross Site Request Forgery protection cookie.
 *
 * @var	string
 */
protected $_csrf_hash;

/**
 * CSRF Expire time
 *
 * Lifetime, in seconds, added to the current time when the CSRF cookie
 * is set. The default of 7200 is two hours.
 *
 * @var	int
 */
protected $_csrf_expire = 7200;

/**
 * CSRF Token name
 *
 * Key under which the CSRF token is looked up in $_POST.
 *
 * @var	string
 */
protected $_csrf_token_name = 'ci_csrf_token';

/**
 * CSRF Cookie name
 *
 * Name of the CSRF protection cookie. The constructor may prepend the
 * configured cookie prefix to it.
 *
 * @var	string
 */
protected $_csrf_cookie_name = 'ci_csrf_token';
Andrey Andreev3d113bd2011-10-05 00:03:20 +0300129
/**
 * List of never allowed strings
 *
 * Maps each literal string to the text it is replaced with.
 *
 * @var	array
 */
protected $_never_allowed_str = array(
	'document.cookie'	=> '[removed]',
	'document.write'	=> '[removed]',
	'.parentNode'		=> '[removed]',
	'.innerHTML'		=> '[removed]',
	'-moz-binding'		=> '[removed]',
	'<!--'				=> '&lt;!--',
	'-->'				=> '--&gt;',
	'<![CDATA['			=> '&lt;![CDATA[',
	'<comment>'			=> '&lt;comment&gt;'
);

/**
 * List of never allowed regex replacements
 *
 * Pattern fragments only; delimiters and modifiers are supplied by the
 * code that applies them (outside this section).
 *
 * @var	array
 */
protected $_never_allowed_regex = array(
	'javascript\s*:',
	'(document|(document\.)?window)\.(location|on\w*)',
	// CSS and IE
	'expression\s*(\(|&\#40;)',
	// IE, surprise!
	'vbscript\s*:',
	// IE
	'wscript\s*:',
	// IE
	'jscript\s*:',
	// IE
	'vbs\s*:',
	'Redirect\s+30\d',
	"([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
);
Andrey Andreev92ebfb62012-05-17 12:49:24 +0300163
/**
 * Class constructor
 *
 * When CSRF protection is enabled in the configuration, loads the CSRF
 * settings, applies the cookie prefix and sets the CSRF hash. The character
 * set is always taken from the configuration.
 *
 * @return	void
 */
public function __construct()
{
	if (config_item('csrf_protection'))
	{
		// Each config key maps onto the property of the same name with a leading underscore.
		// A NULL config value leaves the property default untouched.
		$csrf_keys = array('csrf_expire', 'csrf_token_name', 'csrf_cookie_name');
		foreach ($csrf_keys as $name)
		{
			$configured = config_item($name);
			if ($configured !== NULL)
			{
				$this->{'_'.$name} = $configured;
			}
		}

		// Prepend the application specific cookie prefix, if there is one
		$prefix = config_item('cookie_prefix');
		if ($prefix)
		{
			$this->_csrf_cookie_name = $prefix.$this->_csrf_cookie_name;
		}

		$this->_csrf_set_hash();
	}

	$this->charset = strtoupper(config_item('charset'));

	log_message('debug', 'Security Class Initialized');
}
197
198 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200199
/**
 * CSRF Verify
 *
 * For non-POST requests only the CSRF cookie is (re)sent. For POST requests
 * the token in $_POST must match the one in $_COOKIE, unless the current
 * URI matches one of the 'csrf_exclude_uris' patterns.
 *
 * @return	CI_Security|bool	$this, or the result of csrf_set_cookie()
 *					(which may be FALSE) for non-POST requests
 */
public function csrf_verify()
{
	// REQUEST_METHOD can be missing (e.g. outside of a web request); treat that as "not POST"
	$method = isset($_SERVER['REQUEST_METHOD']) ? strtoupper($_SERVER['REQUEST_METHOD']) : '';

	// If it's not a POST request we will set the CSRF cookie
	if ($method !== 'POST')
	{
		return $this->csrf_set_cookie();
	}

	// Check if URI has been whitelisted from CSRF checks
	if ($exclude_uris = config_item('csrf_exclude_uris'))
	{
		$uri = load_class('URI', 'core');
		foreach ($exclude_uris as $excluded)
		{
			// Each entry is used as a regular expression anchored to the whole URI string
			if (preg_match('#^'.$excluded.'$#i'.(UTF8_ENABLED ? 'u' : ''), $uri->uri_string()))
			{
				return $this;
			}
		}
	}

	// Do the tokens exist in both the _POST and _COOKIE arrays, and are they plain strings?
	// Array-valued input (e.g. token[]=x) is rejected instead of being compared.
	$valid = isset($_POST[$this->_csrf_token_name], $_COOKIE[$this->_csrf_cookie_name])
		&& is_string($_POST[$this->_csrf_token_name])
		&& is_string($_COOKIE[$this->_csrf_cookie_name]);

	// Do the tokens match? Use a timing-safe comparison where PHP provides one.
	if ($valid)
	{
		$valid = function_exists('hash_equals')
			? hash_equals($_COOKIE[$this->_csrf_cookie_name], $_POST[$this->_csrf_token_name])
			: ($_POST[$this->_csrf_token_name] === $_COOKIE[$this->_csrf_cookie_name]);
	}

	if ( ! $valid)
	{
		$this->csrf_show_error();
	}

	// We kill this since we're done and we don't want to pollute the _POST array
	unset($_POST[$this->_csrf_token_name]);

	// Regenerate on every submission?
	if (config_item('csrf_regenerate'))
	{
		// Nothing should last forever
		unset($_COOKIE[$this->_csrf_cookie_name]);
		$this->_csrf_hash = NULL;
	}

	$this->_csrf_set_hash();
	$this->csrf_set_cookie();

	log_message('debug', 'CSRF token verified');
	return $this;
}
Barry Mienydd671972010-10-04 16:33:58 +0200250
Derek Jonese701d762010-03-02 18:17:01 -0600251 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200252
/**
 * CSRF Set Cookie
 *
 * Sends the CSRF cookie using the cookie settings from the configuration.
 * Nothing is sent when 'cookie_secure' is enabled but the request is not
 * made over HTTPS.
 *
 * @codeCoverageIgnore
 * @return	CI_Security|bool	$this, or FALSE when a secure cookie cannot be sent
 */
public function csrf_set_cookie()
{
	$expire = time() + $this->_csrf_expire;
	$secure_cookie = (bool) config_item('cookie_secure');

	// A secure-only cookie is not sent over a non-HTTPS connection
	if ($secure_cookie && ! is_https())
	{
		return FALSE;
	}

	setcookie(
		$this->_csrf_cookie_name,
		$this->_csrf_hash,
		$expire,
		config_item('cookie_path'),
		config_item('cookie_domain'),
		$secure_cookie,
		config_item('cookie_httponly')
	);
	log_message('debug', 'CSRF cookie Set');

	return $this;
}
282
283 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200284
/**
 * Show CSRF Error
 *
 * Hands a fixed message and the 403 status code to show_error().
 *
 * @return	void
 */
public function csrf_show_error()
{
	$message = 'The action you have requested is not allowed.';
	$status_code = 403;

	show_error($message, $status_code);
}
294
295 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200296
/**
 * Get CSRF Hash
 *
 * Accessor for the protected CSRF hash.
 *
 * @see		CI_Security::$_csrf_hash
 * @return 	string	CSRF hash
 */
public function get_csrf_hash()
{
	$hash = $this->_csrf_hash;

	return $hash;
}
307
308 // --------------------------------------------------------------------
309
/**
 * Get CSRF Token Name
 *
 * Accessor for the protected CSRF token name.
 *
 * @see		CI_Security::$_csrf_token_name
 * @return	string	CSRF token name
 */
public function get_csrf_token_name()
{
	$token_name = $this->_csrf_token_name;

	return $token_name;
}
320
321 // --------------------------------------------------------------------
322
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting Hacks can be
 * prevented.  This method does a fair amount of work but
 * it is extremely thorough, designed to prevent even the
 * most obscure XSS attempts.  Nothing is ever 100% foolproof,
 * of course, but I haven't been able to get anything past
 * the filter.
 *
 * Note: Should only be used to deal with data upon submission.
 *	 It's not something that should be used for general
 *	 runtime processing.
 *
 * Arrays are cleaned element by element (recursively); the $is_image flag
 * is not forwarded to the elements.
 *
 * @link	http://channel.bitflux.ch/wiki/XSS_Prevention
 * 		Based in part on some code and ideas from Bitflux.
 *
 * @link	http://ha.ckers.org/xss.html
 * 		To help develop this script I used this great list of
 *		vulnerabilities along with a few other hacks I've
 *		harvested from examining vulnerabilities in other programs.
 *
 * @param	string|string[]	$str		Input data
 * @param 	bool		$is_image	Whether the input is an image
 * @return	string|string[]|bool	Cleaned data; when $is_image is TRUE, a boolean
 *					telling whether the filters left the data unchanged
 */
public function xss_clean($str, $is_image = FALSE)
{
	// Is the string an array?
	if (is_array($str))
	{
		// foreach instead of each(): each() was removed in PHP 8 and depended on the
		// array's internal pointer position
		foreach ($str as $key => $value)
		{
			$str[$key] = $this->xss_clean($value);
		}

		return $str;
	}

	// Remove Invisible Characters
	$str = remove_invisible_characters($str);

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Use rawurldecode() so it does not remove plus signs
	 *
	 * Decoding is repeated until no percent-encoded sequence is left, so
	 * multiply-encoded input is fully unwrapped.
	 */
	do
	{
		$str = rawurldecode($str);
	}
	while (preg_match('/%[0-9a-f]{2,}/i', $str));

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 */
	$str = preg_replace_callback("/[^a-z0-9>]+[a-z0-9]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
	$str = preg_replace_callback('/<\w+.*/si', array($this, '_decode_entity'), $str);

	// Remove Invisible Characters Again!
	$str = remove_invisible_characters($str);

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like this: ja	vascript
	 * NOTE: we deal with spaces between characters later.
	 * NOTE: preg_replace was found to be amazingly slow here on
	 * large blocks of data, so we use str_replace.
	 */
	$str = str_replace("\t", ' ', $str);

	// Capture converted string for later comparison
	$converted_string = $str;

	// Remove Strings that are never allowed
	$str = $this->_do_never_allowed($str);

	/*
	 * Makes PHP tags safe
	 *
	 * Note: XML tags are inadvertently replaced too:
	 *
	 * <?xml
	 *
	 * But it doesn't seem to pose a problem.
	 */
	if ($is_image === TRUE)
	{
		// Images have a tendency to have the PHP short opening and
		// closing tags every so often so we skip those and only
		// do the long opening tags.
		$str = preg_replace('/<\?(php)/i', '&lt;?\\1', $str);
	}
	else
	{
		$str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
	}

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like:  j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 */
	$words = array(
		'javascript', 'expression', 'vbscript', 'jscript', 'wscript',
		'vbs', 'script', 'base64', 'applet', 'alert', 'document',
		'write', 'cookie', 'window', 'confirm', 'prompt'
	);

	foreach ($words as $word)
	{
		// Allow optional whitespace after every letter of the word
		$word = implode('\s*', str_split($word)).'\s*';

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto"
		// (substr() drops the trailing '\s*' so the word itself ends the first group)
		$str = preg_replace_callback('#('.substr($word, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 * We used to do some version comparisons and use of stripos(),
	 * but it is dog slow compared to these simplified non-capturing
	 * preg_match(), especially if the pattern exists in the string
	 *
	 * Note: It was reported that not only space characters, but all in
	 * the following pattern can be parsed as separators between a tag name
	 * and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
	 * ... however, remove_invisible_characters() above already strips the
	 * hex-encoded ones, so we'll skip them below.
	 *
	 * The loop repeats until a full pass leaves the string unchanged.
	 */
	do
	{
		$original = $str;

		if (preg_match('/<a/i', $str))
		{
			$str = preg_replace_callback('#<a[^a-z0-9>]+([^>]*?)(?:>|$)#si', array($this, '_js_link_removal'), $str);
		}

		if (preg_match('/<img/i', $str))
		{
			$str = preg_replace_callback('#<img[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
		}

		if (preg_match('/script|xss/i', $str))
		{
			$str = preg_replace('#</*(?:script|xss).*?>#si', '[removed]', $str);
		}
	}
	while ($original !== $str);

	unset($original);

	// Remove evil attributes such as style, onclick and xmlns
	$str = $this->_remove_evil_attributes($str, $is_image);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 */
	$naughty = 'alert|prompt|confirm|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|button|select|isindex|layer|link|meta|keygen|object|plaintext|style|script|textarea|title|math|video|svg|xml|xss';
	$str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed. Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:	eval&#40;'some code'&#41;
	 */
	$str = preg_replace('#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
				'\\1\\2&#40;\\3&#41;',
				$str);

	// Final clean up
	// This adds a bit of extra precaution in case
	// something got through the above filters
	$str = $this->_do_never_allowed($str);

	/*
	 * Images are Handled in a Special Way
	 * - Essentially, we want to know that after all of the character
	 * conversion is done whether any unwanted, likely XSS, code was found.
	 * If not, we return TRUE, as the image is clean.
	 * However, if the string post-conversion does not match the
	 * string post-removal of XSS, then it fails, as there was unwanted XSS
	 * code found and removed/changed during processing.
	 */
	if ($is_image === TRUE)
	{
		return ($str === $converted_string);
	}

	log_message('debug', 'XSS Filtering completed');
	return $str;
}
539
540 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200541
/**
 * XSS Hash
 *
 * Returns the XSS hash, generating and caching it on first use. The hash is
 * the hex form of 16 random bytes; when no random bytes are available it
 * falls back to md5(uniqid(mt_rand(), TRUE)).
 *
 * @see		CI_Security::$_xss_hash
 * @return	string	XSS hash
 */
public function xss_hash()
{
	// Already generated earlier: reuse it
	if ($this->_xss_hash !== NULL)
	{
		return $this->_xss_hash;
	}

	$bytes = $this->get_random_bytes(16);

	if ($bytes === FALSE)
	{
		$this->_xss_hash = md5(uniqid(mt_rand(), TRUE));
	}
	else
	{
		$this->_xss_hash = bin2hex($bytes);
	}

	return $this->_xss_hash;
}
562
563 // --------------------------------------------------------------------
564
/**
 * Get random bytes
 *
 * Tries the available random sources in order: random_bytes(),
 * mcrypt_create_iv(), /dev/urandom and openssl_random_pseudo_bytes().
 *
 * @param	int	$length	Output length; must be a positive whole number
 * @return	string|bool	Random bytes, or FALSE when $length is invalid
 *				or no source could provide them
 */
public function get_random_bytes($length)
{
	if (empty($length) OR ! ctype_digit((string) $length))
	{
		return FALSE;
	}

	// Digit-only strings such as '00' pass the check above but ask for zero bytes
	$length = (int) $length;
	if ($length < 1)
	{
		return FALSE;
	}

	// Prefer the built-in CSPRNG where it exists (PHP 7+)
	if (function_exists('random_bytes'))
	{
		try
		{
			return random_bytes($length);
		}
		catch (Exception $e)
		{
			// No usable source for random_bytes(): fall through to the older ones
		}
	}

	// Unfortunately, none of the following PRNGs is guaranteed to exist ...
	if (defined('MCRYPT_DEV_URANDOM') && ($output = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM)) !== FALSE)
	{
		return $output;
	}

	if (is_readable('/dev/urandom') && ($fp = fopen('/dev/urandom', 'rb')) !== FALSE)
	{
		// Try not to waste entropy ...
		is_php('5.4') && stream_set_chunk_size($fp, $length);
		$output = fread($fp, $length);
		fclose($fp);
		if ($output !== FALSE)
		{
			return $output;
		}
	}

	if (function_exists('openssl_random_pseudo_bytes'))
	{
		return openssl_random_pseudo_bytes($length);
	}

	return FALSE;
}
604
605 // --------------------------------------------------------------------
606
/**
 * HTML Entities Decode
 *
 * A replacement for html_entity_decode()
 *
 * The reason we are not using html_entity_decode() by itself is because
 * while it is not technically correct to leave out the semicolon
 * at the end of an entity most browsers will still interpret the entity
 * correctly. html_entity_decode() does not convert entities without
 * semicolons, so named entities lacking one are replaced here by hand,
 * and numeric ones get the semicolon appended before decoding.
 *
 * Decoding is repeated until the string stops changing, so nested
 * encodings are unwrapped as well.
 *
 * @link	http://php.net/html-entity-decode
 *
 * @param	string	$str		Input
 * @param	string	$charset	Character set (defaults to $this->charset)
 * @return	string
 */
public function entity_decode($str, $charset = NULL)
{
	if (strpos($str, '&') === FALSE)
	{
		return $str;
	}

	// Lower-cased translation table, built once per request.
	// Note that it is built with the charset of the first call that needs it.
	static $_entities;

	isset($charset) OR $charset = $this->charset;
	$flag = is_php('5.4')
		? ENT_COMPAT | ENT_HTML5
		: ENT_COMPAT;

	do
	{
		$str_compare = $str;

		// Decode standard entities, avoiding false positives
		if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
		{
			if ( ! isset($_entities))
			{
				$_entities = array_map(
					'strtolower',
					is_php('5.3.4')
						? get_html_translation_table(HTML_ENTITIES, $flag, $charset)
						: get_html_translation_table(HTML_ENTITIES, $flag)
				);

				// If we're not on PHP 5.4+, add the possibly dangerous HTML 5
				// entities to the array manually
				if ($flag === ENT_COMPAT)
				{
					$_entities[':'] = '&colon;';
					$_entities['('] = '&lpar;';
					// The trailing semicolon is required: lookups below
					// always search for the "name;" form
					$_entities[')'] = '&rpar;';
					$_entities["\n"] = '&newline;';
					$_entities["\t"] = '&tab;';
				}
			}

			// array_unique() preserves keys, so the result may have gaps and
			// fewer elements than were matched; iterate over the values
			// instead of counting up to the raw match count.
			$replace = array();
			foreach (array_unique(array_map('strtolower', $matches[0])) as $entity)
			{
				if (($char = array_search($entity.';', $_entities, TRUE)) !== FALSE)
				{
					$replace[$entity] = $char;
				}
			}

			$str = str_ireplace(array_keys($replace), array_values($replace), $str);
		}

		// Decode numeric & UTF16 two byte entities
		$str = html_entity_decode(
			preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
			$flag,
			$charset
		);
	}
	while ($str_compare !== $str);

	return $str;
}
Barry Mienydd671972010-10-04 16:33:58 +0200689
Derek Jonesa0911472010-03-30 10:33:09 -0500690 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200691
/**
 * Sanitize Filename
 *
 * Strips every sequence listed in $this->filename_bad_chars from the
 * name, repeating until nothing more can be removed so that removing
 * one sequence cannot assemble another. Unless $relative_path is set,
 * "./" and "/" are stripped as well.
 *
 * @param	string	$str		Input file name
 * @param	bool	$relative_path	Whether to preserve paths
 * @return	string
 */
public function sanitize_filename($str, $relative_path = FALSE)
{
	$forbidden = $this->filename_bad_chars;

	if ( ! $relative_path)
	{
		// Path separators are only tolerated when paths are to be preserved
		$forbidden[] = './';
		$forbidden[] = '/';
	}

	$str = remove_invisible_characters($str, FALSE);

	// Keep stripping until a pass leaves the string untouched
	while (TRUE)
	{
		$before = $str;
		$str = str_replace($forbidden, '', $str);

		if ($before === $str)
		{
			break;
		}
	}

	return stripslashes($str);
}
720
Pascal Krietec9c045a2011-04-05 14:50:41 -0400721 // ----------------------------------------------------------------
722
/**
 * Strip Image Tags
 *
 * Replaces every <img> tag that carries a src attribute with the bare
 * value of that attribute. Tag and attribute names are matched
 * case-insensitively; the value may be quoted or unquoted.
 *
 * @param	string	$str
 * @return	string
 */
public function strip_image_tags($str)
{
	return preg_replace(
		array(
			// Quoted value: the closing quote must be the one that opened it
			'#<img[\s/]+.*?src\s*=\s*(["\'])(.+?)\\1.*?\>#i',
			// Unquoted value: runs up to whitespace, a quote or a tag delimiter
			'#<img[\s/]+.*?src\s*=\s*([^\s"\'=<>`]+).*?\>#i'
		),
		array('\\2', '\\1'),
		$str
	);
}
733
734 // ----------------------------------------------------------------
735
/**
 * Compact Exploded Words
 *
 * Callback method for xss_clean() to remove whitespace from
 * things like 'j a v a s c r i p t'.
 *
 * All whitespace is removed from the first captured group; the second
 * captured group is appended unchanged.
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$matches
 * @return	string
 */
protected function _compact_exploded_words($matches)
{
	$word = preg_replace('/\s+/s', '', $matches[1]);
	$trailer = $matches[2];

	return $word.$trailer;
}
750
751 // --------------------------------------------------------------------
David Behler07b53422011-08-15 00:25:06 +0200752
/**
 * Remove Evil HTML Attributes (like event handlers and style)
 *
 * It removes the evil attribute and either:
 *
 *  - Everything up until a space. For example, everything between the pipes:
 *
 *	<code>
 *		<a |style=document.write('hello');alert('world');| class=link>
 *	</code>
 *
 *  - Everything inside the quotes. For example, everything between the pipes:
 *
 *	<code>
 *		<a |style="document.write('hello'); alert('world');"| class="link">
 *	</code>
 *
 * The scan-and-strip cycle is repeated until a pass performs no
 * replacement.
 *
 * @param	string	$str		The string to check
 * @param	bool	$is_image	Whether the input is an image
 * @return	string	The string with the evil attributes removed
 */
protected function _remove_evil_attributes($str, $is_image)
{
	$evil_attributes = array('on\w*', 'style', 'xmlns', 'formaction', 'form', 'xlink:href');

	if ($is_image === TRUE)
	{
		/*
		 * Adobe Photoshop puts XML metadata into JFIF images,
		 * including namespacing, so we have to allow this for images.
		 */
		unset($evil_attributes[array_search('xmlns', $evil_attributes)]);
	}

	// The attribute list does not change between passes, so both
	// detection patterns can be assembled once up front.
	$names = implode('|', $evil_attributes);
	$detectors = array(
		// illegal attribute strings with quotes (042 and 047 are octal quotes)
		'/(?<!\w)('.$names.')\s*=\s*(\042|\047)([^\\2]*?)(\\2)/is',
		// illegal attribute strings without quotes
		'/(?<!\w)('.$names.')\s*=\s*([^\s>]*)/is'
	);

	do
	{
		$count = 0;
		$found = array();

		foreach ($detectors as $detector)
		{
			preg_match_all($detector, $str, $matches, PREG_SET_ORDER);

			foreach ($matches as $attr)
			{
				// Escape the literal text so it can be embedded in the pattern below
				$found[] = preg_quote($attr[0], '/');
			}
		}

		// replace illegal attribute strings that are inside an html tag
		if (count($found) > 0)
		{
			$str = preg_replace('/(<?)(\/?[^><]+?)([^A-Za-z<>\-])(.*?)('.implode('|', $found).')(.*?)([\s><]?)([><]*)/i', '$1$2 $4$6$7$8', $str, -1, $count);
		}
	}
	while ($count);

	return $str;
}
David Behler07b53422011-08-15 00:25:06 +0200817
Pascal Krietec9c045a2011-04-05 14:50:41 -0400818 // --------------------------------------------------------------------
819
/**
 * Sanitize Naughty HTML
 *
 * Callback method for xss_clean() to remove naughty HTML elements.
 *
 * The opening brace is emitted as an entity, captured groups 1 to 3
 * are passed through as they are, and any brace inside group 4 is
 * entity-encoded too.
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$matches
 * @return	string
 */
protected function _sanitize_naughty_html($matches)
{
	// encode captured opening or closing brace to prevent recursive vectors
	$tail = str_replace(array('>', '<'), array('&gt;', '&lt;'), $matches[4]);

	// encode opening brace
	$head = '&lt;'.$matches[1].$matches[2].$matches[3];

	return $head.$tail;
}
835
836 // --------------------------------------------------------------------
837
/**
 * JS Link Removal
 *
 * Callback method for xss_clean() to sanitize links.
 *
 * This limits the PCRE backtracks, making it more performance friendly
 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on link-heavy strings.
 *
 * The attribute string ($match[1]) is stripped of angle brackets and
 * reduced by _filter_attributes(); everything from "href=" up to and
 * including the first script-like token is then removed, and the result
 * takes the place of the attribute string inside the full tag ($match[0]).
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$match
 * @return	string
 */
protected function _js_link_removal($match)
{
	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]));

	$cleaned = preg_replace(
		'#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|data\s*:)#si',
		'',
		$attributes
	);

	return str_replace($match[1], $cleaned, $match[0]);
}
860
861 // --------------------------------------------------------------------
862
/**
 * JS Image Removal
 *
 * Callback method for xss_clean() to sanitize image tags.
 *
 * This limits the PCRE backtracks, making it more performance friendly
 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on image tag heavy strings.
 *
 * The attribute string ($match[1]) is stripped of angle brackets and
 * reduced by _filter_attributes(); everything from "src=" up to and
 * including the first script-like token is then removed, and the result
 * takes the place of the attribute string inside the full tag ($match[0]).
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$match
 * @return	string
 */
protected function _js_img_removal($match)
{
	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]));

	$cleaned = preg_replace(
		'#src=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si',
		'',
		$attributes
	);

	return str_replace($match[1], $cleaned, $match[0]);
}
885
886 // --------------------------------------------------------------------
887
/**
 * Attribute Conversion
 *
 * Entity-encodes angle brackets and doubles backslashes in the whole
 * matched text ($match[0]).
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$match
 * @return	string
 */
protected function _convert_attribute($match)
{
	$search = array('>', '<', '\\');
	$replacement = array('&gt;', '&lt;', '\\\\');

	return str_replace($search, $replacement, $match[0]);
}
899
900 // --------------------------------------------------------------------
901
/**
 * Filter Attributes
 *
 * Filters tag attributes for consistency and safety.
 *
 * Only attributes whose value is enclosed in matching quotes are kept;
 * they are concatenated in order of appearance, with any C-style
 * comment removed from each of them.
 *
 * @used-by	CI_Security::_js_img_removal()
 * @used-by	CI_Security::_js_link_removal()
 * @param	string	$str
 * @return	string
 */
protected function _filter_attributes($str)
{
	if ( ! preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches))
	{
		// No quoted attribute found
		return '';
	}

	$filtered = '';
	foreach ($matches[0] as $attribute)
	{
		$filtered .= preg_replace('#/\*.*?\*/#s', '', $attribute);
	}

	return $filtered;
}
925
926 // --------------------------------------------------------------------
927
/**
 * HTML Entity Decode Callback
 *
 * Decodes the entities in the matched text ($match[0]) while leaving
 * the "&" that separates GET variables in URLs untouched.
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$match
 * @return	string
 */
protected function _decode_entity($match)
{
	// Protect GET variables in URLs: the "&" in front of a name=value
	// pair is swapped for the XSS hash so that decoding cannot touch it
	$guarded = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i', $this->xss_hash().'\\1=\\2', $match[0]);

	$placeholder = $this->xss_hash();
	$decoded = $this->entity_decode($guarded, $this->charset);

	// Un-protect URL GET vars
	return str_replace($placeholder, '&', $decoded);
}
948
949 // --------------------------------------------------------------------
David Behler07b53422011-08-15 00:25:06 +0200950
/**
 * Do Never Allowed
 *
 * Applies the literal substitutions from $this->_never_allowed_str
 * (key replaced by value), then replaces every match of each pattern
 * in $this->_never_allowed_regex with "[removed]".
 *
 * @used-by	CI_Security::xss_clean()
 * @param	string	$str
 * @return	string
 */
protected function _do_never_allowed($str)
{
	$literals = array_keys($this->_never_allowed_str);
	$str = str_replace($literals, $this->_never_allowed_str, $str);

	foreach ($this->_never_allowed_regex as $pattern)
	{
		// Patterns are stored without delimiters or modifiers
		$str = preg_replace('#'.$pattern.'#is', '[removed]', $str);
	}

	return $str;
}
969
970 // --------------------------------------------------------------------
971
/**
 * Set CSRF Hash and Cookie
 *
 * Resolves the CSRF hash for this request: an already resolved value
 * is reused, otherwise a well-formed value found in the CSRF cookie is
 * adopted, otherwise a new one is generated. This method only decides
 * on the value; it does not send a cookie itself.
 *
 * @return	string
 */
protected function _csrf_set_hash()
{
	if ($this->_csrf_hash !== NULL)
	{
		return $this->_csrf_hash;
	}

	// If the cookie exists we will use its value.
	// We don't necessarily want to regenerate it with
	// each page load since a page could contain embedded
	// sub-pages causing this feature to fail
	$cookie = $this->_csrf_cookie_name;
	if (isset($_COOKIE[$cookie]) && is_string($_COOKIE[$cookie])
		&& preg_match('#^[0-9a-f]{32}$#iS', $_COOKIE[$cookie]) === 1)
	{
		return $this->_csrf_hash = $_COOKIE[$cookie];
	}

	$rand = $this->get_random_bytes(16);
	if ($rand === FALSE)
	{
		// No random source was available
		$this->_csrf_hash = md5(uniqid(mt_rand(), TRUE));
	}
	else
	{
		$this->_csrf_hash = bin2hex($rand);
	}

	return $this->_csrf_hash;
}
999
Derek Jonese701d762010-03-02 18:17:01 -06001000}
Derek Jonese701d762010-03-02 18:17:01 -06001001
1002/* End of file Security.php */
Jason Taylore11657c2014-12-16 12:33:36 +00001003/* Location: ./system/core/Security.php */