blob: 3d04d5e12681145a82df4002efe4e4da52a1e89e [file] [log] [blame]
Andrey Andreevc5536aa2012-11-01 17:33:58 +02001<?php
Derek Jonese701d762010-03-02 18:17:01 -06002/**
3 * CodeIgniter
4 *
Phil Sturgeon07c1ac82012-03-09 17:03:37 +00005 * An open source application development framework for PHP 5.2.4 or newer
Derek Jonese701d762010-03-02 18:17:01 -06006 *
Derek Jonesf4a4bd82011-10-20 12:18:42 -05007 * NOTICE OF LICENSE
Andrey Andreevbb488dc2012-01-07 23:35:16 +02008 *
Derek Jonesf4a4bd82011-10-20 12:18:42 -05009 * Licensed under the Open Software License version 3.0
Andrey Andreevbb488dc2012-01-07 23:35:16 +020010 *
Derek Jonesf4a4bd82011-10-20 12:18:42 -050011 * This source file is subject to the Open Software License (OSL 3.0) that is
12 * bundled with this package in the files license.txt / license.rst. It is
13 * also available through the world wide web at this URL:
14 * http://opensource.org/licenses/OSL-3.0
15 * If you did not receive a copy of the license and are unable to obtain it
16 * through the world wide web, please send an email to
17 * licensing@ellislab.com so we can send you a copy immediately.
18 *
Derek Jonese701d762010-03-02 18:17:01 -060019 * @package CodeIgniter
Derek Jonesf4a4bd82011-10-20 12:18:42 -050020 * @author EllisLab Dev Team
Andrey Andreev80500af2013-01-01 08:16:53 +020021 * @copyright Copyright (c) 2008 - 2013, EllisLab, Inc. (http://ellislab.com/)
Derek Jonesf4a4bd82011-10-20 12:18:42 -050022 * @license http://opensource.org/licenses/OSL-3.0 Open Software License (OSL 3.0)
Derek Jonese701d762010-03-02 18:17:01 -060023 * @link http://codeigniter.com
24 * @since Version 1.0
25 * @filesource
26 */
Andrey Andreevc5536aa2012-11-01 17:33:58 +020027defined('BASEPATH') OR exit('No direct script access allowed');
Derek Jonese701d762010-03-02 18:17:01 -060028
Derek Jonese701d762010-03-02 18:17:01 -060029/**
30 * Security Class
31 *
32 * @package CodeIgniter
33 * @subpackage Libraries
34 * @category Security
Derek Jonesf4a4bd82011-10-20 12:18:42 -050035 * @author EllisLab Dev Team
Pascal Krietec9c045a2011-04-05 14:50:41 -040036 * @link http://codeigniter.com/user_guide/libraries/security.html
Derek Jonese701d762010-03-02 18:17:01 -060037 */
38class CI_Security {
Barry Mienydd671972010-10-04 16:33:58 +020039
/**
 * Substrings stripped from file names by sanitize_filename()
 *
 * Public so that applications can inspect or replace the list.
 *
 * @var	array
 */
public $sanitize_filename_str = array(
	'../',
	'<!--',
	'-->',
	'<',
	'>',
	"'",
	'"',
	'&',
	'$',
	'#',
	'{',
	'}',
	'[',
	']',
	'=',
	';',
	'?',
	'%20',		// URL-encoded space
	'%22',		// URL-encoded "
	'%3c',		// URL-encoded <
	'%253c',	// double URL-encoded <
	'%3e',		// URL-encoded >
	'%0e',		// URL-encoded Shift Out control byte (0x0E)
	'%28',		// URL-encoded (
	'%29',		// URL-encoded )
	'%2528',	// double URL-encoded (
	'%26',		// URL-encoded &
	'%24',		// URL-encoded $
	'%3f',		// URL-encoded ?
	'%3b',		// URL-encoded ;
	'%3d'		// URL-encoded =
);

/**
 * XSS hash
 *
 * Random marker string, generated lazily by xss_hash().
 *
 * @var	string
 */
protected $_xss_hash = '';

/**
 * CSRF hash
 *
 * Value of the Cross Site Request Forgery protection cookie.
 *
 * @var	string
 */
protected $_csrf_hash = '';

/**
 * CSRF expiration time
 *
 * Lifetime of the CSRF cookie in seconds; 7200 is two hours.
 *
 * @var	int
 */
protected $_csrf_expire = 7200;

/**
 * CSRF token name
 *
 * Name of the POST field that carries the CSRF token.
 *
 * @var	string
 */
protected $_csrf_token_name = 'ci_csrf_token';

/**
 * CSRF cookie name
 *
 * Name of the cookie that carries the CSRF token.
 *
 * @var	string
 */
protected $_csrf_cookie_name = 'ci_csrf_token';

/**
 * Strings that are never allowed
 *
 * Maps each forbidden literal to the text that replaces it.
 *
 * @var	array
 */
protected $_never_allowed_str = array(
	'document.cookie'	=> '[removed]',
	'document.write'	=> '[removed]',
	'.parentNode'		=> '[removed]',
	'.innerHTML'		=> '[removed]',
	'window.location'	=> '[removed]',
	'-moz-binding'		=> '[removed]',
	'<!--'			=> '&lt;!--',
	'-->'			=> '--&gt;',
	'<![CDATA['		=> '&lt;![CDATA[',
	'<comment>'		=> '&lt;comment&gt;'
);

/**
 * Regular expression fragments that are never allowed
 *
 * @var	array
 */
protected $_never_allowed_regex = array(
	'javascript\s*:',
	'expression\s*(\(|&\#40;)',	// CSS and IE
	'vbscript\s*:',			// IE
	'Redirect\s+302',
	"([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
);
Andrey Andreev92ebfb62012-05-17 12:49:24 +0300140
/**
 * Class constructor
 *
 * When CSRF protection is switched on in the configuration, copies the
 * CSRF settings onto this object, applies the cookie prefix and prepares
 * the CSRF hash. Always logs that the class was initialized.
 *
 * @return	void
 */
public function __construct()
{
	$csrf_enabled = (config_item('csrf_protection') === TRUE);

	if ($csrf_enabled)
	{
		// Each configured option overrides the matching "_"-prefixed property
		$options = array('csrf_expire', 'csrf_token_name', 'csrf_cookie_name');

		foreach ($options as $option)
		{
			$value = config_item($option);

			if ($value === FALSE)
			{
				continue;
			}

			$property = '_'.$option;
			$this->$property = $value;
		}

		// Prepend the application specific cookie prefix, if there is one
		if (config_item('cookie_prefix'))
		{
			$this->_csrf_cookie_name = config_item('cookie_prefix').$this->_csrf_cookie_name;
		}

		$this->_csrf_set_hash();
	}

	log_message('debug', 'Security Class Initialized');
}
172
173 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200174
/**
 * CSRF Verify
 *
 * For non-POST requests this only (re)sends the CSRF cookie. For POST
 * requests the token submitted in $_POST must be a string identical to
 * the one in $_COOKIE, unless the current URI is listed in the
 * 'csrf_exclude_uris' configuration item; otherwise csrf_show_error()
 * is invoked.
 *
 * @return	CI_Security|bool	Whatever csrf_set_cookie() returns for
 *					non-POST requests, $this otherwise
 */
public function csrf_verify()
{
	// If it's not a POST request we will set the CSRF cookie
	if (strtoupper($_SERVER['REQUEST_METHOD']) !== 'POST')
	{
		return $this->csrf_set_cookie();
	}

	// Check if URI has been whitelisted from CSRF checks
	if ($exclude_uris = config_item('csrf_exclude_uris'))
	{
		$uri = load_class('URI', 'core');

		// Strict comparison: loosely compared numeric-looking strings
		// (e.g. "1e1" and "10") must not whitelist each other. The cast
		// lets a single URI be configured as a plain string.
		if (in_array($uri->uri_string(), (array) $exclude_uris, TRUE))
		{
			return $this;
		}
	}

	$post_token = isset($_POST[$this->_csrf_token_name])
		? $_POST[$this->_csrf_token_name]
		: NULL;
	$cookie_token = isset($_COOKIE[$this->_csrf_cookie_name])
		? $_COOKIE[$this->_csrf_cookie_name]
		: NULL;

	// Both tokens must exist, be plain strings (request data may also be
	// arrays) and match exactly
	if ( ! is_string($post_token) OR ! is_string($cookie_token) OR $post_token !== $cookie_token)
	{
		$this->csrf_show_error();
	}

	// We kill this since we're done and we don't want to pollute the _POST array
	unset($_POST[$this->_csrf_token_name]);

	// Regenerate on every submission?
	if (config_item('csrf_regenerate'))
	{
		// Nothing should last forever
		unset($_COOKIE[$this->_csrf_cookie_name]);
		$this->_csrf_hash = '';
	}

	$this->_csrf_set_hash();
	$this->csrf_set_cookie();

	log_message('debug', 'CSRF token verified');
	return $this;
}
Barry Mienydd671972010-10-04 16:33:58 +0200222
Derek Jonese701d762010-03-02 18:17:01 -0600223 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200224
/**
 * CSRF Set Cookie
 *
 * Sends the CSRF cookie holding the current hash. Nothing is sent when
 * the 'cookie_secure' configuration item is enabled but the request is
 * not served over HTTPS.
 *
 * @codeCoverageIgnore
 * @return	CI_Security|bool	$this once the cookie was handed to
 *					setcookie(), FALSE when a secure cookie
 *					was requested on a non-HTTPS request
 */
public function csrf_set_cookie()
{
	$expire = time() + $this->_csrf_expire;
	$secure_cookie = (bool) config_item('cookie_secure');

	// A "secure" cookie must never travel over plain HTTP
	if ($secure_cookie && ! is_https())
	{
		return FALSE;
	}

	setcookie(
		$this->_csrf_cookie_name,
		$this->_csrf_hash,
		$expire,
		config_item('cookie_path'),
		config_item('cookie_domain'),
		$secure_cookie,
		config_item('cookie_httponly')
	);
	log_message('debug', 'CSRF cookie sent');

	return $this;
}
254
255 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200256
/**
 * Show CSRF Error
 *
 * Hands a fixed "not allowed" message to show_error().
 *
 * @return	void
 */
public function csrf_show_error()
{
	$message = 'The action you have requested is not allowed.';

	show_error($message);
}
266
267 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200268
/**
 * Get CSRF Hash
 *
 * Accessor for the protected CSRF hash property.
 *
 * @see		CI_Security::$_csrf_hash
 * @return 	string	CSRF hash
 */
public function get_csrf_hash()
{
	$hash = $this->_csrf_hash;

	return $hash;
}
279
280 // --------------------------------------------------------------------
281
/**
 * Get CSRF Token Name
 *
 * Accessor for the protected CSRF token name property.
 *
 * @see		CI_Security::$_csrf_token_name
 * @return	string	CSRF token name
 */
public function get_csrf_token_name()
{
	$token_name = $this->_csrf_token_name;

	return $token_name;
}
292
293 // --------------------------------------------------------------------
294
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting Hacks can be
 * prevented. This method does a fair amount of work and is designed
 * to catch even obscure XSS attempts, but nothing is ever 100%
 * foolproof.
 *
 * Note: Should only be used to deal with data upon submission.
 *	 It's not something that should be used for general
 *	 runtime processing.
 *
 * Note: Arrays are cleaned element by element; the $is_image flag is
 *	 not forwarded to those nested calls.
 *
 * @link	http://channel.bitflux.ch/wiki/XSS_Prevention
 * 		Based in part on some code and ideas from Bitflux.
 *
 * @link	http://ha.ckers.org/xss.html
 * 		To help develop this script I used this great list of
 * 		vulnerabilities along with a few other hacks I've
 * 		harvested from examining vulnerabilities in other programs.
 *
 * @param	string|string[]	$str		Input data
 * @param 	bool		$is_image	Whether the input is an image
 * @return	string|string[]|bool	The cleaned string, the cleaned array
 *					when an array was given, or - when
 *					$is_image is TRUE - a boolean telling
 *					whether the input survived unchanged
 */
public function xss_clean($str, $is_image = FALSE)
{
	// Arrays are cleaned recursively, one element at a time.
	// Iterating over the keys avoids each(), which was removed in PHP 8
	// and depends on the array's internal pointer position.
	if (is_array($str))
	{
		foreach (array_keys($str) as $key)
		{
			$str[$key] = $this->xss_clean($str[$key]);
		}

		return $str;
	}

	// Remove Invisible Characters and validate entities in URLs
	$str = $this->_validate_entities(remove_invisible_characters($str));

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Use rawurldecode() so it does not remove plus signs
	 */
	$str = rawurldecode($str);

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 */
	$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
	$str = preg_replace_callback('/<\w+.*/si', array($this, '_decode_entity'), $str);

	// Remove Invisible Characters Again!
	$str = remove_invisible_characters($str);

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like "ja[TAB]vascript".
	 * NOTE: we deal with spaces between characters later.
	 * NOTE: preg_replace was found to be amazingly slow here on
	 * large blocks of data, so we use str_replace.
	 */
	$str = str_replace("\t", ' ', $str);

	// Capture converted string for later comparison
	$converted_string = $str;

	// Remove Strings that are never allowed
	$str = $this->_do_never_allowed($str);

	/*
	 * Makes PHP tags safe
	 *
	 * Note: XML declarations are inadvertently replaced too,
	 * but it doesn't seem to pose a problem.
	 */
	if ($is_image === TRUE)
	{
		// Images have a tendency to have the PHP short opening and
		// closing tags every so often so we skip those and only
		// do the long opening tags.
		$str = preg_replace('/<\?(php)/i', '&lt;?\\1', $str);
	}
	else
	{
		$str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
	}

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like:  j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 */
	$words = array(
		'javascript', 'expression', 'vbscript', 'script', 'base64',
		'applet', 'alert', 'document', 'write', 'cookie', 'window'
	);

	foreach ($words as $word)
	{
		// Allow optional whitespace between every pair of letters
		$spaced = implode('\s*', str_split($word));

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto"
		$str = preg_replace_callback('#('.$spaced.')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 *
	 * Repeated until a full pass leaves the string unchanged, because
	 * removing one construct can expose another.
	 */
	do
	{
		$original = $str;

		if (preg_match('/<a/i', $str))
		{
			$str = preg_replace_callback('#<a\s+([^>]*?)(?:>|$)#si', array($this, '_js_link_removal'), $str);
		}

		if (preg_match('/<img/i', $str))
		{
			$str = preg_replace_callback('#<img\s+([^>]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
		}

		if (preg_match('/script|xss/i', $str))
		{
			$str = preg_replace('#</*(?:script|xss).*?>#si', '[removed]', $str);
		}
	}
	while ($original !== $str);

	unset($original);

	// Remove evil attributes such as style, onclick and xmlns
	$str = $this->_remove_evil_attributes($str, $is_image);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 */
	$naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';
	$str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed. Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:	eval&#40;'some code'&#41;
	 */
	$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
				'\\1\\2&#40;\\3&#41;',
				$str);

	// Final clean up
	// This adds a bit of extra precaution in case
	// something got through the above filters
	$str = $this->_do_never_allowed($str);

	/*
	 * Images are Handled in a Special Way
	 *
	 * For images the caller only wants to know whether any unwanted,
	 * likely XSS, code was found. That is the case exactly when the
	 * string after filtering differs from the string captured right
	 * after the character conversions above.
	 */
	if ($is_image === TRUE)
	{
		return ($str === $converted_string);
	}

	log_message('debug', 'XSS Filtering completed');
	return $str;
}
500
501 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200502
/**
 * XSS Hash
 *
 * Generates the XSS hash if needed and returns it. The value is created
 * once per instance and is always 32 hexadecimal characters long.
 *
 * @see		CI_Security::$_xss_hash
 * @return	string	XSS hash
 */
public function xss_hash()
{
	if ($this->_xss_hash === '')
	{
		$random = FALSE;

		// Prefer the CSPRNG where the runtime offers one (PHP 7+);
		// older runtimes keep using the legacy generator below.
		if (function_exists('random_bytes'))
		{
			try
			{
				$random = random_bytes(16);
			}
			catch (Exception $e)
			{
				// No usable entropy source - fall back
				$random = FALSE;
			}
		}

		$this->_xss_hash = ($random === FALSE)
			? md5(uniqid(mt_rand(), TRUE))
			: bin2hex($random);
	}

	return $this->_xss_hash;
}
520
521 // --------------------------------------------------------------------
522
/**
 * HTML Entities Decode
 *
 * A replacement for html_entity_decode()
 *
 * The reason we are not using html_entity_decode() by itself is because
 * while it is not technically correct to leave out the semicolon
 * at the end of an entity most browsers will still interpret the entity
 * correctly. html_entity_decode() does not convert entities without
 * semicolons, so numeric entities lacking one are decoded here by hand.
 * Decoding repeats until a pass finds no more such entities.
 *
 * @link	http://php.net/html-entity-decode
 *
 * @param	string	$str		Input
 * @param	string	$charset	Character set; the 'charset'
 *					configuration item is used when empty
 * @return	string
 */
public function entity_decode($str, $charset = NULL)
{
	// Nothing to decode without an ampersand
	if (strpos($str, '&') === FALSE)
	{
		return $str;
	}

	if (empty($charset))
	{
		$charset = config_item('charset');
	}

	do
	{
		$matches = $matches1 = 0;

		$str = html_entity_decode($str, ENT_COMPAT, $charset);

		// Callbacks replace the PCRE "e" modifier, which no longer
		// exists as of PHP 7.
		$str = preg_replace_callback('~&#x(0*[0-9a-f]{2,5})~i', array($this, '_entity_decode_hex'), $str, -1, $matches);
		$str = preg_replace_callback('~&#([0-9]{2,4})~', array($this, '_entity_decode_dec'), $str, -1, $matches1);
	}
	while ($matches OR $matches1);

	return $str;
}

/**
 * Converts the hexadecimal digits of a numeric entity into a character.
 *
 * @used-by	CI_Security::entity_decode()
 * @param	array	$match	Regex match; index 1 holds the hex digits
 * @return	string
 */
protected function _entity_decode_hex($match)
{
	return chr((int) hexdec($match[1]));
}

/**
 * Converts the decimal digits of a numeric entity into a character.
 *
 * The digits are read as base 10, so leading zeros are harmless.
 *
 * @used-by	CI_Security::entity_decode()
 * @param	array	$match	Regex match; index 1 holds the decimal digits
 * @return	string
 */
protected function _entity_decode_dec($match)
{
	return chr((int) $match[1]);
}
Barry Mienydd671972010-10-04 16:33:58 +0200564
Derek Jonesa0911472010-03-30 10:33:09 -0500565 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200566
/**
 * Set Sanitize Filename Strings
 *
 * Replaces the list of substrings that sanitize_filename() strips.
 * A single string is accepted as well and stored as a one-element
 * list, because sanitize_filename() appends to the list and therefore
 * needs an array.
 *
 * @param	array|string	$strings	Substrings to strip
 * @return	void
 */
public function set_sanitize_filename_str($strings)
{
	$this->sanitize_filename_str = (array) $strings;
}
577
578 // --------------------------------------------------------------------
579
/**
 * Sanitize Filename
 *
 * Strips invisible characters and every substring listed in
 * $sanitize_filename_str from the name. Unless $relative_path is
 * truthy, './' and '/' are stripped too. Stripping repeats until the
 * name stops changing, then backslash escapes are removed.
 *
 * @param	string	$str		Input file name
 * @param 	bool	$relative_path	Whether to preserve paths
 * @return	string
 */
public function sanitize_filename($str, $relative_path = FALSE)
{
	$blacklist = $this->sanitize_filename_str;

	// Path separators only survive when the caller asked for paths
	$relative_path OR array_push($blacklist, './', '/');

	$str = remove_invisible_characters($str, FALSE);

	// Removing one fragment can join its neighbours into a new
	// forbidden fragment, so keep going until a pass is a no-op
	while (TRUE)
	{
		$stripped = str_replace($blacklist, '', $str);

		if ($stripped === $str)
		{
			break;
		}

		$str = $stripped;
	}

	return stripslashes($str);
}
608
Pascal Krietec9c045a2011-04-05 14:50:41 -0400609 // ----------------------------------------------------------------
610
/**
 * Strip Image Tags
 *
 * Replaces every <img> tag with the value of its src attribute.
 * Tag and attribute names are matched case-insensitively; the src value
 * may be quoted or unquoted.
 *
 * @param	string	$str
 * @return	string
 */
public function strip_image_tags($str)
{
	$patterns = array(
		// src value wrapped in single or double quotes
		'#<img\s+.*?src\s*=\s*["\'](.+?)["\'].*?\>#i',
		// unquoted src value: runs up to whitespace, a quote or a bracket
		'#<img\s+.*?src\s*=\s*([^\s"\'<>]+).*?\>#i'
	);

	return preg_replace($patterns, '\\1', $str);
}

// ----------------------------------------------------------------

/**
 * Compact Exploded Words
 *
 * Callback method for xss_clean() to remove whitespace from
 * things like 'j a v a s c r i p t'.
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$matches
 * @return	string
 */
protected function _compact_exploded_words($matches)
{
	// Group 1 is the spaced-out word: collapse every run of whitespace in it.
	$compacted = preg_replace('/\s+/s', '', $matches[1]);

	// Group 2 is whatever followed the word; it is appended untouched.
	return $compacted.$matches[2];
}

// --------------------------------------------------------------------

/**
 * Remove Evil HTML Attributes (like event handlers and style)
 *
 * It removes the evil attribute and either:
 *
 *  - Everything up until a space. For example, everything between the pipes:
 *
 *	<code>
 *		<a |style=document.write('hello');alert('world');| class=link>
 *	</code>
 *
 *  - Everything inside the quotes. For example, everything between the pipes:
 *
 *	<code>
 *		<a |style="document.write('hello'); alert('world');"| class="link">
 *	</code>
 *
 * @param	string	$str		The string to check
 * @param	bool	$is_image	Whether the input is an image
 * @return	string	The string with the evil attributes removed
 */
protected function _remove_evil_attributes($str, $is_image)
{
	// All javascript event handlers (e.g. onload, onclick, onmouseover),
	// plus style, xmlns and formaction.
	$evil_attributes = array('on\w*', 'style', 'xmlns', 'formaction');

	if ($is_image === TRUE)
	{
		// Adobe Photoshop puts XML metadata into JFIF images, including
		// namespacing, so xmlns has to be allowed for images.
		unset($evil_attributes[array_search('xmlns', $evil_attributes)]);
	}

	// The attribute list is fixed from here on, so the two detection
	// patterns can be built once, outside the loop.
	$names = implode('|', $evil_attributes);
	$finders = array(
		// quoted values (042 and 047 are the octal codes of " and ')
		'/('.$names.')\s*=\s*(\042|\047)([^\\2]*?)(\\2)/is',
		// unquoted values
		'/('.$names.')\s*=\s*([^\s>]*)/is'
	);

	// Keep stripping until a pass removes nothing, so that an attribute
	// revealed by a previous removal is caught as well.
	do
	{
		$count = 0;
		$attribs = array();

		// Collect every offending attribute string, escaped for literal
		// use inside the replacement pattern below.
		foreach ($finders as $finder)
		{
			preg_match_all($finder, $str, $found, PREG_SET_ORDER);

			foreach ($found as $hit)
			{
				$attribs[] = preg_quote($hit[0], '/');
			}
		}

		// Replace illegal attribute strings that are inside an html tag.
		if (count($attribs) > 0)
		{
			$str = preg_replace('/(<?)(\/?[^><]+?)([^A-Za-z<>\-])(.*?)('.implode('|', $attribs).')(.*?)([\s><]?)([><]*)/i', '$1$2 $4$6$7$8', $str, -1, $count);
		}
	}
	while ($count);

	return $str;
}

// --------------------------------------------------------------------

/**
 * Sanitize Naughty HTML
 *
 * Callback method for xss_clean() to remove naughty HTML elements.
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$matches
 * @return	string
 */
protected function _sanitize_naughty_html($matches)
{
	// Encode any captured opening or closing brace in the tail of the
	// match to prevent recursive vectors.
	$tail = strtr($matches[4], array('>' => '&gt;', '<' => '&lt;'));

	// The tag's own opening brace is emitted in encoded form; groups
	// 1 to 3 are passed through as captured.
	return '&lt;'.$matches[1].$matches[2].$matches[3].$tail;
}

// --------------------------------------------------------------------

/**
 * JS Link Removal
 *
 * Callback method for xss_clean() to sanitize links.
 *
 * This limits the PCRE backtracks, making it more performance friendly
 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on link-heavy strings.
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$match
 * @return	string
 */
protected function _js_link_removal($match)
{
	// Drop angle brackets from the captured attribute string, then
	// pass it through _filter_attributes().
	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]));

	// Cut everything from "href=" up to and including the first
	// scripting marker found after it.
	$cleaned = preg_replace(
		'#href=.*?(?:alert\(|alert&\#40;|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|data\s*:)#si',
		'',
		$attributes
	);

	// Put the cleaned attributes back in place of the captured ones
	// inside the full match.
	return str_replace($match[1], $cleaned, $match[0]);
}

// --------------------------------------------------------------------

/**
 * JS Image Removal
 *
 * Callback method for xss_clean() to sanitize image tags.
 *
 * This limits the PCRE backtracks, making it more performance friendly
 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on image tag heavy strings.
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$match
 * @return	string
 */
protected function _js_img_removal($match)
{
	// Drop angle brackets from the captured attribute string, then
	// pass it through _filter_attributes().
	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]));

	// Cut everything from "src=" up to and including the first
	// scripting marker found after it.
	$cleaned = preg_replace(
		'#src=.*?(?:alert\(|alert&\#40;|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si',
		'',
		$attributes
	);

	// Put the cleaned attributes back in place of the captured ones
	// inside the full match.
	return str_replace($match[1], $cleaned, $match[0]);
}

// --------------------------------------------------------------------

/**
 * Attribute Conversion
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$match
 * @return	string
 */
protected function _convert_attribute($match)
{
	// Encode angle brackets and double every backslash in the full match.
	$map = array(
		'>'  => '&gt;',
		'<'  => '&lt;',
		'\\' => '\\\\'
	);

	return strtr($match[0], $map);
}

// --------------------------------------------------------------------

/**
 * Filter Attributes
 *
 * Filters tag attributes for consistency and safety.
 *
 * @used-by	CI_Security::_js_img_removal()
 * @used-by	CI_Security::_js_link_removal()
 * @param	string	$str
 * @return	string
 */
protected function _filter_attributes($str)
{
	// Only quoted name="value" / name='value' pairs are kept; if there
	// are none the result is an empty string.
	if ( ! preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $pairs))
	{
		return '';
	}

	// Strip /* ... */ comments from every kept pair, then glue the
	// pairs back together in their original order.
	return implode('', preg_replace('#/\*.*?\*/#s', '', $pairs[0]));
}

// --------------------------------------------------------------------

/**
 * HTML Entity Decode Callback
 *
 * @used-by	CI_Security::xss_clean()
 * @param	array	$match
 * @return	string
 */
protected function _decode_entity($match)
{
	// The configured charset is passed upper-cased to entity_decode().
	$charset = strtoupper(config_item('charset'));

	// Decode the full match.
	return $this->entity_decode($match[0], $charset);
}

// --------------------------------------------------------------------

/**
 * Validate URL entities
 *
 * @used-by	CI_Security::xss_clean()
 * @param	string	$str
 * @return	string
 */
protected function _validate_entities($str)
{
	// Protect GET variables in URLs: the "&" in front of each
	// "&name=value" pair is swapped for the XSS hash so the entity
	// rules below leave it alone.
	$str = preg_replace(
		'|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-]+)|i',
		$this->xss_hash().'\\1=\\2',
		$str
	);

	// Applied in this order:
	//  1. Standard character entities: add a semicolon if missing, to
	//     enable the conversion of entities to ASCII later.
	//  2. UTF16 two byte encoding (x00): likewise adds a semicolon
	//     if missing.
	$entity_rules = array(
		'#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i' => '\\1;\\2',
		'#(&\#x?)([0-9A-F]+);?#i'               => '\\1\\2;'
	);

	foreach ($entity_rules as $pattern => $replacement)
	{
		$str = preg_replace($pattern, $replacement, $str);
	}

	// Un-protect GET variables in URLs.
	return str_replace($this->xss_hash(), '&', $str);
}

// ----------------------------------------------------------------------

/**
 * Do Never Allowed
 *
 * @used-by	CI_Security::xss_clean()
 * @param	string	$str
 * @return	string
 */
protected function _do_never_allowed($str)
{
	// Literal substitutions: each key of _never_allowed_str is
	// replaced by its value.
	$literals = $this->_never_allowed_str;
	$str = str_replace(array_keys($literals), $literals, $str);

	// Regex substitutions: wrap every fragment in delimiters and flags,
	// then let preg_replace() apply the patterns one after another.
	$patterns = array();

	foreach ($this->_never_allowed_regex as $fragment)
	{
		$patterns[] = '#'.$fragment.'#is';
	}

	return preg_replace($patterns, '[removed]', $str);
}

// --------------------------------------------------------------------

/**
 * Set CSRF Hash and Cookie
 *
 * @return	string
 */
protected function _csrf_set_hash()
{
	if ($this->_csrf_hash === '')
	{
		// If the cookie exists we will use its value.
		// We don't necessarily want to regenerate it with
		// each page load since a page could contain embedded
		// sub-pages causing this feature to fail.
		//
		// The is_string() guard matters because a request can supply
		// the cookie as an array (name[]=...), which must never
		// reach preg_match().
		if (isset($_COOKIE[$this->_csrf_cookie_name])
			&& is_string($_COOKIE[$this->_csrf_cookie_name])
			&& preg_match('#^[0-9a-f]{32}$#iS', $_COOKIE[$this->_csrf_cookie_name]) === 1)
		{
			return $this->_csrf_hash = $_COOKIE[$this->_csrf_cookie_name];
		}

		// A CSRF token has to be unpredictable, so prefer a
		// cryptographically secure source. 16 random bytes hex-encode
		// to the same 32 hex characters the cookie check above expects.
		$token = NULL;

		if (function_exists('random_bytes'))
		{
			try
			{
				$token = bin2hex(random_bytes(16));
			}
			catch (Exception $e)
			{
				// No usable entropy source; try the next option.
				$token = NULL;
			}
		}

		if ($token === NULL && function_exists('openssl_random_pseudo_bytes'))
		{
			try
			{
				$bytes = openssl_random_pseudo_bytes(16);

				if (is_string($bytes) && strlen($bytes) === 16)
				{
					$token = bin2hex($bytes);
				}
			}
			catch (Exception $e)
			{
				$token = NULL;
			}
		}

		// Last resort for installations that offer neither function;
		// same 32-hex-character format, but only weakly random.
		if ($token === NULL)
		{
			$token = md5(uniqid(mt_rand(), TRUE));
		}

		$this->_csrf_hash = $token;
		$this->csrf_set_cookie();
	}

	return $this->_csrf_hash;
}
916
Derek Jonese701d762010-03-02 18:17:01 -0600917}
Derek Jonese701d762010-03-02 18:17:01 -0600918
/* End of file Security.php */
/* Location: ./system/core/Security.php */