blob: 31926b466fc356026ac4db3340a996a8ba6240fa [file] [log] [blame]
Andrey Andreevc5536aa2012-11-01 17:33:58 +02001<?php
Derek Jonese701d762010-03-02 18:17:01 -06002/**
3 * CodeIgniter
4 *
Andrey Andreevfe9309d2015-01-09 17:48:58 +02005 * An open source application development framework for PHP
Derek Jonese701d762010-03-02 18:17:01 -06006 *
Andrey Andreevbdb96ca2014-10-28 00:13:31 +02007 * This content is released under the MIT License (MIT)
Andrey Andreevbb488dc2012-01-07 23:35:16 +02008 *
Andrey Andreevcce6bd12018-01-09 11:32:02 +02009 * Copyright (c) 2014 - 2018, British Columbia Institute of Technology
Andrey Andreevbb488dc2012-01-07 23:35:16 +020010 *
Andrey Andreevbdb96ca2014-10-28 00:13:31 +020011 * Permission is hereby granted, free of charge, to any person obtaining a copy
12 * of this software and associated documentation files (the "Software"), to deal
13 * in the Software without restriction, including without limitation the rights
14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 * copies of the Software, and to permit persons to whom the Software is
16 * furnished to do so, subject to the following conditions:
Derek Jonesf4a4bd82011-10-20 12:18:42 -050017 *
Andrey Andreevbdb96ca2014-10-28 00:13:31 +020018 * The above copyright notice and this permission notice shall be included in
19 * all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27 * THE SOFTWARE.
28 *
29 * @package CodeIgniter
30 * @author EllisLab Dev Team
Andrey Andreev1924e872016-01-11 12:55:34 +020031 * @copyright Copyright (c) 2008 - 2014, EllisLab, Inc. (https://ellislab.com/)
Andrey Andreevcce6bd12018-01-09 11:32:02 +020032 * @copyright Copyright (c) 2014 - 2018, British Columbia Institute of Technology (http://bcit.ca/)
Andrey Andreevbdb96ca2014-10-28 00:13:31 +020033 * @license http://opensource.org/licenses/MIT MIT License
Andrey Andreevbd202c92016-01-11 12:50:18 +020034 * @link https://codeigniter.com
Andrey Andreevbdb96ca2014-10-28 00:13:31 +020035 * @since Version 1.0.0
Derek Jonese701d762010-03-02 18:17:01 -060036 * @filesource
37 */
Andrey Andreevc5536aa2012-11-01 17:33:58 +020038defined('BASEPATH') OR exit('No direct script access allowed');
Derek Jonese701d762010-03-02 18:17:01 -060039
Derek Jonese701d762010-03-02 18:17:01 -060040/**
41 * Security Class
42 *
43 * @package CodeIgniter
44 * @subpackage Libraries
45 * @category Security
Derek Jonesf4a4bd82011-10-20 12:18:42 -050046 * @author EllisLab Dev Team
Andrey Andreevbd202c92016-01-11 12:50:18 +020047 * @link https://codeigniter.com/user_guide/libraries/security.html
Derek Jonese701d762010-03-02 18:17:01 -060048 */
49class CI_Security {
Barry Mienydd671972010-10-04 16:33:58 +020050
/**
 * List of sanitize filename strings
 *
 * Literal and URL-encoded character sequences regarded as unsafe in
 * file names. The order of the entries is preserved on purpose.
 *
 * @var	array
 */
public $filename_bad_chars = array(
	'../',
	'<!--',
	'-->',
	'<',
	'>',
	"'",
	'"',
	'&',
	'$',
	'#',
	'{',
	'}',
	'[',
	']',
	'=',
	';',
	'?',
	'%20',		// space
	'%22',		// "
	'%3c',		// <
	'%253c',	// < (double-encoded: %25 is '%')
	'%3e',		// >
	'%0e',		// 0x0E control byte (NOT '>', which is %3e above)
	'%28',		// (
	'%29',		// )
	'%2528',	// ( (double-encoded)
	'%26',		// &
	'%24',		// $
	'%3f',		// ?
	'%3b',		// ;
	'%3d'		// =
);
74
/**
 * Character set
 *
 * Only a default: the constructor replaces it with the upper-cased
 * 'charset' configuration item.
 *
 * @var	string
 */
public $charset = 'UTF-8';

/**
 * XSS Hash
 *
 * Random hash for protecting URLs. Stays NULL until xss_hash()
 * generates it on first use.
 *
 * @var	string
 */
protected $_xss_hash;

/**
 * CSRF Hash
 *
 * Random hash used as the value of the Cross Site Request Forgery
 * protection cookie.
 *
 * @var	string
 */
protected $_csrf_hash;

/**
 * CSRF Expire time
 *
 * Lifetime, in seconds, of the Cross Site Request Forgery protection
 * cookie. Defaults to two hours.
 *
 * @var	int
 */
protected $_csrf_expire = 7200;

/**
 * CSRF Token name
 *
 * Name of the POST field that carries the Cross Site Request Forgery
 * protection token.
 *
 * @var	string
 */
protected $_csrf_token_name = 'ci_csrf_token';

/**
 * CSRF Cookie name
 *
 * Name of the Cross Site Request Forgery protection cookie. The
 * constructor prepends the configured cookie prefix, if any.
 *
 * @var	string
 */
protected $_csrf_cookie_name = 'ci_csrf_token';
Andrey Andreev3d113bd2011-10-05 00:03:20 +0300129
/**
 * List of never allowed strings
 *
 * Maps each forbidden literal string to its replacement.
 *
 * @var	array
 */
protected $_never_allowed_str = array(
	'document.cookie'	=> '[removed]',
	'(document).cookie'	=> '[removed]',
	'document.write'	=> '[removed]',
	'(document).write'	=> '[removed]',
	'.parentNode'		=> '[removed]',
	'.innerHTML'		=> '[removed]',
	'-moz-binding'		=> '[removed]',
	'<!--'			=> '&lt;!--',
	'-->'			=> '--&gt;',
	'<![CDATA['		=> '&lt;![CDATA[',
	'<comment>'		=> '&lt;comment&gt;',
	'<%'			=> '&lt;&#37;'
);

/**
 * List of never allowed regex replacements
 *
 * Pattern fragments (without delimiters or modifiers) describing
 * content that is never allowed.
 *
 * @var	array
 */
protected $_never_allowed_regex = array(
	'javascript\s*:',
	'(\(?document\)?|\(?window\)?(\.document)?)\.(location|on\w*)',
	'expression\s*(\(|&\#40;)', // CSS and IE
	'vbscript\s*:', // IE, surprise!
	'wscript\s*:', // IE
	'jscript\s*:', // IE
	'vbs\s*:', // IE
	'Redirect\s+30\d',
	"([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
);
Andrey Andreev92ebfb62012-05-17 12:49:24 +0300166
/**
 * Class constructor
 *
 * When CSRF protection is enabled in the configuration, loads the
 * CSRF settings, applies the cookie prefix and sets the CSRF hash.
 * Always loads the configured character set.
 *
 * @return	void
 */
public function __construct()
{
	// The CSRF settings only matter when the protection is switched on
	if (config_item('csrf_protection'))
	{
		// Copy every configured CSRF option onto its "_"-prefixed property;
		// options that are not configured (NULL) keep their defaults
		foreach (array('csrf_expire', 'csrf_token_name', 'csrf_cookie_name') as $setting)
		{
			$configured = config_item($setting);

			if ($configured !== NULL)
			{
				$this->{'_'.$setting} = $configured;
			}
		}

		// Prepend the application specific cookie prefix, if there is one
		$prefix = config_item('cookie_prefix');

		if ($prefix)
		{
			$this->_csrf_cookie_name = $prefix.$this->_csrf_cookie_name;
		}

		// Set the CSRF hash
		$this->_csrf_set_hash();
	}

	$this->charset = strtoupper(config_item('charset'));

	log_message('info', 'Security Class Initialized');
}
200
201 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200202
/**
 * CSRF Verify
 *
 * For non-POST requests only the CSRF cookie is (re)sent. For POST
 * requests the submitted token is compared against the cookie, unless
 * the current URI matches one of the 'csrf_exclude_uris' patterns.
 * On mismatch csrf_show_error() is called.
 *
 * @return	CI_Security|bool	$this; on non-POST requests the result of
 *					csrf_set_cookie(), which may be FALSE
 */
public function csrf_verify()
{
	// If it's not a POST request we will set the CSRF cookie
	if (strtoupper($_SERVER['REQUEST_METHOD']) !== 'POST')
	{
		return $this->csrf_set_cookie();
	}

	// Check if URI has been whitelisted from CSRF checks
	if ($exclude_uris = config_item('csrf_exclude_uris'))
	{
		$uri = load_class('URI', 'core');
		foreach ($exclude_uris as $excluded)
		{
			// Each entry is used as a regular expression anchored to the whole URI string
			if (preg_match('#^'.$excluded.'$#i'.(UTF8_ENABLED ? 'u' : ''), $uri->uri_string()))
			{
				return $this;
			}
		}
	}

	// Check CSRF token validity, but don't error on mismatch just yet - we'll want to regenerate.
	// Request data may be an array (e.g. "token[]=x"); hash_equals() only accepts
	// strings, so anything else is treated as an invalid token instead of
	// triggering a warning or a TypeError.
	$valid = isset($_POST[$this->_csrf_token_name], $_COOKIE[$this->_csrf_cookie_name])
		&& is_string($_POST[$this->_csrf_token_name])
		&& is_string($_COOKIE[$this->_csrf_cookie_name])
		&& hash_equals($_POST[$this->_csrf_token_name], $_COOKIE[$this->_csrf_cookie_name]);

	// We kill this since we're done and we don't want to pollute the _POST array
	unset($_POST[$this->_csrf_token_name]);

	// Regenerate on every submission?
	if (config_item('csrf_regenerate'))
	{
		// Nothing should last forever
		unset($_COOKIE[$this->_csrf_cookie_name]);
		$this->_csrf_hash = NULL;
	}

	$this->_csrf_set_hash();
	$this->csrf_set_cookie();

	if ($valid !== TRUE)
	{
		$this->csrf_show_error();
	}

	log_message('info', 'CSRF token verified');
	return $this;
}
Barry Mienydd671972010-10-04 16:33:58 +0200255
Derek Jonese701d762010-03-02 18:17:01 -0600256 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200257
/**
 * CSRF Set Cookie
 *
 * Sends the CSRF hash to the client as a cookie, using the cookie
 * path, domain, secure and httponly settings from the configuration.
 *
 * @codeCoverageIgnore
 * @return	CI_Security|bool	$this, or FALSE when 'cookie_secure' is
 *					enabled but the request is not HTTPS
 */
public function csrf_set_cookie()
{
	$expires_at = time() + $this->_csrf_expire;
	$https_only = (bool) config_item('cookie_secure');

	// 'cookie_secure' is on but this is not an HTTPS request: send nothing
	if ($https_only && ! is_https())
	{
		return FALSE;
	}

	$path = config_item('cookie_path');
	$domain = config_item('cookie_domain');
	$http_only = config_item('cookie_httponly');

	setcookie($this->_csrf_cookie_name, $this->_csrf_hash, $expires_at, $path, $domain, $https_only, $http_only);
	log_message('info', 'CSRF cookie sent');

	return $this;
}
287
288 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200289
/**
 * Show CSRF Error
 *
 * Reports a failed CSRF check through show_error() with HTTP status 403.
 *
 * @return	void
 */
public function csrf_show_error()
{
	$message = 'The action you have requested is not allowed.';

	show_error($message, 403);
}
299
300 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200301
/**
 * Get CSRF Hash
 *
 * Accessor for the protected CSRF hash property.
 *
 * @see		CI_Security::$_csrf_hash
 * @return 	string	CSRF hash
 */
public function get_csrf_hash()
{
	$hash = $this->_csrf_hash;

	return $hash;
}
312
313 // --------------------------------------------------------------------
314
/**
 * Get CSRF Token Name
 *
 * Accessor for the protected CSRF token name property.
 *
 * @see		CI_Security::$_csrf_token_name
 * @return	string	CSRF token name
 */
public function get_csrf_token_name()
{
	$token_name = $this->_csrf_token_name;

	return $token_name;
}
325
326 // --------------------------------------------------------------------
327
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting Hacks can be
 * prevented.  This method does a fair amount of work but
 * it is extremely thorough, designed to prevent even the
 * most obscure XSS attempts.  Nothing is ever 100% foolproof,
 * of course, but I haven't been able to get anything past
 * the filter.
 *
 * Note: Should only be used to deal with data upon submission.
 *	 It's not something that should be used for general
 *	 runtime processing.
 *
 * @link	http://channel.bitflux.ch/wiki/XSS_Prevention
 * 		Based in part on some code and ideas from Bitflux.
 *
 * @link	http://ha.ckers.org/xss.html
 * 		To help develop this script I used this great list of
 * 		vulnerabilities along with a few other hacks I've
 * 		harvested from examining vulnerabilities in other programs.
 *
 * @param	string|string[]	$str		Input data
 * @param 	bool		$is_image	Whether the input is an image
 * @return	string|string[]|bool	The cleaned string; an array with every
 *					element cleaned when an array was given
 *					(elements are cleaned without $is_image);
 *					for images, TRUE when nothing had to be
 *					removed or changed and FALSE otherwise
 */
public function xss_clean($str, $is_image = FALSE)
{
	// Is the string an array?
	if (is_array($str))
	{
		// Iterate by value: results are written back through $str[$key], so
		// no reference (which would be left dangling afterwards) is needed
		foreach ($str as $key => $value)
		{
			$str[$key] = $this->xss_clean($value);
		}

		return $str;
	}

	// Remove Invisible Characters
	$str = remove_invisible_characters($str);

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Use rawurldecode() so it does not remove plus signs
	 */
	if (strpos($str, '%') !== FALSE)
	{
		// Keep decoding until the string no longer changes, so that
		// multiply-encoded input is fully unwrapped
		do
		{
			$oldstr = $str;
			$str = rawurldecode($str);
			$str = preg_replace_callback('#%(?:\s*[0-9a-f]){2,}#i', array($this, '_urldecodespaces'), $str);
		}
		while ($oldstr !== $str);
		unset($oldstr);
	}

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 */
	$str = preg_replace_callback("/[^a-z0-9>]+[a-z0-9]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
	$str = preg_replace_callback('/<\w+.*/si', array($this, '_decode_entity'), $str);

	// Remove Invisible Characters Again!
	$str = remove_invisible_characters($str);

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like this: ja	vascript
	 * NOTE: we deal with spaces between characters later.
	 * NOTE: preg_replace was found to be amazingly slow here on
	 * large blocks of data, so we use str_replace.
	 */
	$str = str_replace("\t", ' ', $str);

	// Capture converted string for later comparison
	$converted_string = $str;

	// Remove Strings that are never allowed
	$str = $this->_do_never_allowed($str);

	/*
	 * Makes PHP tags safe
	 *
	 * Note: XML tags are inadvertently replaced too:
	 *
	 * <?xml
	 *
	 * But it doesn't seem to pose a problem.
	 */
	if ($is_image === TRUE)
	{
		// Images have a tendency to have the PHP short opening and
		// closing tags every so often so we skip those and only
		// do the long opening tags.
		$str = preg_replace('/<\?(php)/i', '&lt;?\\1', $str);
	}
	else
	{
		$str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
	}

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like:  j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 */
	$words = array(
		'javascript', 'expression', 'vbscript', 'jscript', 'wscript',
		'vbs', 'script', 'base64', 'applet', 'alert', 'document',
		'write', 'cookie', 'window', 'confirm', 'prompt', 'eval'
	);

	foreach ($words as $word)
	{
		$word = implode('\s*', str_split($word)).'\s*';

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto"
		$str = preg_replace_callback('#('.substr($word, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 * We used to do some version comparisons and use of stripos(),
	 * but it is dog slow compared to these simplified non-capturing
	 * preg_match(), especially if the pattern exists in the string
	 *
	 * Note: It was reported that not only space characters, but all in
	 * the following pattern can be parsed as separators between a tag name
	 * and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
	 * ... however, remove_invisible_characters() above already strips the
	 * hex-encoded ones, so we'll skip them below.
	 */
	do
	{
		$original = $str;

		if (preg_match('/<a/i', $str))
		{
			$str = preg_replace_callback('#<a(?:rea)?[^a-z0-9>]+([^>]*?)(?:>|$)#si', array($this, '_js_link_removal'), $str);
		}

		if (preg_match('/<img/i', $str))
		{
			$str = preg_replace_callback('#<img[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
		}

		if (preg_match('/script|xss/i', $str))
		{
			$str = preg_replace('#</*(?:script|xss).*?>#si', '[removed]', $str);
		}
	}
	while ($original !== $str);
	unset($original);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 */
	$pattern = '#'
		.'<((?<slash>/*\s*)((?<tagName>[a-z0-9]+)(?=[^a-z0-9]|$)|.+)' // tag start and name, followed by a non-tag character
		.'[^\s\042\047a-z0-9>/=]*' // a valid attribute character immediately after the tag would count as a separator
		// optional attributes
		.'(?<attributes>(?:[\s\042\047/=]*' // non-attribute characters, excluding > (tag close) for obvious reasons
		.'[^\s\042\047>/=]+' // attribute characters
		// optional attribute-value
			.'(?:\s*=' // attribute-value separator
				.'(?:[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*))' // single, double or non-quoted value
			.')?' // end optional attribute-value group
		.')*)' // end optional attributes group
		.'[^>]*)(?<closeTag>\>)?#isS';

	// Note: It would be nice to optimize this for speed, BUT
	//       only matching the naughty elements here results in
	//       false positives and in turn - vulnerabilities!
	do
	{
		$old_str = $str;
		$str = preg_replace_callback($pattern, array($this, '_sanitize_naughty_html'), $str);
	}
	while ($old_str !== $str);
	unset($old_str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed. Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:	eval&#40;'some code'&#41;
	 */
	$str = preg_replace(
		'#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
		'\\1\\2&#40;\\3&#41;',
		$str
	);

	// Same thing, but for "tag functions" (e.g. eval`some code`)
	// See https://github.com/bcit-ci/CodeIgniter/issues/5420
	$str = preg_replace(
		'#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)`(.*?)`#si',
		'\\1\\2&#96;\\3&#96;',
		$str
	);

	// Final clean up
	// This adds a bit of extra precaution in case
	// something got through the above filters
	$str = $this->_do_never_allowed($str);

	/*
	 * Images are Handled in a Special Way
	 * - Essentially, we want to know that after all of the character
	 * conversion is done whether any unwanted, likely XSS, code was found.
	 * If not, we return TRUE, as the image is clean.
	 * However, if the string post-conversion does not match the
	 * string post-removal of XSS, then it fails, as there was unwanted XSS
	 * code found and removed/changed during processing.
	 */
	if ($is_image === TRUE)
	{
		return ($str === $converted_string);
	}

	return $str;
}
576
577 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200578
Derek Jonese701d762010-03-02 18:17:01 -0600579 /**
Andrey Andreev64354102012-10-28 14:16:02 +0200580 * XSS Hash
Derek Jonese701d762010-03-02 18:17:01 -0600581 *
Andrey Andreev64354102012-10-28 14:16:02 +0200582 * Generates the XSS hash if needed and returns it.
583 *
584 * @see CI_Security::$_xss_hash
585 * @return string XSS hash
Derek Jonese701d762010-03-02 18:17:01 -0600586 */
public function xss_hash()
{
	// The hash is created lazily on first use and then cached on the instance.
	if ($this->_xss_hash !== NULL)
	{
		return $this->_xss_hash;
	}

	$bytes = $this->get_random_bytes(16);

	if ($bytes === FALSE)
	{
		// get_random_bytes() failed: derive a 32-character hex value
		// from uniqid()/mt_rand() instead.
		$this->_xss_hash = md5(uniqid(mt_rand(), TRUE));
	}
	else
	{
		// 16 random bytes rendered as 32 hex characters
		$this->_xss_hash = bin2hex($bytes);
	}

	return $this->_xss_hash;
}
599
600 // --------------------------------------------------------------------
601
602 /**
Andrey Andreev487ccc92014-08-27 16:26:23 +0300603 * Get random bytes
604 *
605 * @param int $length Output length
606 * @return string|false Random bytes, or FALSE on failure
607 */
public function get_random_bytes($length)
{
	// Returns $length random bytes as a binary string, or FALSE when
	// $length is not a positive integer (or digit string) or when no
	// random source could deliver the requested amount.
	if (empty($length) OR ! ctype_digit((string) $length))
	{
		return FALSE;
	}

	// Normalise once so that every source below receives a real integer
	// (random_bytes() would otherwise throw a TypeError on digit strings).
	$length = (int) $length;

	if (function_exists('random_bytes'))
	{
		try
		{
			return random_bytes($length);
		}
		catch (Exception $e)
		{
			// If random_bytes() can't do the job, we can't either ...
			// There's no point in using fallbacks.
			log_message('error', $e->getMessage());
			return FALSE;
		}
	}

	// Unfortunately, none of the following PRNGs is guaranteed to exist ...
	if (defined('MCRYPT_DEV_URANDOM') && ($output = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM)) !== FALSE)
	{
		return $output;
	}

	if (is_readable('/dev/urandom') && ($fp = fopen('/dev/urandom', 'rb')) !== FALSE)
	{
		// Try not to waste entropy ...
		is_php('5.4') && stream_set_chunk_size($fp, $length);
		$output = fread($fp, $length);
		fclose($fp);

		// fread() may legitimately return fewer bytes than requested; only
		// accept a complete read. The byte-offset check is used instead of
		// strlen() so the test is always byte-based.
		if ($output !== FALSE && isset($output[$length - 1]))
		{
			return $output;
		}
	}

	if (function_exists('openssl_random_pseudo_bytes'))
	{
		return openssl_random_pseudo_bytes($length);
	}

	return FALSE;
}
657
658 // --------------------------------------------------------------------
659
660 /**
Derek Jonesa0911472010-03-30 10:33:09 -0500661 * HTML Entities Decode
662 *
Andrey Andreev64354102012-10-28 14:16:02 +0200663 * A replacement for html_entity_decode()
Derek Jonesa0911472010-03-30 10:33:09 -0500664 *
Pascal Krietec38e3b62011-11-14 13:55:00 -0500665 * The reason we are not using html_entity_decode() by itself is because
666 * while it is not technically correct to leave out the semicolon
667 * at the end of an entity most browsers will still interpret the entity
Andrey Andreevbb488dc2012-01-07 23:35:16 +0200668 * correctly. html_entity_decode() does not convert entities without
Pascal Krietec38e3b62011-11-14 13:55:00 -0500669 * semicolons, so we are left with our own little solution here. Bummer.
Derek Jonesa0911472010-03-30 10:33:09 -0500670 *
Andrey Andreev64354102012-10-28 14:16:02 +0200671 * @link http://php.net/html-entity-decode
672 *
673 * @param string $str Input
674 * @param string $charset Character set
Derek Jonesa0911472010-03-30 10:33:09 -0500675 * @return string
676 */
public function entity_decode($str, $charset = NULL)
{
	// Decodes named and numeric HTML entities in $str, including entities
	// that are missing their terminating semicolon, repeating until the
	// string no longer changes. $charset defaults to $this->charset.
	if (strpos($str, '&') === FALSE)
	{
		return $str;
	}

	// Translation tables, cached per character set. A table built for one
	// charset must not be reused for another, as the decoded characters
	// are encoded in the charset the table was generated for.
	static $_entities = array();

	isset($charset) OR $charset = $this->charset;
	$flag = is_php('5.4')
		? ENT_COMPAT | ENT_HTML5
		: ENT_COMPAT;

	$cache_key = (string) $charset;

	if ( ! isset($_entities[$cache_key]))
	{
		$_entities[$cache_key] = array_map('strtolower', get_html_translation_table(HTML_ENTITIES, $flag, $charset));

		// If we're not on PHP 5.4+, add the possibly dangerous HTML 5
		// entities to the array manually
		if ($flag === ENT_COMPAT)
		{
			$_entities[$cache_key][':'] = '&colon;';
			$_entities[$cache_key]['('] = '&lpar;';
			$_entities[$cache_key][')'] = '&rpar;';
			$_entities[$cache_key]["\n"] = '&NewLine;';
			$_entities[$cache_key]["\t"] = '&Tab;';
		}
	}

	$entities = $_entities[$cache_key];

	do
	{
		$str_compare = $str;

		// Decode standard entities, avoiding false positives
		if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
		{
			$replace = array();
			$matches = array_unique(array_map('strtolower', $matches[0]));

			// Iterate by value: the elements are only read, and a
			// by-reference loop would leave a dangling reference behind.
			foreach ($matches as $match)
			{
				if (($char = array_search($match.';', $entities, TRUE)) !== FALSE)
				{
					$replace[$match] = $char;
				}
			}

			$str = str_replace(array_keys($replace), array_values($replace), $str);
		}

		// Decode numeric & UTF16 two byte entities
		// (the preg_replace() appends the missing semicolon first)
		$str = html_entity_decode(
			preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
			$flag,
			$charset
		);

		// Without ENT_HTML5, html_entity_decode() does not know the
		// entities added manually above, so translate the table by hand.
		if ($flag === ENT_COMPAT)
		{
			$str = str_replace(array_values($entities), array_keys($entities), $str);
		}
	}
	while ($str_compare !== $str);

	return $str;
}
Barry Mienydd671972010-10-04 16:33:58 +0200742
Derek Jonesa0911472010-03-30 10:33:09 -0500743 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200744
Derek Jonesa0911472010-03-30 10:33:09 -0500745 /**
Andrey Andreev64354102012-10-28 14:16:02 +0200746 * Sanitize Filename
Derek Jonese701d762010-03-02 18:17:01 -0600747 *
Andrey Andreev64354102012-10-28 14:16:02 +0200748 * @param string $str Input file name
749 * @param bool $relative_path Whether to preserve paths
Derek Jonese701d762010-03-02 18:17:01 -0600750 * @return string
751 */
public function sanitize_filename($str, $relative_path = FALSE)
{
	// Removes every sequence listed in $this->filename_bad_chars from $str
	// (plus './' and '/' unless $relative_path is TRUE), strips invisible
	// characters and backslashes, and returns the cleaned file name.
	$bad = $this->filename_bad_chars;

	if ( ! $relative_path)
	{
		$bad[] = './';
		$bad[] = '/';
	}

	$str = remove_invisible_characters($str, FALSE);

	// Repeat until stable, so that removing one sequence cannot
	// assemble another one out of the surrounding characters.
	do
	{
		$old = $str;
		$str = str_replace($bad, '', $str);
	}
	while ($old !== $str);

	$str = stripslashes($str);

	// Dropping backslashes can join previously separated fragments into a
	// forbidden sequence again (e.g. '.\./' turns into '../', which matters
	// if that sequence is on the bad-characters list), so filter once more.
	// Strings that were already clean after stripslashes() are unaffected.
	do
	{
		$old = $str;
		$str = str_replace($bad, '', $str);
	}
	while ($old !== $str);

	return $str;
}
773
Pascal Krietec9c045a2011-04-05 14:50:41 -0400774 // ----------------------------------------------------------------
775
776 /**
Andrey Andreev1a24a9d2012-06-27 00:52:47 +0300777 * Strip Image Tags
778 *
Andrey Andreev64354102012-10-28 14:16:02 +0200779 * @param string $str
Andrey Andreev1a24a9d2012-06-27 00:52:47 +0300780 * @return string
781 */
public function strip_image_tags($str)
{
	// Each <img> tag is collapsed to the value of its src attribute.
	// Two patterns are applied in order: quoted src values first,
	// then unquoted ones.
	$quoted_src = '#<img[\s/]+.*?src\s*=\s*(["\'])([^\\1]+?)\\1.*?\>#i';
	$unquoted_src = '#<img[\s/]+.*?src\s*=\s*?(([^\s"\'=<>`]+)).*?\>#i';

	return preg_replace(array($quoted_src, $unquoted_src), '\\2', $str);
}
793
794 // ----------------------------------------------------------------
795
796 /**
Andrey Andreev40282342016-10-26 17:41:18 +0300797 * URL-decode taking spaces into account
798 *
799 * @see https://github.com/bcit-ci/CodeIgniter/issues/4877
800 * @param array $matches
801 * @return string
802 */
protected function _urldecodespaces($matches)
{
	// The full match is URL-decoded only when it contained whitespace;
	// otherwise it is handed back exactly as it was matched.
	$original = $matches[0];
	$compacted = preg_replace('#\s+#', '', $original);

	if ($compacted !== $original)
	{
		return rawurldecode($compacted);
	}

	return $original;
}
811
812 // ----------------------------------------------------------------
813
814 /**
Pascal Krietec9c045a2011-04-05 14:50:41 -0400815 * Compact Exploded Words
816 *
Andrey Andreev64354102012-10-28 14:16:02 +0200817 * Callback method for xss_clean() to remove whitespace from
818 * things like 'j a v a s c r i p t'.
Pascal Krietec9c045a2011-04-05 14:50:41 -0400819 *
Andrey Andreev64354102012-10-28 14:16:02 +0200820 * @used-by CI_Security::xss_clean()
821 * @param array $matches
Timothy Warrenad475052012-04-19 13:21:06 -0400822 * @return string
Pascal Krietec9c045a2011-04-05 14:50:41 -0400823 */
protected function _compact_exploded_words($matches)
{
	// Group 1 is the spaced-out word, group 2 whatever followed it;
	// only the word has its whitespace removed.
	$word = preg_replace('/\s+/s', '', $matches[1]);

	return $word.$matches[2];
}
828
829 // --------------------------------------------------------------------
David Behler07b53422011-08-15 00:25:06 +0200830
Timothy Warrenad475052012-04-19 13:21:06 -0400831 /**
Pascal Krietec9c045a2011-04-05 14:50:41 -0400832 * Sanitize Naughty HTML
833 *
Andrey Andreev64354102012-10-28 14:16:02 +0200834 * Callback method for xss_clean() to remove naughty HTML elements.
Pascal Krietec9c045a2011-04-05 14:50:41 -0400835 *
Andrey Andreev64354102012-10-28 14:16:02 +0200836 * @used-by CI_Security::xss_clean()
837 * @param array $matches
Pascal Krietec9c045a2011-04-05 14:50:41 -0400838 * @return string
839 */
protected function _sanitize_naughty_html($matches)
{
	// Elements that are always escaped rather than passed through
	static $naughty_tags = array(
		'alert', 'area', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior', 'bgsound',
		'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame', 'head', 'html', 'ilayer',
		'iframe', 'input', 'button', 'select', 'isindex', 'layer', 'link', 'meta', 'keygen', 'object',
		'plaintext', 'style', 'script', 'textarea', 'title', 'math', 'video', 'svg', 'xml', 'xss'
	);

	// Attribute names (regex fragments) that are replaced on any tag
	static $evil_attributes = array(
		'on\w+', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime'
	);

	// An unclosed tag is escaped as-is
	if (empty($matches['closeTag']))
	{
		return '&lt;'.$matches[1];
	}

	// A naughty element is escaped on both ends
	if (in_array(strtolower($matches['tagName']), $naughty_tags, TRUE))
	{
		return '&lt;'.$matches[1].'&gt;';
	}

	// Nothing to filter: hand the tag back untouched
	if ( ! isset($matches['attributes']))
	{
		return $matches[0];
	}

	// Attributes that survived (or were replaced by) the filtering
	$kept = array();

	// Matches one attribute: a name followed by "=" and a bare,
	// double-quoted, single-quoted or empty value
	$attributes_pattern = '#'
		.'(?<name>[^\s\042\047>/=]+)'
		.'(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*)))'
		.'#i';

	// Blacklist pattern for evil attribute names
	$is_evil_pattern = '#^('.implode('|', $evil_attributes).')$#i';

	// The part of the attribute string that has not been consumed yet
	$remaining = $matches['attributes'];

	// Each iteration consumes exactly one attribute
	do
	{
		// Strip any non-alpha characters that may precede an attribute.
		// Browsers often parse these incorrectly and that has been a
		// source of numerous XSS issues.
		$remaining = preg_replace('#^[^a-z]+#i', '', $remaining);

		if ( ! preg_match($attributes_pattern, $remaining, $attribute, PREG_OFFSET_CAPTURE))
		{
			// No (valid) attribute found? Discard everything else inside the tag
			break;
		}

		// Replace the attribute when its name is blacklisted, or when it
		// has an equals sign but only an empty, unquoted value.
		$kept[] = (preg_match($is_evil_pattern, $attribute['name'][0]) OR trim($attribute['value'][0]) === '')
			? 'xss=removed'
			: $attribute[0][0];

		// Continue right after the attribute that was just handled
		$remaining = substr($remaining, $attribute[0][1] + strlen($attribute[0][0]));
	}
	while ($remaining !== '');

	$rendered = empty($kept)
		? ''
		: ' '.implode(' ', $kept);

	return '<'.$matches['slash'].$matches['tagName'].$rendered.'>';
}
919
920 // --------------------------------------------------------------------
921
922 /**
923 * JS Link Removal
924 *
Andrey Andreev64354102012-10-28 14:16:02 +0200925 * Callback method for xss_clean() to sanitize links.
926 *
Pascal Krietec9c045a2011-04-05 14:50:41 -0400927 * This limits the PCRE backtracks, making it more performance friendly
928 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
Andrey Andreev64354102012-10-28 14:16:02 +0200929 * PHP 5.2+ on link-heavy strings.
Pascal Krietec9c045a2011-04-05 14:50:41 -0400930 *
Andrey Andreev64354102012-10-28 14:16:02 +0200931 * @used-by CI_Security::xss_clean()
932 * @param array $match
Pascal Krietec9c045a2011-04-05 14:50:41 -0400933 * @return string
934 */
protected function _js_link_removal($match)
{
	// Group 1 holds the tag's attribute string. It is normalised first,
	// then any href that leads into a script-like construct is cut out,
	// and the result is substituted back into the full match.
	$filtered = $this->_filter_attributes($match[1]);

	$cleaned = preg_replace(
		'#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;|`|&\#96;)|javascript:|livescript:|mocha:|charset=|window\.|\(?document\)?\.|\.cookie|<script|<xss|d\s*a\s*t\s*a\s*:)#si',
		'',
		$filtered
	);

	return str_replace($match[1], $cleaned, $match[0]);
}
947
948 // --------------------------------------------------------------------
949
950 /**
951 * JS Image Removal
952 *
Andrey Andreev64354102012-10-28 14:16:02 +0200953 * Callback method for xss_clean() to sanitize image tags.
954 *
Pascal Krietec9c045a2011-04-05 14:50:41 -0400955 * This limits the PCRE backtracks, making it more performance friendly
956 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
Andrey Andreev64354102012-10-28 14:16:02 +0200957 * PHP 5.2+ on image tag heavy strings.
Pascal Krietec9c045a2011-04-05 14:50:41 -0400958 *
Andrey Andreev64354102012-10-28 14:16:02 +0200959 * @used-by CI_Security::xss_clean()
960 * @param array $match
Pascal Krietec9c045a2011-04-05 14:50:41 -0400961 * @return string
962 */
protected function _js_img_removal($match)
{
	// Group 1 holds the tag's attribute string. It is normalised first,
	// then any src that leads into a script-like construct is cut out,
	// and the result is substituted back into the full match.
	$filtered = $this->_filter_attributes($match[1]);

	$cleaned = preg_replace(
		'#src=.*?(?:(?:alert|prompt|confirm|eval)(?:\(|&\#40;|`|&\#96;)|javascript:|livescript:|mocha:|charset=|window\.|\(?document\)?\.|\.cookie|<script|<xss|base64\s*,)#si',
		'',
		$filtered
	);

	return str_replace($match[1], $cleaned, $match[0]);
}
975
976 // --------------------------------------------------------------------
977
978 /**
979 * Attribute Conversion
980 *
Andrey Andreev64354102012-10-28 14:16:02 +0200981 * @used-by CI_Security::xss_clean()
982 * @param array $match
Pascal Krietec9c045a2011-04-05 14:50:41 -0400983 * @return string
984 */
protected function _convert_attribute($match)
{
	// Angle brackets become entities and backslashes are doubled
	// throughout the full match.
	$map = array(
		'>'  => '&gt;',
		'<'  => '&lt;',
		'\\' => '\\\\'
	);

	return strtr($match[0], $map);
}
989
990 // --------------------------------------------------------------------
991
992 /**
993 * Filter Attributes
994 *
Andrey Andreev64354102012-10-28 14:16:02 +0200995 * Filters tag attributes for consistency and safety.
Pascal Krietec9c045a2011-04-05 14:50:41 -0400996 *
Andrey Andreev64354102012-10-28 14:16:02 +0200997 * @used-by CI_Security::_js_img_removal()
998 * @used-by CI_Security::_js_link_removal()
999 * @param string $str
Pascal Krietec9c045a2011-04-05 14:50:41 -04001000 * @return string
1001 */
protected function _filter_attributes($str)
{
	// Only quoted name="value" / name='value' pairs are kept; anything
	// else in the string is dropped.
	if ( ! preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $found))
	{
		return '';
	}

	$pieces = array();
	foreach ($found[0] as $attribute)
	{
		// Remove /* ... */ comments from the attribute text
		$pieces[] = preg_replace('#/\*.*?\*/#s', '', $attribute);
	}

	return implode('', $pieces);
}
1015
1016 // --------------------------------------------------------------------
1017
1018 /**
1019 * HTML Entity Decode Callback
1020 *
Andrey Andreev64354102012-10-28 14:16:02 +02001021 * @used-by CI_Security::xss_clean()
1022 * @param array $match
Pascal Krietec9c045a2011-04-05 14:50:41 -04001023 * @return string
1024 */
protected function _decode_entity($match)
{
	// The ampersand of every "&name=value" pair is swapped for the XSS
	// hash so that query-string separators survive entity decoding.
	$placeholder = $this->xss_hash();

	$protected = preg_replace(
		'|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i',
		$placeholder.'\\1=\\2',
		$match[0]
	);

	// Decode, then put the protected ampersands back
	$decoded = $this->entity_decode($protected, $this->charset);

	return str_replace($placeholder, '&', $decoded);
}
1038
1039 // --------------------------------------------------------------------
David Behler07b53422011-08-15 00:25:06 +02001040
Pascal Krietec9c045a2011-04-05 14:50:41 -04001041 /**
Pascal Krietec9c045a2011-04-05 14:50:41 -04001042 * Do Never Allowed
1043 *
Andrey Andreev64354102012-10-28 14:16:02 +02001044 * @used-by CI_Security::xss_clean()
Pascal Krietec9c045a2011-04-05 14:50:41 -04001045 * @param string
1046 * @return string
1047 */
protected function _do_never_allowed($str)
{
	// Literal strings first: each key of the map is replaced by its value
	$literals = $this->_never_allowed_str;
	$str = str_replace(array_keys($literals), array_values($literals), $str);

	// Then the patterns, one after another, each wrapped in '#' delimiters
	// and matched case-insensitively with dot-matches-newline.
	foreach ($this->_never_allowed_regex as $pattern)
	{
		$str = preg_replace(sprintf('#%s#is', $pattern), '[removed]', $str);
	}

	return $str;
}
1059
1060 // --------------------------------------------------------------------
1061
1062 /**
Andrey Andreev64354102012-10-28 14:16:02 +02001063 * Set CSRF Hash and Cookie
Pascal Krietec9c045a2011-04-05 14:50:41 -04001064 *
1065 * @return string
1066 */
protected function _csrf_set_hash()
{
	// Returns the CSRF hash, establishing it first if this instance does
	// not have one yet: either adopted from a well-formed cookie or
	// freshly generated.
	if ($this->_csrf_hash === NULL)
	{
		// If the cookie exists we will use its value.
		// We don't necessarily want to regenerate it with
		// each page load since a page could contain embedded
		// sub-pages causing this feature to fail
		//
		// The "D" modifier makes "$" match only at the very end of the
		// value; without it a cookie consisting of 32 hex characters
		// followed by a newline would be accepted as a valid hash.
		if (isset($_COOKIE[$this->_csrf_cookie_name]) && is_string($_COOKIE[$this->_csrf_cookie_name])
			&& preg_match('#^[0-9a-f]{32}$#iDS', $_COOKIE[$this->_csrf_cookie_name]) === 1)
		{
			return $this->_csrf_hash = $_COOKIE[$this->_csrf_cookie_name];
		}

		// No usable cookie: 16 random bytes as 32 hex characters, or an
		// md5() of uniqid()/mt_rand() when get_random_bytes() fails.
		$rand = $this->get_random_bytes(16);
		$this->_csrf_hash = ($rand === FALSE)
			? md5(uniqid(mt_rand(), TRUE))
			: bin2hex($rand);
	}

	return $this->_csrf_hash;
}
1089
Derek Jonese701d762010-03-02 18:17:01 -06001090}