blob: a3e227437227f5aa9d50b4e1bb0e373ee3b1e59b [file] [log] [blame]
Derek Jones37f4b9c2011-07-01 17:56:50 -05001<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
Derek Jonese701d762010-03-02 18:17:01 -06002/**
3 * CodeIgniter
4 *
Greg Aker741de1c2010-11-10 14:52:57 -06005 * An open source application development framework for PHP 5.1.6 or newer
Derek Jonese701d762010-03-02 18:17:01 -06006 *
7 * @package CodeIgniter
8 * @author ExpressionEngine Dev Team
Greg Aker0711dc82011-01-05 10:49:40 -06009 * @copyright Copyright (c) 2008 - 2011, EllisLab, Inc.
Derek Jonese701d762010-03-02 18:17:01 -060010 * @license http://codeigniter.com/user_guide/license.html
11 * @link http://codeigniter.com
12 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * Security Class
20 *
21 * @package CodeIgniter
22 * @subpackage Libraries
23 * @category Security
24 * @author ExpressionEngine Dev Team
Pascal Krietec9c045a2011-04-05 14:50:41 -040025 * @link http://codeigniter.com/user_guide/libraries/security.html
Derek Jonese701d762010-03-02 18:17:01 -060026 */
27class CI_Security {
Barry Mienydd671972010-10-04 16:33:58 +020028
/**
 * Placeholder hash generated by xss_hash(); it temporarily replaces
 * the "&" of URL query pairs while entities are validated.
 *
 * @var string
 * @access protected
 */
protected $_xss_hash = '';

/**
 * Current CSRF token value (also the value written to the CSRF cookie)
 *
 * @var string
 * @access protected
 */
protected $_csrf_hash = '';

/**
 * Lifetime of the CSRF cookie in seconds (7200 = two hours)
 *
 * @var int
 * @access protected
 */
protected $_csrf_expire = 7200;

/**
 * Name of the POST field that carries the CSRF token
 *
 * @var string
 * @access protected
 */
protected $_csrf_token_name = 'ci_csrf_token';

/**
 * Name of the cookie that carries the CSRF token
 *
 * @var string
 * @access protected
 */
protected $_csrf_cookie_name = 'ci_csrf_token';

/**
 * Literal strings that are never allowed, mapped to their replacement.
 * Applied in declaration order by _do_never_allowed().
 *
 * @var array
 * @access protected
 */
protected $_never_allowed_str = array(
	'document.cookie'	=> '[removed]',
	'document.write'	=> '[removed]',
	'.parentNode'		=> '[removed]',
	'.innerHTML'		=> '[removed]',
	'window.location'	=> '[removed]',
	'-moz-binding'		=> '[removed]',
	'<!--'				=> '&lt;!--',
	'-->'				=> '--&gt;',
	'<![CDATA['			=> '&lt;![CDATA[',
	'<comment>'			=> '&lt;comment&gt;'
);

/**
 * Regular expression fragments that are never allowed, mapped to their
 * replacement. _do_never_allowed() wraps each key in "#...#i".
 *
 * @var array
 * @access protected
 */
protected $_never_allowed_regex = array(
	'javascript\s*:'			=> '[removed]',
	'expression\s*(\(|&\#40;)'	=> '[removed]', // CSS and IE
	'vbscript\s*:'				=> '[removed]', // IE
	'Redirect\s+302'			=> '[removed]'
);
David Behler07b53422011-08-15 00:25:06 +020097
/**
 * Constructor
 *
 * Loads the CSRF settings from the config, applies the cookie prefix
 * to the CSRF cookie name and initialises the CSRF hash.
 */
public function __construct()
{
	// Any CSRF setting present in the config overrides the class default
	$settings = array('csrf_expire', 'csrf_token_name', 'csrf_cookie_name');

	foreach ($settings as $setting)
	{
		$value = config_item($setting);

		if ($value !== FALSE)
		{
			$property = '_'.$setting;
			$this->$property = $value;
		}
	}

	// Prepend the application specific cookie prefix, if one is configured
	$prefix = config_item('cookie_prefix');

	if ($prefix)
	{
		$this->_csrf_cookie_name = $prefix.$this->_csrf_cookie_name;
	}

	// Set the CSRF hash
	$this->_csrf_set_hash();

	log_message('debug', "Security Class Initialized");
}
123
124 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200125
/**
 * Verify Cross Site Request Forgery Protection
 *
 * When the request carries no POST data only the CSRF cookie is
 * (re)issued. Otherwise the token in $_POST must be present, must be a
 * string and must be identical to the token in $_COOKIE; if not,
 * csrf_show_error() is invoked. On success the token is removed from
 * $_POST, a fresh hash is generated and the cookie is re-sent.
 *
 * @return	object|bool	$this, or the result of csrf_set_cookie() when
 *						there is no POST data
 */
public function csrf_verify()
{
	// If no POST data exists we will set the CSRF cookie
	if (count($_POST) === 0)
	{
		return $this->csrf_set_cookie();
	}

	$token_name = $this->_csrf_token_name;
	$cookie_name = $this->_csrf_cookie_name;

	// Both tokens must exist and be plain strings. Array input such as
	// token[]=x must never reach the comparison.
	$valid = isset($_POST[$token_name], $_COOKIE[$cookie_name])
		&& is_string($_POST[$token_name])
		&& is_string($_COOKIE[$cookie_name]);

	if ($valid)
	{
		// Strict comparison: a loose != treats numeric-looking strings
		// such as "0e1" and "0e2" as equal. hash_equals() is used when
		// available because it is also timing-safe.
		$valid = function_exists('hash_equals')
			? hash_equals($_COOKIE[$cookie_name], $_POST[$token_name])
			: ($_COOKIE[$cookie_name] === $_POST[$token_name]);
	}

	if ( ! $valid)
	{
		$this->csrf_show_error();
	}

	// We kill this since we're done and we don't want to
	// pollute the _POST array
	unset($_POST[$token_name]);

	// Nothing should last forever: drop the old token and issue a new one
	unset($_COOKIE[$cookie_name]);
	$this->_csrf_set_hash();
	$this->csrf_set_cookie();

	log_message('debug', "CSRF token verified ");

	return $this;
}
Barry Mienydd671972010-10-04 16:33:58 +0200165
Derek Jonese701d762010-03-02 18:17:01 -0600166 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200167
/**
 * Set Cross Site Request Forgery Protection Cookie
 *
 * Sends the current CSRF hash as a cookie that expires after
 * $_csrf_expire seconds. When the 'cookie_secure' config item is TRUE
 * the cookie is only sent over HTTPS; on a non-HTTPS request nothing
 * is sent and FALSE is returned.
 *
 * @return	object|bool	$this on success, FALSE when a secure cookie
 *						is required but the request is not HTTPS
 */
public function csrf_set_cookie()
{
	$expire = time() + $this->_csrf_expire;
	$secure_cookie = (config_item('cookie_secure') === TRUE) ? 1 : 0;

	if ($secure_cookie)
	{
		$https = isset($_SERVER['HTTPS']) ? $_SERVER['HTTPS'] : FALSE;

		// Some servers report a non-HTTPS request as "off" (any letter case)
		if ( ! $https OR strtolower($https) === 'off')
		{
			return FALSE;
		}
	}

	setcookie($this->_csrf_cookie_name, $this->_csrf_hash, $expire, config_item('cookie_path'), config_item('cookie_domain'), $secure_cookie);

	log_message('debug', "CSRF cookie Set");

	return $this;
}
194
195 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200196
/**
 * Show CSRF Error
 *
 * Hands a fixed "not allowed" message to show_error().
 *
 * @return	void
 */
public function csrf_show_error()
{
	$message = 'The action you have requested is not allowed.';

	show_error($message);
}
206
207 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200208
/**
 * Get CSRF Hash
 *
 * Accessor for the current CSRF token value.
 *
 * @return	string	the value of $this->_csrf_hash
 */
public function get_csrf_hash()
{
	$hash = $this->_csrf_hash;

	return $hash;
}
220
221 // --------------------------------------------------------------------
222
/**
 * Get CSRF Token Name
 *
 * Accessor for the name of the POST field that carries the CSRF token.
 *
 * @return	string	the value of $this->_csrf_token_name
 */
public function get_csrf_token_name()
{
	$name = $this->_csrf_token_name;

	return $name;
}
234
235 // --------------------------------------------------------------------
236
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting hacks can be
 * prevented. This function does a fair amount of work and is
 * designed to prevent even obscure XSS attempts. Nothing is ever
 * 100% foolproof, of course.
 *
 * Note: This function should only be used to deal with data
 * upon submission. It's not something that should
 * be used for general runtime processing.
 *
 * This function was based in part on some code and ideas from
 * Bitflux: http://channel.bitflux.ch/wiki/XSS_Prevention
 *
 * The list of vulnerabilities it was developed against:
 * http://ha.ckers.org/xss.html
 *
 * Arrays are cleaned recursively, element by element. Note that
 * $is_image is not forwarded to the recursive calls.
 *
 * @param	mixed	string or array to clean
 * @param	bool	TRUE when the data is the content of an image file
 * @return	mixed	the cleaned string (or array of cleaned strings);
 *					when $is_image is TRUE, a boolean telling whether
 *					the data survived the filters unchanged
 */
public function xss_clean($str, $is_image = FALSE)
{
	/*
	 * Is the string an array?
	 *
	 * foreach over the keys replaces the former each() loop: each() was
	 * removed in PHP 8 and started from the array's internal pointer
	 * rather than from the first element.
	 */
	if (is_array($str))
	{
		foreach (array_keys($str) as $key)
		{
			$str[$key] = $this->xss_clean($str[$key]);
		}

		return $str;
	}

	/*
	 * Remove Invisible Characters
	 */
	$str = remove_invisible_characters($str);

	// Validate Entities in URLs
	$str = $this->_validate_entities($str);

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Use rawurldecode() so it does not remove plus signs
	 */
	$str = rawurldecode($str);

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 */
	$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);

	$str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", array($this, '_decode_entity'), $str);

	/*
	 * Remove Invisible Characters Again!
	 */
	$str = remove_invisible_characters($str);

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like this: ja[TAB]vascript
	 * NOTE: we deal with spaces between characters later.
	 * NOTE: preg_replace was found to be amazingly slow here on
	 * large blocks of data, so we use str_replace.
	 */
	if (strpos($str, "\t") !== FALSE)
	{
		$str = str_replace("\t", ' ', $str);
	}

	/*
	 * Capture converted string for later comparison
	 */
	$converted_string = $str;

	// Remove Strings that are never allowed
	$str = $this->_do_never_allowed($str);

	/*
	 * Makes PHP tags safe
	 *
	 * Note: XML tags are inadvertently replaced too:
	 *
	 * <?xml
	 *
	 * But it doesn't seem to pose a problem.
	 */
	if ($is_image === TRUE)
	{
		// Images have a tendency to have the PHP short opening and
		// closing tags every so often so we skip those and only
		// do the long opening tags.
		$str = preg_replace('/<\?(php)/i', "&lt;?\\1", $str);
	}
	else
	{
		$str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
	}

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like:  j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 */
	$words = array(
		'javascript', 'expression', 'vbscript', 'script',
		'applet', 'alert', 'document', 'write', 'cookie', 'window'
	);

	foreach ($words as $word)
	{
		// "script" becomes "s\s*c\s*r\s*i\s*p\s*t"
		$spaced = implode('\s*', str_split($word));

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto"
		$str = preg_replace_callback('#('.$spaced.')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 *
	 * Repeated until a pass leaves the string untouched, so that a
	 * removal cannot expose a new vector.
	 */
	do
	{
		$original = $str;

		if (preg_match("/<a/i", $str))
		{
			$str = preg_replace_callback("#<a\s+([^>]*?)(>|$)#si", array($this, '_js_link_removal'), $str);
		}

		if (preg_match("/<img/i", $str))
		{
			$str = preg_replace_callback("#<img\s+([^>]*?)(\s?/?>|$)#si", array($this, '_js_img_removal'), $str);
		}

		if (preg_match("/script/i", $str) OR preg_match("/xss/i", $str))
		{
			$str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
		}
	}
	while ($original !== $str);

	unset($original);

	// Remove evil attributes such as style, onclick and xmlns
	$str = $this->_remove_evil_attributes($str, $is_image);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 */
	$naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';
	$str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed. Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:		eval&#40;'some code'&#41;
	 */
	$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

	// Final clean up
	// This adds a bit of extra precaution in case
	// something got through the above filters
	$str = $this->_do_never_allowed($str);

	/*
	 * Images are Handled in a Special Way
	 * - Essentially, we want to know that after all of the character
	 * conversion is done whether any unwanted, likely XSS, code was found.
	 * If not, we return TRUE, as the image is clean.
	 * However, if the string post-conversion does not match the
	 * string post-removal of XSS, then it fails, as there was unwanted XSS
	 * code found and removed/changed during processing.
	 *
	 * The comparison is strict so that numeric-looking strings are
	 * never considered equal by type juggling.
	 */
	if ($is_image === TRUE)
	{
		return ($str === $converted_string);
	}

	log_message('debug', "XSS Filtering completed");
	return $str;
}
467
468 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200469
/**
 * Random Hash for protecting URLs
 *
 * Lazily generates, caches and returns a 32 character hexadecimal
 * marker. The marker must not be guessable by whoever supplies the
 * input, so a cryptographically secure source is used when the
 * running PHP provides one; the legacy time()/mt_rand() based value
 * is only a last resort.
 *
 * @return	string
 */
public function xss_hash()
{
	if ($this->_xss_hash == '')
	{
		$hash = '';

		if (function_exists('random_bytes'))
		{
			try
			{
				$hash = bin2hex(random_bytes(16));
			}
			catch (Exception $e)
			{
				// No secure source available; try the next generator
				$hash = '';
			}
		}

		if ($hash === '' && function_exists('openssl_random_pseudo_bytes'))
		{
			$bytes = openssl_random_pseudo_bytes(16);

			if ($bytes !== FALSE && strlen($bytes) === 16)
			{
				$hash = bin2hex($bytes);
			}
		}

		if ($hash === '')
		{
			// Legacy fallback for very old PHP builds
			mt_srand();
			$hash = md5(time() + mt_rand(0, 1999999999));
		}

		$this->_xss_hash = $hash;
	}

	return $this->_xss_hash;
}
485
486 // --------------------------------------------------------------------
487
/**
 * HTML Entities Decode
 *
 * This function is a replacement for html_entity_decode()
 *
 * The reason we are not using html_entity_decode() by itself is because
 * while it is not technically correct to leave out the semicolon
 * at the end of an entity most browsers will still interpret the entity
 * correctly. html_entity_decode() does not convert entities without
 * semicolons, so numeric entities lacking one are converted here.
 *
 * The conversions use preg_replace_callback(); the former "e" (eval)
 * pattern modifier no longer exists in PHP 7+, and it also evaluated
 * zero-padded decimals such as "0060" as octal literals.
 *
 * @param	string	the string to decode
 * @param	string	character set passed to html_entity_decode()
 * @return	string
 */
public function entity_decode($str, $charset='UTF-8')
{
	// Nothing to decode without an ampersand
	if (strpos($str, '&') === FALSE)
	{
		return $str;
	}

	$str = html_entity_decode($str, ENT_COMPAT, $charset);

	// Hexadecimal entities without a trailing semicolon, e.g. &#x3c
	$str = preg_replace_callback('~&#x(0*[0-9a-f]{2,5})~i', array($this, '_entity_hex_to_char'), $str);

	// Decimal entities without a trailing semicolon, e.g. &#60
	return preg_replace_callback('~&#([0-9]{2,4})~', array($this, '_entity_dec_to_char'), $str);
}

/**
 * Callback for entity_decode(): converts the hexadecimal code point
 * captured in $match[1] to a single byte.
 *
 * @param	array
 * @return	string
 */
protected function _entity_hex_to_char($match)
{
	return chr((int) hexdec($match[1]));
}

/**
 * Callback for entity_decode(): converts the decimal code point
 * captured in $match[1] to a single byte.
 *
 * @param	array
 * @return	string
 */
protected function _entity_dec_to_char($match)
{
	return chr((int) $match[1]);
}
Barry Mienydd671972010-10-04 16:33:58 +0200514
Derek Jonesa0911472010-03-30 10:33:09 -0500515 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200516
/**
 * Filename Security
 *
 * Strips characters and sequences that must not appear in a file
 * name. The stripping is repeated until the string no longer changes:
 * a single pass would turn "....//" into "../", re-creating a
 * sequence that was just removed.
 *
 * @param	string	the file name to sanitize
 * @param	bool	TRUE to keep "./" and "/" (relative paths allowed)
 * @return	string
 */
public function sanitize_filename($str, $relative_path = FALSE)
{
	$bad = array(
		"../",
		"<!--",
		"-->",
		"<",
		">",
		"'",
		'"',
		'&',
		'$',
		'#',
		'{',
		'}',
		'[',
		']',
		'=',
		';',
		'?',
		"%20",
		"%22",
		"%3c",		// <
		"%253c",	// < (double encoded)
		"%3e",		// >
		"%0e",		// shift out control character
		"%28",		// (
		"%29",		// )
		"%2528",	// ( (double encoded)
		"%26",		// &
		"%24",		// $
		"%3f",		// ?
		"%3b",		// ;
		"%3d"		// =
	);

	if ( ! $relative_path)
	{
		$bad[] = './';
		$bad[] = '/';
	}

	$str = remove_invisible_characters($str, FALSE);

	// Keep stripping until a pass finds nothing left to remove
	do
	{
		$previous = $str;
		$str = str_replace($bad, '', $str);
	}
	while ($previous !== $str);

	return stripslashes($str);
}
569
Pascal Krietec9c045a2011-04-05 14:50:41 -0400570 // ----------------------------------------------------------------
571
/**
 * Compact Exploded Words
 *
 * Callback for xss_clean(): strips every whitespace character from the
 * matched word (e.g. "j a v a s c r i p t") and re-attaches the
 * character that followed it.
 *
 * @param	array	$matches[1] is the exploded word, $matches[2] the
 *					character following it
 * @return	string
 */
protected function _compact_exploded_words($matches)
{
	list(, $exploded, $following) = $matches;

	$compacted = preg_replace('/\s+/s', '', $exploded);

	return $compacted.$following;
}
585
586 // --------------------------------------------------------------------
David Behler07b53422011-08-15 00:25:06 +0200587
/**
 * Remove Evil HTML Attributes (like event handlers and style)
 *
 * It removes the evil attribute and either:
 * 	- Everything up until a space
 *		For example, everything between the pipes:
 *		<a |style=document.write('hello');alert('world');| class=link>
 * 	- Everything inside the quotes
 *		For example, everything between the pipes:
 *		<a |style="document.write('hello'); alert('world');"| class="link">
 *
 * @param	string	$str		The string to check
 * @param	boolean	$is_image	TRUE if this is an image
 * @return	string	The string with the evil attributes removed
 */
protected function _remove_evil_attributes($str, $is_image)
{
	// All javascript event handlers (e.g. onload, onclick, onmouseover), style, and xmlns
	$evil_attributes = array('on\w*', 'style');

	// Adobe Photoshop puts XML metadata into JFIF images, including
	// namespacing, so xmlns is only treated as evil for non-images.
	if ($is_image !== TRUE)
	{
		$evil_attributes[] = 'xmlns';
	}

	$evil_attributes[] = 'formaction';

	$names = implode('|', $evil_attributes);

	$finders = array(
		// illegal attribute strings without quotes
		"/(".$names.")\s*=\s*([^\s]*)/is",
		// illegal attribute strings with quotes (042 and 047 are octal quotes)
		"/(".$names.")\s*=\s*(\042|\047)([^\\2]*?)(\\2)/is"
	);

	do
	{
		$count = 0;
		$attribs = array();

		foreach ($finders as $finder)
		{
			preg_match_all($finder, $str, $matches, PREG_SET_ORDER);

			foreach ($matches as $attr)
			{
				$attribs[] = preg_quote($attr[0], '/');
			}
		}

		// replace illegal attribute strings that are inside an html tag
		if (count($attribs) > 0)
		{
			$str = preg_replace("/<(\/?[^><]+?)([^A-Za-z\-])(".implode('|', $attribs).")([\s><])([><]*)/i", '<$1$2$4$5', $str, -1, $count);
		}
	}
	while ($count);

	return $str;
}
David Behler07b53422011-08-15 00:25:06 +0200647
Pascal Krietec9c045a2011-04-05 14:50:41 -0400648 // --------------------------------------------------------------------
649
/**
 * Sanitize Naughty HTML
 *
 * Callback for xss_clean(): rebuilds a matched naughty tag with its
 * opening "<" encoded, and encodes any angle brackets captured after
 * the tag to prevent recursive vectors.
 *
 * @param	array	the match groups of the naughty-element pattern
 * @return	string
 */
protected function _sanitize_naughty_html($matches)
{
	// trailing opening or closing braces, turned into entities
	$trailing = str_replace(array('>', '<'), array('&gt;', '&lt;'), $matches[4]);

	// encoded opening brace + the tag pieces + the encoded trailing braces
	return '&lt;'.$matches[1].$matches[2].$matches[3].$trailing;
}
669
670 // --------------------------------------------------------------------
671
/**
 * JS Link Removal
 *
 * Callback for xss_clean() to sanitize links. Working on one matched
 * tag at a time limits the PCRE backtracks, making it more performance
 * friendly and prevents PREG_BACKTRACK_LIMIT_ERROR from being
 * triggered in PHP 5.2+ on link-heavy strings.
 *
 * @param	array	$match[0] is the whole tag, $match[1] its attributes
 * @return	string
 */
protected function _js_link_removal($match)
{
	$tag = $match[0];
	$raw_attributes = $match[1];

	// Attributes as kept by _filter_attributes(), angle brackets removed first
	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $raw_attributes));

	// Drop an href from its start up to and including the first dangerous token
	$cleaned = preg_replace("#href=.*?(alert\(|alert&\#40;|javascript\:|livescript\:|mocha\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes);

	return str_replace($raw_attributes, $cleaned, $tag);
}
689
690 // --------------------------------------------------------------------
691
/**
 * JS Image Removal
 *
 * Callback for xss_clean() to sanitize image tags. Working on one
 * matched tag at a time limits the PCRE backtracks, making it more
 * performance friendly and prevents PREG_BACKTRACK_LIMIT_ERROR from
 * being triggered in PHP 5.2+ on image tag heavy strings.
 *
 * @param	array	$match[0] is the whole tag, $match[1] its attributes
 * @return	string
 */
protected function _js_img_removal($match)
{
	$tag = $match[0];
	$raw_attributes = $match[1];

	// Attributes as kept by _filter_attributes(), angle brackets removed first
	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $raw_attributes));

	// Drop a src from its start up to and including the first dangerous token
	$cleaned = preg_replace("#src=.*?(alert\(|alert&\#40;|javascript\:|livescript\:|mocha\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes);

	return str_replace($raw_attributes, $cleaned, $tag);
}
709
710 // --------------------------------------------------------------------
711
/**
 * Attribute Conversion
 *
 * Callback for xss_clean(): inside a matched quoted attribute, angle
 * brackets become entities and every backslash is doubled.
 *
 * @param	array	$match[0] is the matched attribute
 * @return	string
 */
protected function _convert_attribute($match)
{
	$replacements = array(
		'>'		=> '&gt;',
		'<'		=> '&lt;',
		'\\'	=> '\\\\'
	);

	return strtr($match[0], $replacements);
}
724
725 // --------------------------------------------------------------------
726
/**
 * Filter Attributes
 *
 * Keeps only the name="value" / name='value' pairs found in the given
 * attribute string, concatenated in order, with any C-style comments
 * removed from each pair. Anything else is dropped.
 *
 * @param	string	the raw attribute string of a tag
 * @return	string
 */
protected function _filter_attributes($str)
{
	$found = array();

	if ( ! preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $found))
	{
		return '';
	}

	$filtered = '';

	foreach ($found[0] as $pair)
	{
		// strip /* ... */ comments hidden inside the pair
		$filtered .= preg_replace("#/\*.*?\*/#s", '', $pair);
	}

	return $filtered;
}
749
750 // --------------------------------------------------------------------
751
/**
 * HTML Entity Decode Callback
 *
 * Callback for xss_clean(): decodes the entities of the matched text
 * with entity_decode(), using the upper-cased 'charset' config item.
 *
 * @param	array	$match[0] is the matched text
 * @return	string
 */
protected function _decode_entity($match)
{
	$charset = strtoupper(config_item('charset'));

	return $this->entity_decode($match[0], $charset);
}
764
765 // --------------------------------------------------------------------
David Behler07b53422011-08-15 00:25:06 +0200766
/**
 * Validate URL entities
 *
 * Called by xss_clean(). Adds the missing trailing semicolon to
 * character entities so they can be converted later, while shielding
 * the "&" of name=value query pairs from that treatment.
 *
 * @param	string
 * @return	string
 */
protected function _validate_entities($str)
{
	$placeholder = $this->xss_hash();

	/*
	 * Protect GET variables in URLs
	 *
	 * The "&" in front of a name=value pair is swapped for the placeholder
	 */
	$str = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-]+)|i', $placeholder."\\1=\\2", $str);

	$patterns = array(
		// Validate standard character entities: add a semicolon if
		// missing, to enable the conversion of entities to ASCII later.
		'#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i',
		// Validate UTF16 two byte encoding (x00): just as above,
		// adds a semicolon if missing.
		'#(&\#x?)([0-9A-F]+);?#i'
	);

	$replacements = array(
		"\\1;\\2",
		"\\1\\2;"
	);

	// The patterns are applied one after the other, in the order given
	$str = preg_replace($patterns, $replacements, $str);

	/*
	 * Un-Protect GET variables in URLs
	 */
	return str_replace($placeholder, '&', $str);
}
809
810 // ----------------------------------------------------------------------
811
/**
 * Do Never Allowed
 *
 * A utility function for xss_clean(): first applies every literal
 * replacement of $_never_allowed_str, then every case-insensitive
 * regex replacement of $_never_allowed_regex, each list in its
 * declared order.
 *
 * @param	string
 * @return	string
 */
protected function _do_never_allowed($str)
{
	// Literal strings; str_replace() processes the pairs one after the other
	$str = str_replace(array_keys($this->_never_allowed_str), array_values($this->_never_allowed_str), $str);

	// Regex fragments, each wrapped in delimiters with the "i" modifier
	$patterns = array();
	$replacements = array();

	foreach ($this->_never_allowed_regex as $fragment => $replacement)
	{
		$patterns[] = "#".$fragment."#i";
		$replacements[] = $replacement;
	}

	return preg_replace($patterns, $replacements, $str);
}
834
835 // --------------------------------------------------------------------
836
/**
 * Set Cross Site Request Forgery Protection Hash
 *
 * Does nothing when a hash is already set. Otherwise the value of the
 * CSRF cookie is reused, but only if it is a 32 character hexadecimal
 * string (the format of every hash generated here); any other cookie
 * content is ignored and a new random hash is generated.
 *
 * @return	string	the CSRF hash
 */
protected function _csrf_set_hash()
{
	if ($this->_csrf_hash == '')
	{
		$cookie_name = $this->_csrf_cookie_name;

		// If the cookie exists we will use its value.
		// We don't necessarily want to regenerate it with
		// each page load since a page could contain embedded
		// sub-pages causing this feature to fail.
		// The format check keeps arbitrary client supplied content
		// (markup, arrays) from being adopted as the token.
		if (isset($_COOKIE[$cookie_name])
			&& is_string($_COOKIE[$cookie_name])
			&& preg_match('/\A[0-9a-f]{32}\z/i', $_COOKIE[$cookie_name]) === 1)
		{
			return $this->_csrf_hash = $_COOKIE[$cookie_name];
		}

		// Prefer a cryptographically secure source for the new token
		$hash = '';

		if (function_exists('random_bytes'))
		{
			try
			{
				$hash = bin2hex(random_bytes(16));
			}
			catch (Exception $e)
			{
				// No secure source available; try the next generator
				$hash = '';
			}
		}

		if ($hash === '' && function_exists('openssl_random_pseudo_bytes'))
		{
			$bytes = openssl_random_pseudo_bytes(16);

			if ($bytes !== FALSE && strlen($bytes) === 16)
			{
				$hash = bin2hex($bytes);
			}
		}

		if ($hash === '')
		{
			// Legacy fallback for very old PHP builds
			$hash = md5(uniqid(rand(), TRUE));
		}

		return $this->_csrf_hash = $hash;
	}

	return $this->_csrf_hash;
}
861
Derek Jonese701d762010-03-02 18:17:01 -0600862}
863// END Security Class
864
865/* End of file Security.php */
patworkef1a55a2011-04-09 13:04:06 +0200866/* Location: ./system/libraries/Security.php */