blob: 73a3cfb31b746636ca1193ff14e01b960e3c1661 [file] [log] [blame]
Derek Jonese701d762010-03-02 18:17:01 -06001<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
2/**
3 * CodeIgniter
4 *
Greg Aker741de1c2010-11-10 14:52:57 -06005 * An open source application development framework for PHP 5.1.6 or newer
Derek Jonese701d762010-03-02 18:17:01 -06006 *
7 * @package CodeIgniter
8 * @author ExpressionEngine Dev Team
Greg Aker0711dc82011-01-05 10:49:40 -06009 * @copyright Copyright (c) 2008 - 2011, EllisLab, Inc.
Derek Jonese701d762010-03-02 18:17:01 -060010 * @license http://codeigniter.com/user_guide/license.html
11 * @link http://codeigniter.com
12 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * Security Class
20 *
21 * @package CodeIgniter
22 * @subpackage Libraries
23 * @category Security
24 * @author ExpressionEngine Dev Team
Pascal Krietec9c045a2011-04-05 14:50:41 -040025 * @link http://codeigniter.com/user_guide/libraries/security.html
Derek Jonese701d762010-03-02 18:17:01 -060026 */
27class CI_Security {
Pascal Krietec9c045a2011-04-05 14:50:41 -040028
29 protected $_xss_hash = '';
30 protected $_csrf_hash = '';
31 protected $_csrf_expire = 7200; // Two hours (in seconds)
32 protected $_csrf_token_name = 'ci_csrf_token';
33 protected $_csrf_cookie_name = 'ci_csrf_token';
Barry Mienydd671972010-10-04 16:33:58 +020034
Derek Jonese701d762010-03-02 18:17:01 -060035 /* never allowed, string replacement */
Pascal Krietec9c045a2011-04-05 14:50:41 -040036 protected $_never_allowed_str = array(
37 'document.cookie' => '[removed]',
38 'document.write' => '[removed]',
39 '.parentNode' => '[removed]',
40 '.innerHTML' => '[removed]',
41 'window.location' => '[removed]',
42 '-moz-binding' => '[removed]',
43 '<!--' => '&lt;!--',
44 '-->' => '--&gt;',
45 '<![CDATA[' => '&lt;![CDATA['
46 );
Derek Jonese701d762010-03-02 18:17:01 -060047
Pascal Krietec9c045a2011-04-05 14:50:41 -040048 /* never allowed, regex replacement */
49 protected $_never_allowed_regex = array(
50 "javascript\s*:" => '[removed]',
51 "expression\s*(\(|&\#40;)" => '[removed]', // CSS and IE
52 "vbscript\s*:" => '[removed]', // IE, surprise!
53 "Redirect\s+302" => '[removed]'
54 );
55
56 /**
57 * Constructor
58 */
Greg Akera9263282010-11-10 15:26:43 -060059 public function __construct()
Derek Jonese701d762010-03-02 18:17:01 -060060 {
patworkef1a55a2011-04-09 13:04:06 +020061 // CSRF config
62 foreach(array('csrf_expire', 'csrf_token_name', 'csrf_cookie_name') as $key)
63 {
64 if (FALSE !== ($val = config_item($key)))
65 {
66 $this->{'_'.$key} = $val;
67 }
68 }
69
Derek Jonesb3f10a22010-07-25 19:11:26 -050070 // Append application specific cookie prefix to token name
Pascal Krietec9c045a2011-04-05 14:50:41 -040071 $this->_csrf_cookie_name = (config_item('cookie_prefix')) ? config_item('cookie_prefix').$this->_csrf_token_name : $this->_csrf_token_name;
Derek Jonesb3f10a22010-07-25 19:11:26 -050072
Derek Jonese701d762010-03-02 18:17:01 -060073 // Set the CSRF hash
74 $this->_csrf_set_hash();
Derek Allard958543a2010-07-22 14:10:26 -040075
Derek Jonese701d762010-03-02 18:17:01 -060076 log_message('debug', "Security Class Initialized");
77 }
78
79 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +020080
Derek Jonese701d762010-03-02 18:17:01 -060081 /**
82 * Verify Cross Site Request Forgery Protection
83 *
Pascal Krietec9c045a2011-04-05 14:50:41 -040084 * @return object
Derek Jonese701d762010-03-02 18:17:01 -060085 */
Eric Barnes9805ecc2011-01-16 23:35:16 -050086 public function csrf_verify()
Derek Allard958543a2010-07-22 14:10:26 -040087 {
Derek Jonese701d762010-03-02 18:17:01 -060088 // If no POST data exists we will set the CSRF cookie
89 if (count($_POST) == 0)
90 {
91 return $this->csrf_set_cookie();
92 }
93
94 // Do the tokens exist in both the _POST and _COOKIE arrays?
Pascal Krietec9c045a2011-04-05 14:50:41 -040095 if ( ! isset($_POST[$this->_csrf_token_name]) OR
96 ! isset($_COOKIE[$this->_csrf_cookie_name]))
Derek Jonese701d762010-03-02 18:17:01 -060097 {
98 $this->csrf_show_error();
99 }
100
101 // Do the tokens match?
Pascal Krietec9c045a2011-04-05 14:50:41 -0400102 if ($_POST[$this->_csrf_token_name] != $_COOKIE[$this->_csrf_cookie_name])
Derek Jonese701d762010-03-02 18:17:01 -0600103 {
104 $this->csrf_show_error();
105 }
106
Pascal Krietec9c045a2011-04-05 14:50:41 -0400107 // We kill this since we're done and we don't want to
108 // polute the _POST array
109 unset($_POST[$this->_csrf_token_name]);
Barry Mienydd671972010-10-04 16:33:58 +0200110
Derek Jonesb3f10a22010-07-25 19:11:26 -0500111 // Nothing should last forever
Pascal Krietec9c045a2011-04-05 14:50:41 -0400112 unset($_COOKIE[$this->_csrf_cookie_name]);
Derek Jonesb3f10a22010-07-25 19:11:26 -0500113 $this->_csrf_set_hash();
114 $this->csrf_set_cookie();
Derek Jonese701d762010-03-02 18:17:01 -0600115
116 log_message('debug', "CSRF token verified ");
Pascal Krietec9c045a2011-04-05 14:50:41 -0400117
118 return $this;
Derek Jonese701d762010-03-02 18:17:01 -0600119 }
Barry Mienydd671972010-10-04 16:33:58 +0200120
Derek Jonese701d762010-03-02 18:17:01 -0600121 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200122
Derek Jonese701d762010-03-02 18:17:01 -0600123 /**
124 * Set Cross Site Request Forgery Protection Cookie
125 *
Pascal Krietec9c045a2011-04-05 14:50:41 -0400126 * @return object
Derek Jonese701d762010-03-02 18:17:01 -0600127 */
Eric Barnes9805ecc2011-01-16 23:35:16 -0500128 public function csrf_set_cookie()
Derek Jonese701d762010-03-02 18:17:01 -0600129 {
Pascal Krietec9c045a2011-04-05 14:50:41 -0400130 $expire = time() + $this->_csrf_expire;
Robin Sowell154da112011-02-11 15:33:44 -0500131 $secure_cookie = (config_item('cookie_secure') === TRUE) ? 1 : 0;
Derek Jonese701d762010-03-02 18:17:01 -0600132
Pascal Krietec9c045a2011-04-05 14:50:41 -0400133 if ($secure_cookie)
Derek Jonese701d762010-03-02 18:17:01 -0600134 {
Pascal Krietec9c045a2011-04-05 14:50:41 -0400135 $req = isset($_SERVER['HTTPS']) ? $_SERVER['HTTPS'] : FALSE;
136
137 if ( ! $req OR $req == 'off')
Derek Jonese701d762010-03-02 18:17:01 -0600138 {
Pascal Krietec9c045a2011-04-05 14:50:41 -0400139 return FALSE;
Derek Jonese701d762010-03-02 18:17:01 -0600140 }
141 }
Derek Allard958543a2010-07-22 14:10:26 -0400142
Pascal Krietec9c045a2011-04-05 14:50:41 -0400143 setcookie($this->_csrf_cookie_name, $this->_csrf_hash, $expire, config_item('cookie_path'), config_item('cookie_domain'), $secure_cookie);
144
145 log_message('debug', "CRSF cookie Set");
146
147 return $this;
Derek Jonese701d762010-03-02 18:17:01 -0600148 }
149
150 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200151
Derek Jonese701d762010-03-02 18:17:01 -0600152 /**
153 * Show CSRF Error
154 *
Pascal Krietec9c045a2011-04-05 14:50:41 -0400155 * @return void
Derek Jonese701d762010-03-02 18:17:01 -0600156 */
Eric Barnes9805ecc2011-01-16 23:35:16 -0500157 public function csrf_show_error()
Derek Jonese701d762010-03-02 18:17:01 -0600158 {
159 show_error('The action you have requested is not allowed.');
160 }
161
162 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200163
Derek Jonese701d762010-03-02 18:17:01 -0600164 /**
Pascal Krietec9c045a2011-04-05 14:50:41 -0400165 * Get CSRF Hash
166 *
167 * Getter Method
168 *
169 * @return string self::_csrf_hash
170 */
171 public function get_csrf_hash()
172 {
173 return $this->_csrf_hash;
174 }
175
176 // --------------------------------------------------------------------
177
178 /**
179 * Get CSRF Token Name
180 *
181 * Getter Method
182 *
183 * @return string self::csrf_token_name
184 */
185 public function get_csrf_token_name()
186 {
187 return $this->_csrf_token_name;
188 }
189
190 // --------------------------------------------------------------------
191
192 /**
Derek Jonese701d762010-03-02 18:17:01 -0600193 * XSS Clean
194 *
195 * Sanitizes data so that Cross Site Scripting Hacks can be
196 * prevented. This function does a fair amount of work but
197 * it is extremely thorough, designed to prevent even the
198 * most obscure XSS attempts. Nothing is ever 100% foolproof,
199 * of course, but I haven't been able to get anything passed
200 * the filter.
201 *
202 * Note: This function should only be used to deal with data
203 * upon submission. It's not something that should
204 * be used for general runtime processing.
205 *
206 * This function was based in part on some code and ideas I
207 * got from Bitflux: http://channel.bitflux.ch/wiki/XSS_Prevention
208 *
209 * To help develop this script I used this great list of
210 * vulnerabilities along with a few other hacks I've
211 * harvested from examining vulnerabilities in other programs:
212 * http://ha.ckers.org/xss.html
213 *
Derek Jonese701d762010-03-02 18:17:01 -0600214 * @param mixed string or array
215 * @return string
216 */
Eric Barnes9805ecc2011-01-16 23:35:16 -0500217 public function xss_clean($str, $is_image = FALSE)
Derek Jonese701d762010-03-02 18:17:01 -0600218 {
219 /*
220 * Is the string an array?
221 *
222 */
223 if (is_array($str))
224 {
225 while (list($key) = each($str))
226 {
227 $str[$key] = $this->xss_clean($str[$key]);
228 }
Barry Mienydd671972010-10-04 16:33:58 +0200229
Derek Jonese701d762010-03-02 18:17:01 -0600230 return $str;
231 }
232
233 /*
234 * Remove Invisible Characters
235 */
Greg Aker757dda62010-04-14 19:06:19 -0500236 $str = remove_invisible_characters($str);
Derek Jonese701d762010-03-02 18:17:01 -0600237
Pascal Krietec9c045a2011-04-05 14:50:41 -0400238 // Validate Entities in URLs
239 $str = $this->_validate_entities($str);
Derek Jonese701d762010-03-02 18:17:01 -0600240
241 /*
242 * URL Decode
243 *
244 * Just in case stuff like this is submitted:
245 *
246 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
247 *
248 * Note: Use rawurldecode() so it does not remove plus signs
249 *
250 */
251 $str = rawurldecode($str);
Barry Mienydd671972010-10-04 16:33:58 +0200252
Derek Jonese701d762010-03-02 18:17:01 -0600253 /*
Barry Mienydd671972010-10-04 16:33:58 +0200254 * Convert character entities to ASCII
Derek Jonese701d762010-03-02 18:17:01 -0600255 *
256 * This permits our tests below to work reliably.
257 * We only convert entities that are within tags since
258 * these are the ones that will pose security problems.
259 *
260 */
261
262 $str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
Pascal Krietec9c045a2011-04-05 14:50:41 -0400263
Derek Jonese701d762010-03-02 18:17:01 -0600264 $str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", array($this, '_decode_entity'), $str);
265
266 /*
267 * Remove Invisible Characters Again!
268 */
Greg Aker757dda62010-04-14 19:06:19 -0500269 $str = remove_invisible_characters($str);
Barry Mienydd671972010-10-04 16:33:58 +0200270
Derek Jonese701d762010-03-02 18:17:01 -0600271 /*
272 * Convert all tabs to spaces
273 *
274 * This prevents strings like this: ja vascript
275 * NOTE: we deal with spaces between characters later.
Pascal Krietec9c045a2011-04-05 14:50:41 -0400276 * NOTE: preg_replace was found to be amazingly slow here on
277 * large blocks of data, so we use str_replace.
Derek Jonese701d762010-03-02 18:17:01 -0600278 */
Barry Mienydd671972010-10-04 16:33:58 +0200279
Derek Jonese701d762010-03-02 18:17:01 -0600280 if (strpos($str, "\t") !== FALSE)
281 {
282 $str = str_replace("\t", ' ', $str);
283 }
Barry Mienydd671972010-10-04 16:33:58 +0200284
Derek Jonese701d762010-03-02 18:17:01 -0600285 /*
286 * Capture converted string for later comparison
287 */
288 $converted_string = $str;
Barry Mienydd671972010-10-04 16:33:58 +0200289
Pascal Krietec9c045a2011-04-05 14:50:41 -0400290 // Remove Strings that are never allowed
291 $str = $this->_do_never_allowed($str);
Derek Jonese701d762010-03-02 18:17:01 -0600292
293 /*
294 * Makes PHP tags safe
295 *
Pascal Krietec9c045a2011-04-05 14:50:41 -0400296 * Note: XML tags are inadvertently replaced too:
Derek Jonese701d762010-03-02 18:17:01 -0600297 *
Pascal Krietec9c045a2011-04-05 14:50:41 -0400298 * <?xml
Derek Jonese701d762010-03-02 18:17:01 -0600299 *
300 * But it doesn't seem to pose a problem.
Derek Jonese701d762010-03-02 18:17:01 -0600301 */
302 if ($is_image === TRUE)
303 {
Pascal Krietec9c045a2011-04-05 14:50:41 -0400304 // Images have a tendency to have the PHP short opening and
305 // closing tags every so often so we skip those and only
306 // do the long opening tags.
Derek Jonese701d762010-03-02 18:17:01 -0600307 $str = preg_replace('/<\?(php)/i', "&lt;?\\1", $str);
308 }
309 else
310 {
311 $str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
312 }
Barry Mienydd671972010-10-04 16:33:58 +0200313
Derek Jonese701d762010-03-02 18:17:01 -0600314 /*
315 * Compact any exploded words
316 *
317 * This corrects words like: j a v a s c r i p t
318 * These words are compacted back to their correct state.
Derek Jonese701d762010-03-02 18:17:01 -0600319 */
Pascal Krietec9c045a2011-04-05 14:50:41 -0400320 $words = array(
321 'javascript', 'expression', 'vbscript', 'script',
322 'applet', 'alert', 'document', 'write', 'cookie', 'window'
323 );
324
Derek Jonese701d762010-03-02 18:17:01 -0600325 foreach ($words as $word)
326 {
327 $temp = '';
Barry Mienydd671972010-10-04 16:33:58 +0200328
Derek Jonese701d762010-03-02 18:17:01 -0600329 for ($i = 0, $wordlen = strlen($word); $i < $wordlen; $i++)
330 {
331 $temp .= substr($word, $i, 1)."\s*";
332 }
333
334 // We only want to do this when it is followed by a non-word character
335 // That way valid stuff like "dealer to" does not become "dealerto"
336 $str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
337 }
Barry Mienydd671972010-10-04 16:33:58 +0200338
Derek Jonese701d762010-03-02 18:17:01 -0600339 /*
340 * Remove disallowed Javascript in links or img tags
Pascal Krietec9c045a2011-04-05 14:50:41 -0400341 * We used to do some version comparisons and use of stripos for PHP5,
342 * but it is dog slow compared to these simplified non-capturing
343 * preg_match(), especially if the pattern exists in the string
Derek Jonese701d762010-03-02 18:17:01 -0600344 */
345 do
346 {
347 $original = $str;
Barry Mienydd671972010-10-04 16:33:58 +0200348
Derek Jonese701d762010-03-02 18:17:01 -0600349 if (preg_match("/<a/i", $str))
350 {
351 $str = preg_replace_callback("#<a\s+([^>]*?)(>|$)#si", array($this, '_js_link_removal'), $str);
352 }
Barry Mienydd671972010-10-04 16:33:58 +0200353
Derek Jonese701d762010-03-02 18:17:01 -0600354 if (preg_match("/<img/i", $str))
355 {
356 $str = preg_replace_callback("#<img\s+([^>]*?)(\s?/?>|$)#si", array($this, '_js_img_removal'), $str);
357 }
Barry Mienydd671972010-10-04 16:33:58 +0200358
Derek Jonese701d762010-03-02 18:17:01 -0600359 if (preg_match("/script/i", $str) OR preg_match("/xss/i", $str))
360 {
361 $str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
362 }
363 }
Pascal Krietec9c045a2011-04-05 14:50:41 -0400364 while($original != $str);
Derek Jonese701d762010-03-02 18:17:01 -0600365
366 unset($original);
367
Pascal Krietec9c045a2011-04-05 14:50:41 -0400368 // Remove evil attributes such as style, onclick and xmlns
369 $str = $this->_remove_evil_attributes($str, $is_image);
Barry Mienydd671972010-10-04 16:33:58 +0200370
Derek Jonese701d762010-03-02 18:17:01 -0600371 /*
372 * Sanitize naughty HTML elements
373 *
374 * If a tag containing any of the words in the list
375 * below is found, the tag gets converted to entities.
376 *
377 * So this: <blink>
378 * Becomes: &lt;blink&gt;
Derek Jonese701d762010-03-02 18:17:01 -0600379 */
380 $naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';
381 $str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);
382
383 /*
384 * Sanitize naughty scripting elements
385 *
386 * Similar to above, only instead of looking for
387 * tags it looks for PHP and JavaScript commands
388 * that are disallowed. Rather than removing the
389 * code, it simply converts the parenthesis to entities
390 * rendering the code un-executable.
391 *
392 * For example: eval('some code')
393 * Becomes: eval&#40;'some code'&#41;
Derek Jonese701d762010-03-02 18:17:01 -0600394 */
395 $str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);
Barry Mienydd671972010-10-04 16:33:58 +0200396
Barry Mienydd671972010-10-04 16:33:58 +0200397
Pascal Krietec9c045a2011-04-05 14:50:41 -0400398 // Final clean up
399 // This adds a bit of extra precaution in case
400 // something got through the above filters
401 $str = $this->_do_never_allowed($str);
Derek Jonese701d762010-03-02 18:17:01 -0600402
403 /*
Pascal Krietec9c045a2011-04-05 14:50:41 -0400404 * Images are Handled in a Special Way
405 * - Essentially, we want to know that after all of the character
406 * conversion is done whether any unwanted, likely XSS, code was found.
407 * If not, we return TRUE, as the image is clean.
408 * However, if the string post-conversion does not matched the
409 * string post-removal of XSS, then it fails, as there was unwanted XSS
410 * code found and removed/changed during processing.
Derek Jonese701d762010-03-02 18:17:01 -0600411 */
412
413 if ($is_image === TRUE)
414 {
Pascal Krietec9c045a2011-04-05 14:50:41 -0400415 return ($str == $converted_string) ? TRUE: FALSE;
Derek Jonese701d762010-03-02 18:17:01 -0600416 }
Barry Mienydd671972010-10-04 16:33:58 +0200417
Derek Jonese701d762010-03-02 18:17:01 -0600418 log_message('debug', "XSS Filtering completed");
419 return $str;
420 }
421
422 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200423
Derek Jonese701d762010-03-02 18:17:01 -0600424 /**
425 * Random Hash for protecting URLs
426 *
Derek Jonese701d762010-03-02 18:17:01 -0600427 * @return string
428 */
Eric Barnes9805ecc2011-01-16 23:35:16 -0500429 public function xss_hash()
Barry Mienydd671972010-10-04 16:33:58 +0200430 {
Pascal Krietec9c045a2011-04-05 14:50:41 -0400431 if ($this->_xss_hash == '')
Derek Jonese701d762010-03-02 18:17:01 -0600432 {
433 if (phpversion() >= 4.2)
Derek Jonese701d762010-03-02 18:17:01 -0600434 {
Pascal Krietec9c045a2011-04-05 14:50:41 -0400435 mt_srand();
Derek Jonese701d762010-03-02 18:17:01 -0600436 }
Pascal Krietec9c045a2011-04-05 14:50:41 -0400437 else
438 {
439 mt_srand(hexdec(substr(md5(microtime()), -8)) & 0x7fffffff);
440 }
441
442 $this->_xss_hash = md5(time() + mt_rand(0, 1999999999));
Derek Jonese701d762010-03-02 18:17:01 -0600443 }
444
Pascal Krietec9c045a2011-04-05 14:50:41 -0400445 return $this->_xss_hash;
Derek Jonese701d762010-03-02 18:17:01 -0600446 }
447
448 // --------------------------------------------------------------------
449
450 /**
Derek Jonesa0911472010-03-30 10:33:09 -0500451 * HTML Entities Decode
452 *
453 * This function is a replacement for html_entity_decode()
454 *
455 * In some versions of PHP the native function does not work
456 * when UTF-8 is the specified character set, so this gives us
457 * a work-around. More info here:
458 * http://bugs.php.net/bug.php?id=25670
459 *
460 * NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
461 * character set, and the PHP developers said they were not back porting the
462 * fix to versions other than PHP 5.x.
463 *
Derek Jonesa0911472010-03-30 10:33:09 -0500464 * @param string
465 * @param string
466 * @return string
467 */
Eric Barnes9805ecc2011-01-16 23:35:16 -0500468 public function entity_decode($str, $charset='UTF-8')
Derek Jonesa0911472010-03-30 10:33:09 -0500469 {
470 if (stristr($str, '&') === FALSE) return $str;
Barry Mienydd671972010-10-04 16:33:58 +0200471
Derek Jonesa0911472010-03-30 10:33:09 -0500472 // The reason we are not using html_entity_decode() by itself is because
473 // while it is not technically correct to leave out the semicolon
474 // at the end of an entity most browsers will still interpret the entity
475 // correctly. html_entity_decode() does not convert entities without
476 // semicolons, so we are left with our own little solution here. Bummer.
Barry Mienydd671972010-10-04 16:33:58 +0200477
Pascal Krietec9c045a2011-04-05 14:50:41 -0400478 if (function_exists('html_entity_decode') &&
479 (strtolower($charset) != 'utf-8'))
Derek Jonesa0911472010-03-30 10:33:09 -0500480 {
481 $str = html_entity_decode($str, ENT_COMPAT, $charset);
482 $str = preg_replace('~&#x(0*[0-9a-f]{2,5})~ei', 'chr(hexdec("\\1"))', $str);
483 return preg_replace('~&#([0-9]{2,4})~e', 'chr(\\1)', $str);
484 }
Barry Mienydd671972010-10-04 16:33:58 +0200485
Derek Jonesa0911472010-03-30 10:33:09 -0500486 // Numeric Entities
487 $str = preg_replace('~&#x(0*[0-9a-f]{2,5});{0,1}~ei', 'chr(hexdec("\\1"))', $str);
488 $str = preg_replace('~&#([0-9]{2,4});{0,1}~e', 'chr(\\1)', $str);
Barry Mienydd671972010-10-04 16:33:58 +0200489
Derek Jonesa0911472010-03-30 10:33:09 -0500490 // Literal Entities - Slightly slow so we do another check
491 if (stristr($str, '&') === FALSE)
492 {
493 $str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
494 }
Barry Mienydd671972010-10-04 16:33:58 +0200495
Derek Jonesa0911472010-03-30 10:33:09 -0500496 return $str;
497 }
Barry Mienydd671972010-10-04 16:33:58 +0200498
Derek Jonesa0911472010-03-30 10:33:09 -0500499 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200500
Derek Jonesa0911472010-03-30 10:33:09 -0500501 /**
Derek Jonese701d762010-03-02 18:17:01 -0600502 * Filename Security
503 *
Derek Jonese701d762010-03-02 18:17:01 -0600504 * @param string
505 * @return string
506 */
Eric Barnes9805ecc2011-01-16 23:35:16 -0500507 public function sanitize_filename($str, $relative_path = FALSE)
Derek Jonese701d762010-03-02 18:17:01 -0600508 {
509 $bad = array(
510 "../",
Derek Jonese701d762010-03-02 18:17:01 -0600511 "<!--",
512 "-->",
513 "<",
514 ">",
515 "'",
516 '"',
517 '&',
518 '$',
519 '#',
520 '{',
521 '}',
522 '[',
523 ']',
524 '=',
525 ';',
526 '?',
Derek Jonese701d762010-03-02 18:17:01 -0600527 "%20",
528 "%22",
529 "%3c", // <
Barry Mienydd671972010-10-04 16:33:58 +0200530 "%253c", // <
531 "%3e", // >
532 "%0e", // >
533 "%28", // (
534 "%29", // )
535 "%2528", // (
536 "%26", // &
537 "%24", // $
538 "%3f", // ?
539 "%3b", // ;
Derek Jonese701d762010-03-02 18:17:01 -0600540 "%3d" // =
541 );
Pascal Krietec9c045a2011-04-05 14:50:41 -0400542
Derek Jones2ef37592010-10-06 17:51:59 -0500543 if ( ! $relative_path)
544 {
545 $bad[] = './';
546 $bad[] = '/';
547 }
Derek Jonese701d762010-03-02 18:17:01 -0600548
Pascal Krietec9c045a2011-04-05 14:50:41 -0400549 $str = remove_invisible_characters($str, FALSE);
Derek Jonese701d762010-03-02 18:17:01 -0600550 return stripslashes(str_replace($bad, '', $str));
551 }
552
Pascal Krietec9c045a2011-04-05 14:50:41 -0400553 // ----------------------------------------------------------------
554
555 /**
556 * Compact Exploded Words
557 *
558 * Callback function for xss_clean() to remove whitespace from
559 * things like j a v a s c r i p t
560 *
561 * @param type
562 * @return type
563 */
564 protected function _compact_exploded_words($matches)
565 {
566 return preg_replace('/\s+/s', '', $matches[1]).$matches[2];
567 }
568
569 // --------------------------------------------------------------------
570
571 /*
572 * Remove Evil HTML Attributes (like evenhandlers and style)
573 *
574 * It removes the evil attribute and either:
575 * - Everything up until a space
576 * For example, everything between the pipes:
577 * <a |style=document.write('hello');alert('world');| class=link>
578 * - Everything inside the quotes
579 * For example, everything between the pipes:
580 * <a |style="document.write('hello'); alert('world');"| class="link">
581 *
582 * @param string $str The string to check
583 * @param boolean $is_image TRUE if this is an image
584 * @return string The string with the evil attributes removed
585 */
586 protected function _remove_evil_attributes($str, $is_image)
587 {
588 // All javascript event handlers (e.g. onload, onclick, onmouseover), style, and xmlns
589 $evil_attributes = array('on\w*', 'style', 'xmlns');
590
591 if ($is_image === TRUE)
592 {
593 /*
594 * Adobe Photoshop puts XML metadata into JFIF images,
595 * including namespacing, so we have to allow this for images.
596 */
597 unset($evil_attributes[array_search('xmlns', $evil_attributes)]);
598 }
599
600 do {
601 $str = preg_replace(
602 "#<(/?[^><]+?)([^A-Za-z\-])(".implode('|', $evil_attributes).")(\s*=\s*)([\"][^>]*?[\"]|[\'][^>]*?[\']|[^>]*?)([\s><])([><]*)#i",
603 "<$1$6",
604 $str, -1, $count
605 );
606 } while ($count);
607
608 return $str;
609 }
610
611 // --------------------------------------------------------------------
612
613 /**
614 * Sanitize Naughty HTML
615 *
616 * Callback function for xss_clean() to remove naughty HTML elements
617 *
618 * @param array
619 * @return string
620 */
621 protected function _sanitize_naughty_html($matches)
622 {
623 // encode opening brace
624 $str = '&lt;'.$matches[1].$matches[2].$matches[3];
625
626 // encode captured opening or closing brace to prevent recursive vectors
627 $str .= str_replace(array('>', '<'), array('&gt;', '&lt;'),
628 $matches[4]);
629
630 return $str;
631 }
632
633 // --------------------------------------------------------------------
634
635 /**
636 * JS Link Removal
637 *
638 * Callback function for xss_clean() to sanitize links
639 * This limits the PCRE backtracks, making it more performance friendly
640 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
641 * PHP 5.2+ on link-heavy strings
642 *
643 * @param array
644 * @return string
645 */
646 protected function _js_link_removal($match)
647 {
648 $attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]));
649
650 return str_replace($match[1], preg_replace("#href=.*?(alert\(|alert&\#40;|javascript\:|livescript\:|mocha\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes), $match[0]);
651 }
652
653 // --------------------------------------------------------------------
654
655 /**
656 * JS Image Removal
657 *
658 * Callback function for xss_clean() to sanitize image tags
659 * This limits the PCRE backtracks, making it more performance friendly
660 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
661 * PHP 5.2+ on image tag heavy strings
662 *
663 * @param array
664 * @return string
665 */
666 protected function _js_img_removal($match)
667 {
668 $attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]));
669
670 return str_replace($match[1], preg_replace("#src=.*?(alert\(|alert&\#40;|javascript\:|livescript\:|mocha\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes), $match[0]);
671 }
672
673 // --------------------------------------------------------------------
674
675 /**
676 * Attribute Conversion
677 *
678 * Used as a callback for XSS Clean
679 *
680 * @param array
681 * @return string
682 */
683 protected function _convert_attribute($match)
684 {
685 return str_replace(array('>', '<', '\\'), array('&gt;', '&lt;', '\\\\'), $match[0]);
686 }
687
688 // --------------------------------------------------------------------
689
690 /**
691 * Filter Attributes
692 *
693 * Filters tag attributes for consistency and safety
694 *
695 * @param string
696 * @return string
697 */
698 protected function _filter_attributes($str)
699 {
700 $out = '';
701
702 if (preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches))
703 {
704 foreach ($matches[0] as $match)
705 {
706 $out .= preg_replace("#/\*.*?\*/#s", '', $match);
707 }
708 }
709
710 return $out;
711 }
712
713 // --------------------------------------------------------------------
714
715 /**
716 * HTML Entity Decode Callback
717 *
718 * Used as a callback for XSS Clean
719 *
720 * @param array
721 * @return string
722 */
723 protected function _decode_entity($match)
724 {
725 return $this->entity_decode($match[0], strtoupper(config_item('charset')));
726 }
727
728 // --------------------------------------------------------------------
729
730 /**
731 * Validate URL entities
732 *
733 * Called by xss_clean()
734 *
735 * @param string
736 * @return string
737 */
738 protected function _validate_entities($str)
739 {
740 /*
741 * Protect GET variables in URLs
742 */
743
744 // 901119URL5918AMP18930PROTECT8198
745
746 $str = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-]+)|i', $this->xss_hash()."\\1=\\2", $str);
747
748 /*
749 * Validate standard character entities
750 *
751 * Add a semicolon if missing. We do this to enable
752 * the conversion of entities to ASCII later.
753 *
754 */
755 $str = preg_replace('#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i', "\\1;\\2", $str);
756
757 /*
758 * Validate UTF16 two byte encoding (x00)
759 *
760 * Just as above, adds a semicolon if missing.
761 *
762 */
763 $str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);
764
765 /*
766 * Un-Protect GET variables in URLs
767 */
768 $str = str_replace($this->xss_hash(), '&', $str);
769
770 return $str;
771 }
772
773 // ----------------------------------------------------------------------
774
775 /**
776 * Do Never Allowed
777 *
778 * A utility function for xss_clean()
779 *
780 * @param string
781 * @return string
782 */
783 protected function _do_never_allowed($str)
784 {
785 foreach ($this->_never_allowed_str as $key => $val)
786 {
787 $str = str_replace($key, $val, $str);
788 }
789
790 foreach ($this->_never_allowed_regex as $key => $val)
791 {
792 $str = preg_replace("#".$key."#i", $val, $str);
793 }
794
795 return $str;
796 }
797
798 // --------------------------------------------------------------------
799
800 /**
801 * Set Cross Site Request Forgery Protection Cookie
802 *
803 * @return string
804 */
805 protected function _csrf_set_hash()
806 {
807 if ($this->_csrf_hash == '')
808 {
809 // If the cookie exists we will use it's value.
810 // We don't necessarily want to regenerate it with
811 // each page load since a page could contain embedded
812 // sub-pages causing this feature to fail
813 if (isset($_COOKIE[$this->_csrf_cookie_name]) &&
814 $_COOKIE[$this->_csrf_cookie_name] != '')
815 {
816 return $this->_csrf_hash = $_COOKIE[$this->_csrf_cookie_name];
817 }
818
819 return $this->_csrf_hash = md5(uniqid(rand(), TRUE));
820 }
821
822 return $this->_csrf_hash;
823 }
824
Derek Jonese701d762010-03-02 18:17:01 -0600825}
826// END Security Class
827
828/* End of file Security.php */
patworkef1a55a2011-04-09 13:04:06 +0200829/* Location: ./system/libraries/Security.php */