blob: 58db4e79c0947a5dc3dda6ee510a07ebb1c79353 [file] [log] [blame]
Derek Jonese701d762010-03-02 18:17:01 -06001<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
2/**
3 * CodeIgniter
4 *
Greg Aker741de1c2010-11-10 14:52:57 -06005 * An open source application development framework for PHP 5.1.6 or newer
Derek Jonese701d762010-03-02 18:17:01 -06006 *
7 * @package CodeIgniter
8 * @author ExpressionEngine Dev Team
Greg Aker0711dc82011-01-05 10:49:40 -06009 * @copyright Copyright (c) 2008 - 2011, EllisLab, Inc.
Derek Jonese701d762010-03-02 18:17:01 -060010 * @license http://codeigniter.com/user_guide/license.html
11 * @link http://codeigniter.com
12 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * Security Class
20 *
21 * @package CodeIgniter
22 * @subpackage Libraries
23 * @category Security
24 * @author ExpressionEngine Dev Team
 * @link		http://codeigniter.com/user_guide/libraries/security.html
26 */
27class CI_Security {
Eric Barnes9805ecc2011-01-16 23:35:16 -050028
29 public $xss_hash = '';
30 public $csrf_hash = '';
31 public $csrf_expire = 7200; // Two hours (in seconds)
32 public $csrf_token_name = 'ci_csrf_token';
33 public $csrf_cookie_name = 'ci_csrf_token';
Barry Mienydd671972010-10-04 16:33:58 +020034
Derek Jonese701d762010-03-02 18:17:01 -060035 /* never allowed, string replacement */
Eric Barnes9805ecc2011-01-16 23:35:16 -050036 public $never_allowed_str = array(
Derek Jonese701d762010-03-02 18:17:01 -060037 'document.cookie' => '[removed]',
38 'document.write' => '[removed]',
39 '.parentNode' => '[removed]',
40 '.innerHTML' => '[removed]',
41 'window.location' => '[removed]',
42 '-moz-binding' => '[removed]',
43 '<!--' => '&lt;!--',
44 '-->' => '--&gt;',
45 '<![CDATA[' => '&lt;![CDATA['
46 );
47 /* never allowed, regex replacement */
Eric Barnes9805ecc2011-01-16 23:35:16 -050048 public $never_allowed_regex = array(
Derek Jonese701d762010-03-02 18:17:01 -060049 "javascript\s*:" => '[removed]',
50 "expression\s*(\(|&\#40;)" => '[removed]', // CSS and IE
51 "vbscript\s*:" => '[removed]', // IE, surprise!
52 "Redirect\s+302" => '[removed]'
53 );
54
/**
 * Constructor
 *
 * Reads the CSRF settings from the configuration (falling back to
 * built-in defaults when an item is empty), applies the application's
 * cookie prefix to the CSRF cookie name and initialises the CSRF hash.
 *
 * @access	public
 * @return	void
 */
public function __construct()
{
	// Each setting falls back to a hard-coded default when the config item is empty/unset
	$this->csrf_token_name = (config_item('csrf_token_name')) ? config_item('csrf_token_name') : 'csrf_token_name';
	$this->csrf_cookie_name = (config_item('csrf_cookie_name')) ? config_item('csrf_cookie_name') : 'csrf_cookie_name';
	$this->csrf_expire = (config_item('csrf_expire')) ? config_item('csrf_expire') : 7200;

	// Prepend the application specific cookie prefix to the cookie name.
	// The prefix is applied to the *cookie* name resolved above; building it
	// from the token name would silently discard the 'csrf_cookie_name' setting.
	$this->csrf_cookie_name = (config_item('cookie_prefix')) ? config_item('cookie_prefix').$this->csrf_cookie_name : $this->csrf_cookie_name;

	// Set the CSRF hash
	$this->_csrf_set_hash();

	log_message('debug', "Security Class Initialized");
}
69
70 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +020071
/**
 * Verify Cross Site Request Forgery Protection
 *
 * When the request carries no POST data the CSRF cookie is simply
 * (re)sent. Otherwise the token submitted in $_POST must be present
 * and identical to the token stored in $_COOKIE; on any mismatch
 * csrf_show_error() is invoked.
 *
 * @access	public
 * @return	void
 */
public function csrf_verify()
{
	// If no POST data exists we will set the CSRF cookie
	if (count($_POST) == 0)
	{
		return $this->csrf_set_cookie();
	}

	// Do the tokens exist in both the _POST and _COOKIE arrays?
	if ( ! isset($_POST[$this->csrf_token_name]) OR ! isset($_COOKIE[$this->csrf_cookie_name]))
	{
		$this->csrf_show_error();
	}

	$posted_token = $_POST[$this->csrf_token_name];
	$cookie_token = $_COOKIE[$this->csrf_cookie_name];

	// Do the tokens match?
	// Both values are client-controlled and may arrive as arrays, and a loose
	// comparison treats numeric-looking strings (e.g. "0e123" and "0e456") as
	// equal, so require two strings that are strictly identical.
	if ( ! is_string($posted_token) OR ! is_string($cookie_token) OR $posted_token !== $cookie_token)
	{
		$this->csrf_show_error();
	}

	// We kill this since we're done and we don't want to pollute the _POST array
	unset($_POST[$this->csrf_token_name]);

	// Nothing should last forever
	// NOTE(review): _csrf_set_hash() only generates a value while csrf_hash is
	// empty, and the constructor has already populated it, so the call below
	// re-sends the current token rather than a fresh one - confirm the intent.
	unset($_COOKIE[$this->csrf_cookie_name]);
	$this->_csrf_set_hash();
	$this->csrf_set_cookie();

	log_message('debug', "CSRF token verified ");
}
Barry Mienydd671972010-10-04 16:33:58 +0200108
Derek Jonese701d762010-03-02 18:17:01 -0600109 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200110
/**
 * Set Cross Site Request Forgery Protection Cookie
 *
 * Sends the current CSRF hash to the browser in the CSRF cookie.
 * The cookie expires csrf_expire seconds from now and uses the
 * configured cookie path, domain and secure flag.
 *
 * @access	public
 * @return	void
 */
public function csrf_set_cookie()
{
	$expires_at = time() + $this->csrf_expire;

	// The secure flag is only raised when the config value is exactly TRUE
	$secure_flag = 0;
	if (config_item('cookie_secure') === TRUE)
	{
		$secure_flag = 1;
	}

	setcookie(
		$this->csrf_cookie_name,
		$this->csrf_hash,
		$expires_at,
		config_item('cookie_path'),
		config_item('cookie_domain'),
		$secure_flag
	);

	log_message('debug', "CRSF cookie Set");
}
Barry Mienydd671972010-10-04 16:33:58 +0200126
Derek Jonese701d762010-03-02 18:17:01 -0600127 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200128
/**
 * Set Cross Site Request Forgery Protection Hash
 *
 * Populates csrf_hash if it is still empty. A well-formed token found
 * in the CSRF cookie is reused; otherwise a new random token is
 * generated. Does nothing when csrf_hash already holds a value.
 *
 * @access	private
 * @return	string	the current CSRF hash
 */
private function _csrf_set_hash()
{
	if ($this->csrf_hash == '')
	{
		$cookie_value = isset($_COOKIE[$this->csrf_cookie_name]) ? $_COOKIE[$this->csrf_cookie_name] : NULL;

		// If the cookie exists we will use its value. We don't necessarily want to regenerate it with
		// each page load since a page could contain embedded sub-pages causing this feature to fail.
		// The cookie is client-controlled, so it is only trusted when it looks like a token
		// this class generates (32 hexadecimal characters); anything else is discarded.
		if (is_string($cookie_value) AND preg_match('#^[0-9a-f]{32}\z#i', $cookie_value) === 1)
		{
			$this->csrf_hash = $cookie_value;
		}
		elseif (function_exists('random_bytes'))
		{
			// Cryptographically secure source where the runtime offers one (PHP 7+)
			$this->csrf_hash = bin2hex(random_bytes(16));
		}
		else
		{
			// Legacy fallback for older PHP versions
			$this->csrf_hash = md5(uniqid(mt_rand(), TRUE));
		}
	}

	return $this->csrf_hash;
}
153
154 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200155
/**
 * Show CSRF Error
 *
 * Reports a failed CSRF check by passing a fixed message to show_error().
 *
 * @access	public
 * @return	void
 */
public function csrf_show_error()
{
	$message = 'The action you have requested is not allowed.';

	show_error($message);
}
166
167 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200168
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting hacks can be prevented.
 * The input is normalised (invisible characters removed, entities and
 * URL encoding decoded inside tags/attributes) and then passed through
 * a series of string and regex filters that neutralise scripting
 * constructs. Nothing is ever 100% foolproof.
 *
 * Note: This function should only be used to deal with data upon
 * submission. It's not something that should be used for general
 * runtime processing.
 *
 * Based in part on code and ideas from Bitflux
 * (http://channel.bitflux.ch/wiki/XSS_Prevention) and on the
 * vulnerability list at http://ha.ckers.org/xss.html
 *
 * @access	public
 * @param	mixed	string to clean, or an array whose elements are cleaned recursively
 * @param	bool	TRUE when the string is image file content; the method then
 *					returns a verdict instead of the cleaned string
 * @return	mixed	cleaned string (or array of cleaned values); when $is_image is
 *					TRUE, a boolean: TRUE if no filter altered the content
 */
public function xss_clean($str, $is_image = FALSE)
{
	/*
	 * Is the string an array?
	 *
	 * Each element is cleaned individually (as regular, non-image data).
	 * foreach is used because each() is deprecated since PHP 7.2 and
	 * removed in PHP 8.0.
	 */
	if (is_array($str))
	{
		foreach (array_keys($str) as $key)
		{
			$str[$key] = $this->xss_clean($str[$key]);
		}

		return $str;
	}

	/*
	 * Remove Invisible Characters
	 */
	$str = remove_invisible_characters($str);

	/*
	 * Protect GET variables in URLs
	 *
	 * The "&" of every "&name=value" pair is temporarily swapped for
	 * a random hash so the entity validation below leaves it alone.
	 */
	$str = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-]+)|i', $this->xss_hash()."\\1=\\2", $str);

	/*
	 * Validate standard character entities
	 *
	 * Add a semicolon if missing. We do this to enable
	 * the conversion of entities to ASCII later.
	 */
	$str = preg_replace('#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i', "\\1;\\2", $str);

	/*
	 * Validate UTF16 two byte encoding (x00)
	 *
	 * Just as above, adds a semicolon if missing.
	 */
	$str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);

	/*
	 * Un-Protect GET variables in URLs
	 */
	$str = str_replace($this->xss_hash(), '&', $str);

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Use rawurldecode() so it does not remove plus signs
	 */
	$str = rawurldecode($str);

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 */
	$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);

	$str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", array($this, '_decode_entity'), $str);

	/*
	 * Remove Invisible Characters Again!
	 */
	$str = remove_invisible_characters($str);

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like this: ja<TAB>vascript
	 * NOTE: we deal with spaces between characters later.
	 * NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
	 * so we use str_replace.
	 */
	if (strpos($str, "\t") !== FALSE)
	{
		$str = str_replace("\t", ' ', $str);
	}

	/*
	 * Capture converted string for later comparison
	 */
	$converted_string = $str;

	/*
	 * Not Allowed Under Any Conditions
	 */
	foreach ($this->never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($this->never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	/*
	 * Makes PHP tags safe
	 *
	 * Note: XML tags are inadvertently replaced too:
	 *
	 * <?xml
	 *
	 * But it doesn't seem to pose a problem.
	 */
	if ($is_image === TRUE)
	{
		// Images have a tendency to have the PHP short opening and closing tags every so often
		// so we skip those and only do the long opening tags.
		$str = preg_replace('/<\?(php)/i', "&lt;?\\1", $str);
	}
	else
	{
		$str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
	}

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like: j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 */
	$words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
	foreach ($words as $word)
	{
		$temp = '';

		// Build a pattern that allows optional whitespace after every letter
		for ($i = 0, $wordlen = strlen($word); $i < $wordlen; $i++)
		{
			$temp .= substr($word, $i, 1)."\s*";
		}

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto"
		// (substr() drops the trailing "\s*" appended after the last letter)
		$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 *
	 * The cheap preg_match() guards avoid running the expensive
	 * callbacks when the tag is not present at all. The loop repeats
	 * until a pass leaves the string untouched.
	 */
	do
	{
		$original = $str;

		if (preg_match("/<a/i", $str))
		{
			$str = preg_replace_callback("#<a\s+([^>]*?)(>|$)#si", array($this, '_js_link_removal'), $str);
		}

		if (preg_match("/<img/i", $str))
		{
			$str = preg_replace_callback("#<img\s+([^>]*?)(\s?/?>|$)#si", array($this, '_js_img_removal'), $str);
		}

		if (preg_match("/script/i", $str) OR preg_match("/xss/i", $str))
		{
			$str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
		}
	}
	while ($original !== $str);

	unset($original);

	/*
	 * Remove JavaScript Event Handlers
	 *
	 * Note: This code is a little blunt. It removes
	 * the event handler and anything up to the closing >,
	 * but it's unlikely to be a problem.
	 */
	$event_handlers = array('[^a-z_\-]on\w*','xmlns');

	if ($is_image === TRUE)
	{
		/*
		 * Adobe Photoshop puts XML metadata into JFIF images, including namespacing,
		 * so we have to allow this for images. -Paul
		 */
		unset($event_handlers[array_search('xmlns', $event_handlers)]);
	}

	$str = preg_replace("#<([^><]+?)(".implode('|', $event_handlers).")(\s*=\s*[^><]*)([><]*)#i", "<\\1\\4", $str);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 */
	$naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';
	$str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed. Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:		eval&#40;'some code'&#41;
	 */
	$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

	/*
	 * Final clean up
	 *
	 * This adds a bit of extra precaution in case
	 * something got through the above filters
	 */
	foreach ($this->never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($this->never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	/*
	 * Images are Handled in a Special Way
	 *
	 * Essentially, we want to know that after all of the character conversion is done whether
	 * any unwanted, likely XSS, code was found. If not, we return TRUE, as the image is clean.
	 * However, if the string post-conversion does not match the string post-removal of XSS,
	 * then it fails, as there was unwanted XSS code found and removed/changed during processing.
	 * The comparison is strict so that two different numeric-looking strings never compare equal.
	 */
	if ($is_image === TRUE)
	{
		return ($str === $converted_string);
	}

	log_message('debug', "XSS Filtering completed");
	return $str;
}
470
471 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200472
/**
 * Random Hash for protecting URLs
 *
 * Returns the placeholder hash used by xss_clean(), generating it
 * on first use and reusing the stored value afterwards.
 *
 * @access	public
 * @return	string	32 character hexadecimal hash
 */
public function xss_hash()
{
	if ($this->xss_hash == '')
	{
		// The former phpversion() >= 4.2 check compared a version string with a
		// float and guarded a manual-seeding branch that cannot run on any PHP
		// version this file supports, so the generator is simply seeded directly.
		mt_srand();

		$this->xss_hash = md5((string) (time() + mt_rand(0, 1999999999)));
	}

	return $this->xss_hash;
}
493
494 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200495
/**
 * Compact Exploded Words
 *
 * Callback function for xss_clean() to remove whitespace from
 * things like j a v a s c r i p t
 *
 * @access	private
 * @param	array	regex match: [1] is the spaced-out word, [2] the character that follows it
 * @return	string	the word with all whitespace removed, followed by the unchanged trailing character
 */
private function _compact_exploded_words($matches)
{
	$spaced_word = $matches[1];
	$following_char = $matches[2];

	$compacted_word = preg_replace('/\s+/s', '', $spaced_word);

	return $compacted_word.$following_char;
}
Barry Mienydd671972010-10-04 16:33:58 +0200510
Derek Jonese701d762010-03-02 18:17:01 -0600511 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200512
/**
 * Sanitize Naughty HTML
 *
 * Callback function for xss_clean() to neutralise naughty HTML
 * elements by converting their angle brackets to entities.
 *
 * @access	private
 * @param	array	regex match: [1] leading slashes/whitespace, [2] tag name,
 *					[3] remainder of the tag, [4] trailing angle brackets
 * @return	string	the tag with its opening and trailing brackets encoded
 */
private function _sanitize_naughty_html($matches)
{
	// Encode any captured opening or closing bracket to prevent recursive vectors
	$encoded_tail = str_replace(array('>', '<'), array('&gt;', '&lt;'), $matches[4]);

	// The opening bracket is always emitted in its encoded form
	return '&lt;'.$matches[1].$matches[2].$matches[3].$encoded_tail;
}
Barry Mienydd671972010-10-04 16:33:58 +0200532
Derek Jonese701d762010-03-02 18:17:01 -0600533 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200534
/**
 * JS Link Removal
 *
 * Callback function for xss_clean() to sanitize links.
 * Working on one matched tag at a time limits the PCRE backtracks,
 * making it more performance friendly and preventing
 * PREG_BACKTRACK_LIMIT_ERROR from being triggered in PHP 5.2+
 * on link-heavy strings.
 *
 * @access	private
 * @param	array	regex match: [0] the whole <a ...> tag, [1] its attribute string
 * @return	string	the tag with its attribute string replaced by the filtered one
 */
private function _js_link_removal($match)
{
	$raw_attributes = $match[1];

	// Drop stray angle brackets, then keep only well-formed attributes
	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $raw_attributes));

	// Cut everything from "href=" up to and including the first disallowed token
	$safe_attributes = preg_replace("#href=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes);

	return str_replace($raw_attributes, $safe_attributes, $match[0]);
}
Barry Mienydd671972010-10-04 16:33:58 +0200552
/**
 * JS Image Removal
 *
 * Callback function for xss_clean() to sanitize image tags.
 * Working on one matched tag at a time limits the PCRE backtracks,
 * making it more performance friendly and preventing
 * PREG_BACKTRACK_LIMIT_ERROR from being triggered in PHP 5.2+
 * on image tag heavy strings.
 *
 * @access	private
 * @param	array	regex match: [0] the whole <img ...> tag, [1] its attribute string
 * @return	string	the tag with its attribute string replaced by the filtered one
 */
private function _js_img_removal($match)
{
	$raw_attributes = $match[1];

	// Drop stray angle brackets, then keep only well-formed attributes
	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $raw_attributes));

	// Cut everything from "src=" up to and including the first disallowed token
	$safe_attributes = preg_replace("#src=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes);

	return str_replace($raw_attributes, $safe_attributes, $match[0]);
}
570
571 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200572
/**
 * Attribute Conversion
 *
 * Used as a callback for XSS Clean. Encodes angle brackets and
 * doubles backslashes inside a matched attribute.
 *
 * @access	private
 * @param	array	regex match; [0] is the whole name="value" attribute
 * @return	string	the attribute with ">", "<" and "\" replaced
 */
private function _convert_attribute($match)
{
	$search = array('>', '<', '\\');
	$replace = array('&gt;', '&lt;', '\\\\');

	return str_replace($search, $replace, $match[0]);
}
Barry Mienydd671972010-10-04 16:33:58 +0200586
Derek Jonese701d762010-03-02 18:17:01 -0600587 // --------------------------------------------------------------------
588
/**
 * Filter Attributes
 *
 * Filters tag attributes for consistency and safety: only attributes
 * whose value is enclosed in single or double quotes are kept, and
 * any slash-star comments inside them are stripped.
 *
 * @access	private
 * @param	string	raw attribute string of a tag
 * @return	string	concatenation of the kept attributes ('' when none match)
 */
private function _filter_attributes($str)
{
	// \042 and \047 are the octal codes of the double and single quote
	$attribute_pattern = '#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is';

	if ( ! preg_match_all($attribute_pattern, $str, $found))
	{
		return '';
	}

	$filtered = '';

	foreach ($found[0] as $attribute)
	{
		// Strip comments hidden inside the attribute
		$filtered .= preg_replace("#/\*.*?\*/#s", '', $attribute);
	}

	return $filtered;
}
612
613 // --------------------------------------------------------------------
614
/**
 * HTML Entity Decode Callback
 *
 * Used as a callback for XSS Clean. Decodes the entities of the
 * matched text using the upper-cased 'charset' config item.
 *
 * @access	private
 * @param	array	regex match; [0] is the text to decode
 * @return	string
 */
private function _decode_entity($match)
{
	$charset = strtoupper(config_item('charset'));

	return $this->entity_decode($match[0], $charset);
}
628
629 // --------------------------------------------------------------------
630
/**
 * HTML Entities Decode
 *
 * This function is a replacement for html_entity_decode()
 *
 * In some versions of PHP the native function does not work
 * when UTF-8 is the specified character set, so this gives us
 * a work-around. More info here:
 * http://bugs.php.net/bug.php?id=25670
 *
 * In addition, numeric entities that lack their terminating
 * semicolon are decoded here, which html_entity_decode() leaves alone.
 *
 * @access	public
 * @param	string	text that may contain HTML entities
 * @param	string	character set passed to html_entity_decode()
 * @return	string	decoded text
 */
public function entity_decode($str, $charset='UTF-8')
{
	// Nothing to decode without an ampersand
	if (stristr($str, '&') === FALSE) return $str;

	// The reason we are not using html_entity_decode() by itself is because
	// while it is not technically correct to leave out the semicolon
	// at the end of an entity most browsers will still interpret the entity
	// correctly. html_entity_decode() does not convert entities without
	// semicolons, so we are left with our own little solution here. Bummer.
	//
	// The numeric entities are converted through callbacks rather than the
	// "e" (eval) pattern modifier, which no longer exists as of PHP 7.

	if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR is_php('5.0.0')))
	{
		$str = html_entity_decode($str, ENT_COMPAT, $charset);
		$str = preg_replace_callback('~&#x(0*[0-9a-f]{2,5})~i', array($this, '_hex_entity_to_char'), $str);
		return preg_replace_callback('~&#([0-9]{2,4})~', array($this, '_dec_entity_to_char'), $str);
	}

	// Numeric Entities
	$str = preg_replace_callback('~&#x(0*[0-9a-f]{2,5});{0,1}~i', array($this, '_hex_entity_to_char'), $str);
	$str = preg_replace_callback('~&#([0-9]{2,4});{0,1}~', array($this, '_dec_entity_to_char'), $str);

	// Literal Entities - Slightly slow so we only do it when an ampersand is still present
	if (strpos($str, '&') !== FALSE)
	{
		$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
	}

	return $str;
}

/**
 * Callback for entity_decode(): converts the hexadecimal digits
 * captured from a numeric entity into the corresponding character.
 *
 * @access	private
 * @param	array	regex match; [1] holds the hexadecimal digits
 * @return	string
 */
private function _hex_entity_to_char($match)
{
	return chr(hexdec($match[1]));
}

/**
 * Callback for entity_decode(): converts the decimal digits
 * captured from a numeric entity into the corresponding character.
 * The digits are always read as base 10, even with leading zeros.
 *
 * @access	private
 * @param	array	regex match; [1] holds the decimal digits
 * @return	string
 */
private function _dec_entity_to_char($match)
{
	return chr((int) $match[1]);
}
Barry Mienydd671972010-10-04 16:33:58 +0200679
Derek Jonesa0911472010-03-30 10:33:09 -0500680 // --------------------------------------------------------------------
Barry Mienydd671972010-10-04 16:33:58 +0200681
/**
 * Filename Security
 *
 * Removes directory traversal sequences and characters that are
 * unsafe in file names, then strips backslashes.
 *
 * @access	public
 * @param	string	file name to sanitize
 * @param	bool	TRUE to keep "./" and "/" so that relative paths survive
 * @return	string	sanitized file name
 */
public function sanitize_filename($str, $relative_path = FALSE)
{
	$bad = array(
					"../",
					"<!--",
					"-->",
					"<",
					">",
					"'",
					'"',
					'&',
					'$',
					'#',
					'{',
					'}',
					'[',
					']',
					'=',
					';',
					'?',
					"%20",
					"%22",
					"%3c",		// <
					"%253c",	// < (double encoded)
					"%3e",		// >
					"%0e",		// 0x0E control character (%3e above is ">")
					"%28",		// (
					"%29",		// )
					"%2528",	// ( (double encoded)
					"%26",		// &
					"%24",		// $
					"%3f",		// ?
					"%3b",		// ;
					"%3d"		// =
				);

	if ( ! $relative_path)
	{
		$bad[] = './';
		$bad[] = '/';
	}

	// A single replacement pass can be defeated by nesting, e.g. "....//"
	// collapses to "../" once the inner "../" is removed, so the filter is
	// applied until the string no longer changes.
	$str = $this->_strip_until_stable($str, $bad);

	// Removing backslashes can reassemble a sequence (e.g. "..\/" becomes
	// "../"), so the filter is run once more on the result.
	return $this->_strip_until_stable(stripslashes($str), $bad);
}

/**
 * Helper for sanitize_filename(): removes every listed sequence from
 * the string, repeating until a pass leaves the string unchanged.
 *
 * @access	private
 * @param	string	text to filter
 * @param	array	sequences to remove
 * @return	string
 */
private function _strip_until_stable($str, array $bad)
{
	do
	{
		$previous = $str;
		$str = str_replace($bad, '', $str);
	}
	while ($previous !== $str);

	return $str;
}
733
734}
735// END Security Class
736
737/* End of file Security.php */
738/* Location: ./system/libraries/Security.php */