blob: c8d435046ffb24bf4926858cc9ebda71997ca474 [file] [log] [blame]
Derek Jonese701d762010-03-02 18:17:01 -06001<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
2/**
3 * CodeIgniter
4 *
5 * An open source application development framework for PHP 4.3.2 or newer
6 *
7 * @package CodeIgniter
8 * @author ExpressionEngine Dev Team
9 * @copyright Copyright (c) 2008 - 2010, EllisLab, Inc.
10 * @license http://codeigniter.com/user_guide/license.html
11 * @link http://codeigniter.com
12 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * Security Class
20 *
21 * @package CodeIgniter
22 * @subpackage Libraries
23 * @category Security
24 * @author ExpressionEngine Dev Team
25 * @link http://codeigniter.com/user_guide/libraries/sessions.html
26 */
27class CI_Security {
28 var $xss_hash = '';
29 var $csrf_hash = '';
30 var $csrf_expire = 7200; // Two hours (in seconds)
31 var $csrf_token_name = 'ci_csrf_token';
32
33 /* never allowed, string replacement */
34 var $never_allowed_str = array(
35 'document.cookie' => '[removed]',
36 'document.write' => '[removed]',
37 '.parentNode' => '[removed]',
38 '.innerHTML' => '[removed]',
39 'window.location' => '[removed]',
40 '-moz-binding' => '[removed]',
41 '<!--' => '&lt;!--',
42 '-->' => '--&gt;',
43 '<![CDATA[' => '&lt;![CDATA['
44 );
45 /* never allowed, regex replacement */
46 var $never_allowed_regex = array(
47 "javascript\s*:" => '[removed]',
48 "expression\s*(\(|&\#40;)" => '[removed]', // CSS and IE
49 "vbscript\s*:" => '[removed]', // IE, surprise!
50 "Redirect\s+302" => '[removed]'
51 );
52
/**
 * Constructor
 *
 * Establishes the CSRF hash (which in turn sends the CSRF cookie)
 * and logs that the class has been loaded.
 *
 * @access	public
 */
function CI_Security()
{
	// The hash must exist before any form or cookie can reference it
	$this->_csrf_set_hash();

	log_message('debug', 'Security Class Initialized');
}
60
61 // --------------------------------------------------------------------
62
/**
 * Verify Cross Site Request Forgery Protection
 *
 * When the request carries no POST data the CSRF cookie is simply
 * (re)sent and nothing is verified. Otherwise the token submitted in
 * $_POST must exist, be a string, and be identical to the token held
 * in the (prefixed) CSRF cookie; anything else ends in csrf_show_error().
 * On success the token is removed from $_POST.
 *
 * @access	public
 * @return	null
 */
function csrf_verify()
{
	// If no POST data exists we will set the CSRF cookie
	if (count($_POST) == 0)
	{
		return $this->csrf_set_cookie();
	}

	// Build the cookie name exactly the way csrf_set_cookie() does so
	// both methods always agree on which cookie holds the token
	$prefix = config_item('cookie_prefix');
	$cookie_name = is_string($prefix) ? $prefix.$this->csrf_token_name : $this->csrf_token_name;

	$posted = isset($_POST[$this->csrf_token_name]) ? $_POST[$this->csrf_token_name] : NULL;
	$stored = isset($_COOKIE[$cookie_name]) ? $_COOKIE[$cookie_name] : NULL;

	// Both tokens must be present, must be plain strings (request data can
	// be arrays) and must match exactly. A strict comparison is required:
	// with a loose one, two different numeric-looking hashes such as
	// "0e1234..." and "0e5678..." are compared as numbers and found equal.
	if ( ! is_string($posted) OR ! is_string($stored) OR $posted !== $stored)
	{
		$this->csrf_show_error();

		// Never fall through to "verified" should the error handler return
		return;
	}

	// We kill this since we're done and we don't want to pollute the _POST array
	unset($_POST[$this->csrf_token_name]);

	log_message('debug', "CSRF token verified ");
}
97
98 // --------------------------------------------------------------------
99
/**
 * Set Cross Site Request Forgery Protection Cookie
 *
 * Sends the current CSRF hash to the browser in a cookie named after
 * the token (with the configured cookie prefix, if any), valid for
 * $csrf_expire seconds from now.
 *
 * @access	public
 * @return	null
 */
function csrf_set_cookie()
{
	// Anything other than a string prefix is treated as "no prefix"
	$prefix = config_item('cookie_prefix');

	if ( ! is_string($prefix))
	{
		$prefix = '';
	}

	$expire = time() + $this->csrf_expire;

	setcookie($prefix.$this->csrf_token_name, $this->csrf_hash, $expire, config_item('cookie_path'), config_item('cookie_domain'), FALSE);

	log_message('debug', "CSRF cookie Set");
}
116
117 // --------------------------------------------------------------------
118
/**
 * Set Cross Site Request Forgery Protection Hash
 *
 * Re-uses the token already stored in the CSRF cookie when there is a
 * well-formed one, otherwise generates a new token. The cookie is
 * (re)sent in either case.
 *
 * @access	private
 * @return	string	the CSRF hash in use for this request
 */
function _csrf_set_hash()
{
	if ($this->csrf_hash == '')
	{
		// Look the cookie up under the same (prefixed) name that
		// csrf_set_cookie() writes it under; reading the bare token name
		// would never find it once a cookie prefix is configured
		$prefix = config_item('cookie_prefix');
		$cookie_name = is_string($prefix) ? $prefix.$this->csrf_token_name : $this->csrf_token_name;

		// If the cookie exists we will use its value. We don't necessarily want to regenerate it with
		// each page load since a page could contain embedded sub-pages causing this feature to fail.
		// The value comes from the client, so only a 32 character hex string (the shape md5() produces
		// below) is accepted.
		if (isset($_COOKIE[$cookie_name]) AND is_string($_COOKIE[$cookie_name]) AND preg_match('/^[0-9a-f]{32}\z/i', $_COOKIE[$cookie_name]))
		{
			$this->csrf_hash = $_COOKIE[$cookie_name];
		}
		else
		{
			$this->csrf_hash = md5(uniqid(rand(), TRUE));
		}
	}

	// Create the cookie before we finish up
	$this->csrf_set_cookie();

	return $this->csrf_hash;
}
146
147 // --------------------------------------------------------------------
148
/**
 * Show CSRF Error
 *
 * Reports a failed CSRF check through show_error().
 *
 * @access	public
 * @return	null
 */
function csrf_show_error()
{
	$message = 'The action you have requested is not allowed.';

	show_error($message);
}
159
160 // --------------------------------------------------------------------
161
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting Hacks can be
 * prevented. This function does a fair amount of work but
 * it is extremely thorough, designed to prevent even the
 * most obscure XSS attempts. Nothing is ever 100% foolproof,
 * of course, but I haven't been able to get anything past
 * the filter.
 *
 * Note: This function should only be used to deal with data
 * upon submission. It's not something that should
 * be used for general runtime processing.
 *
 * This function was based in part on some code and ideas I
 * got from Bitflux: http://channel.bitflux.ch/wiki/XSS_Prevention
 *
 * To help develop this script I used this great list of
 * vulnerabilities along with a few other hacks I've
 * harvested from examining vulnerabilities in other programs:
 * http://ha.ckers.org/xss.html
 *
 * The order of the filtering steps below matters; each one relies
 * on the normalisation done by the steps before it.
 *
 * @access	public
 * @param	mixed	string or array (arrays are cleaned element by element)
 * @param	bool	TRUE when the data is the content of an image; the
 *					method then only reports whether anything had to be changed
 * @return	mixed	the cleaned string or array; when $is_image is TRUE,
 *					TRUE if the string came through unchanged, FALSE otherwise
 */
function xss_clean($str, $is_image = FALSE)
{
	/*
	 * Is the string an array?
	 *
	 * Every element is cleaned recursively. The keys are walked with
	 * foreach rather than each(): each() depends on the array's internal
	 * pointer and is no longer available in current PHP versions.
	 * As before, $is_image is not forwarded to the recursive calls.
	 */
	if (is_array($str))
	{
		foreach (array_keys($str) as $key)
		{
			$str[$key] = $this->xss_clean($str[$key]);
		}

		return $str;
	}

	/*
	 * Remove Invisible Characters
	 */
	$str = remove_invisible_characters($str);

	/*
	 * Protect GET variables in URLs
	 *
	 * The "&" of each name=value pair is temporarily swapped for a
	 * random hash so the entity fix-ups below do not touch it.
	 */

	// 901119URL5918AMP18930PROTECT8198

	$str = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-]+)|i', $this->xss_hash()."\\1=\\2", $str);

	/*
	 * Validate standard character entities
	 *
	 * Add a semicolon if missing.  We do this to enable
	 * the conversion of entities to ASCII later.
	 *
	 */
	$str = preg_replace('#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i', "\\1;\\2", $str);

	/*
	 * Validate UTF16 two byte encoding (x00)
	 *
	 * Just as above, adds a semicolon if missing.
	 *
	 */
	$str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);

	/*
	 * Un-Protect GET variables in URLs
	 */
	$str = str_replace($this->xss_hash(), '&', $str);

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Use rawurldecode() so it does not remove plus signs
	 *
	 */
	$str = rawurldecode($str);

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 *
	 */

	$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);

	$str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", array($this, '_decode_entity'), $str);

	/*
	 * Remove Invisible Characters Again!
	 */
	$str = remove_invisible_characters($str);

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like this: ja<tab>vascript
	 * NOTE: we deal with spaces between characters later.
	 * NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
	 * so we use str_replace.
	 *
	 */

	if (strpos($str, "\t") !== FALSE)
	{
		$str = str_replace("\t", ' ', $str);
	}

	/*
	 * Capture converted string for later comparison
	 */
	$converted_string = $str;

	/*
	 * Not Allowed Under Any Conditions
	 */

	foreach ($this->never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($this->never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	/*
	 * Makes PHP tags safe
	 *
	 *  Note: XML tags are inadvertently replaced too:
	 *
	 *	<?xml
	 *
	 * But it doesn't seem to pose a problem.
	 *
	 */
	if ($is_image === TRUE)
	{
		// Images have a tendency to have the PHP short opening and closing tags every so often
		// so we skip those and only do the long opening tags.
		$str = preg_replace('/<\?(php)/i', "&lt;?\\1", $str);
	}
	else
	{
		$str = str_replace(array('<?', '?'.'>'),  array('&lt;?', '?&gt;'), $str);
	}

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like:  j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 *
	 */
	$words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
	foreach ($words as $word)
	{
		$temp = '';

		// Build "j\s*a\s*v\s*a...": every letter followed by optional whitespace
		for ($i = 0, $wordlen = strlen($word); $i < $wordlen; $i++)
		{
			$temp .= substr($word, $i, 1)."\s*";
		}

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto"
		// (substr() drops the "\s*" that trails the last letter)
		$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 * We used to do some version comparisons and use of stripos for PHP5, but it is dog slow compared
	 * to these simplified non-capturing preg_match(), especially if the pattern exists in the string
	 *
	 * The loop repeats until a pass leaves the string unchanged, so that
	 * removing one vector cannot expose another.
	 */
	do
	{
		$original = $str;

		if (preg_match("/<a/i", $str))
		{
			$str = preg_replace_callback("#<a\s+([^>]*?)(>|$)#si", array($this, '_js_link_removal'), $str);
		}

		if (preg_match("/<img/i", $str))
		{
			$str = preg_replace_callback("#<img\s+([^>]*?)(\s?/?>|$)#si", array($this, '_js_img_removal'), $str);
		}

		if (preg_match("/script/i", $str) OR preg_match("/xss/i", $str))
		{
			$str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
		}
	}
	while($original != $str);

	unset($original);

	/*
	 * Remove JavaScript Event Handlers
	 *
	 * Note: This code is a little blunt.  It removes
	 * the event handler and anything up to the closing >,
	 * but it's unlikely to be a problem.
	 *
	 */
	$event_handlers = array('[^a-z_\-]on\w*','xmlns');

	if ($is_image === TRUE)
	{
		/*
		 * Adobe Photoshop puts XML metadata into JFIF images, including namespacing,
		 * so we have to allow this for images. -Paul
		 */
		unset($event_handlers[array_search('xmlns', $event_handlers)]);
	}

	$str = preg_replace("#<([^><]+?)(".implode('|', $event_handlers).")(\s*=\s*[^><]*)([><]*)#i", "<\\1\\4", $str);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 *
	 */
	$naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';
	$str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed.  Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:		eval&#40;'some code'&#41;
	 *
	 */
	$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

	/*
	 * Final clean up
	 *
	 * This adds a bit of extra precaution in case
	 * something got through the above filters
	 *
	 */
	foreach ($this->never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($this->never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	/*
	 *  Images are Handled in a Special Way
	 *  - Essentially, we want to know that after all of the character conversion is done whether
	 *  any unwanted, likely XSS, code was found.  If not, we return TRUE, as the image is clean.
	 *  However, if the string post-conversion does not match the string post-removal of XSS,
	 *  then it fails, as there was unwanted XSS code found and removed/changed during processing.
	 */

	if ($is_image === TRUE)
	{
		return ($str == $converted_string);
	}

	log_message('debug', "XSS Filtering completed");
	return $str;
}
463
464 // --------------------------------------------------------------------
465
/**
 * Random Hash for protecting URLs
 *
 * Generates the hash on first use and returns the same value on
 * every later call for this instance.
 *
 * @access	public
 * @return	string
 */
function xss_hash()
{
	if ($this->xss_hash == '')
	{
		// Compare versions with version_compare(): phpversion() returns a
		// string such as "5.10.0", which cannot be compared reliably
		// against the float 4.2
		if (version_compare(PHP_VERSION, '4.2.0', '>='))
		{
			mt_srand();
		}
		else
		{
			// Older versions get an explicit seed derived from microtime()
			mt_srand(hexdec(substr(md5(microtime()), -8)) & 0x7fffffff);
		}

		$this->xss_hash = md5(time() + mt_rand(0, 1999999999));
	}

	return $this->xss_hash;
}
486
487 // --------------------------------------------------------------------
488
/**
 * Compact Exploded Words
 *
 * Callback function for xss_clean() that closes up words which were
 * spread out with whitespace, e.g. "j a v a s c r i p t".
 *
 * @access	public
 * @param	array	match data: [1] the spread-out word, [2] the character following it
 * @return	string	the word without any whitespace, followed by the unchanged trailing character
 */
function _compact_exploded_words($matches)
{
	$word = $matches[1];
	$following = $matches[2];

	// Only the word itself is compacted; what follows it is passed through
	$compacted = preg_replace('/\s+/s', '', $word);

	return $compacted.$following;
}
503
504 // --------------------------------------------------------------------
505
/**
 * Sanitize Naughty HTML
 *
 * Callback function for xss_clean() that neutralises a disallowed
 * HTML element by turning its angle brackets into entities.
 *
 * @access	private
 * @param	array	match data: [1] optional "/" and whitespace, [2] tag name,
 *					[3] remainder of the tag, [4] any trailing "<" / ">" characters
 * @return	string
 */
function _sanitize_naughty_html($matches)
{
	// Captured opening or closing braces are encoded as well
	// to prevent recursive vectors
	$trailing = str_replace(array('>', '<'), array('&gt;', '&lt;'), $matches[4]);

	// The tag's own opening brace is always written as an entity
	return '&lt;'.$matches[1].$matches[2].$matches[3].$trailing;
}
525
526 // --------------------------------------------------------------------
527
/**
 * JS Link Removal
 *
 * Callback function for xss_clean() to sanitize links.
 * Working on one tag at a time limits the PCRE backtracks, making it
 * more performance friendly, and prevents PREG_BACKTRACK_LIMIT_ERROR
 * from being triggered in PHP 5.2+ on link-heavy strings.
 *
 * @access	private
 * @param	array	match data: [0] the whole tag, [1] its attribute string
 * @return	string	the tag with its attribute string replaced by the filtered version
 */
function _js_link_removal($match)
{
	$raw_attributes = $match[1];

	// Keep only well-formed, quoted attributes (brackets stripped first)
	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $raw_attributes));

	// Cut everything from "href=" up to and including a disallowed token
	$attributes = preg_replace("#href=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes);

	return str_replace($raw_attributes, $attributes, $match[0]);
}
545
/**
 * JS Image Removal
 *
 * Callback function for xss_clean() to sanitize image tags.
 * Working on one tag at a time limits the PCRE backtracks, making it
 * more performance friendly, and prevents PREG_BACKTRACK_LIMIT_ERROR
 * from being triggered in PHP 5.2+ on image tag heavy strings.
 *
 * @access	private
 * @param	array	match data: [0] the whole tag, [1] its attribute string
 * @return	string	the tag with its attribute string replaced by the filtered version
 */
function _js_img_removal($match)
{
	$raw_attributes = $match[1];

	// Keep only well-formed, quoted attributes (brackets stripped first)
	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $raw_attributes));

	// Cut everything from "src=" up to and including a disallowed token
	$attributes = preg_replace("#src=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes);

	return str_replace($raw_attributes, $attributes, $match[0]);
}
563
564 // --------------------------------------------------------------------
565
/**
 * Attribute Conversion
 *
 * Used as a callback for XSS Clean. Encodes angle brackets and doubles
 * backslashes inside a matched attribute (name, quotes and value).
 *
 * @access	public
 * @param	array	match data: [0] the complete attribute
 * @return	string
 */
function _convert_attribute($match)
{
	$search  = array('>', '<', '\\');
	$replace = array('&gt;', '&lt;', '\\\\');

	return str_replace($search, $replace, $match[0]);
}
579
580 // --------------------------------------------------------------------
581
/**
 * Filter Attributes
 *
 * Filters tag attributes for consistency and safety: only attributes
 * of the form name="value" or name='value' are kept, and slash-star
 * comments are removed from each of them.
 *
 * @access	public
 * @param	string	raw attribute string of a tag
 * @return	string	the kept attributes joined together, or '' when none matched
 */
function _filter_attributes($str)
{
	// \042 and \047 are the double and single quote.
	// NOTE(review): "\1" inside the character class is presumably read by
	// PCRE as a character code rather than a back-reference; the value is
	// effectively ended by the lazy quantifier and the closing \1 - confirm
	// before changing the pattern.
	if ( ! preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $found))
	{
		return '';
	}

	$out = '';

	foreach ($found[0] as $attribute)
	{
		$out .= preg_replace("#/\*.*?\*/#s", '', $attribute);
	}

	return $out;
}
605
606 // --------------------------------------------------------------------
607
/**
 * HTML Entity Decode Callback
 *
 * Used as a callback for XSS Clean. Decodes the entities in the matched
 * text using the upper-cased "charset" configuration item.
 *
 * @access	public
 * @param	array	match data: [0] the text to decode
 * @return	string
 */
function _decode_entity($match)
{
	$charset = strtoupper(config_item('charset'));

	return $this->entity_decode($match[0], $charset);
}
621
622 // --------------------------------------------------------------------
623
/**
 * HTML Entities Decode
 *
 * This function is a replacement for html_entity_decode()
 *
 * In some versions of PHP the native function does not work
 * when UTF-8 is the specified character set, so this gives us
 * a work-around.  More info here:
 * http://bugs.php.net/bug.php?id=25670
 *
 * NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
 * character set, and the PHP developers said they were not back porting the
 * fix to versions other than PHP 5.x.
 *
 * Numeric entities are converted through callbacks instead of the
 * evaluating "e" pattern modifier: that modifier is no longer available
 * in current PHP versions, and it evaluated the captured digits as PHP
 * source, so a zero-padded value such as "&#0065" was read as an octal
 * literal.
 *
 * @access	public
 * @param	string	text to decode
 * @param	string	character set handed to html_entity_decode()
 * @return	string
 */
function entity_decode($str, $charset='UTF-8')
{
	// Nothing to decode without an ampersand
	if (strpos($str, '&') === FALSE)
	{
		return $str;
	}

	// The reason we are not using html_entity_decode() by itself is because
	// while it is not technically correct to leave out the semicolon
	// at the end of an entity most browsers will still interpret the entity
	// correctly.  html_entity_decode() does not convert entities without
	// semicolons, so we are left with our own little solution here. Bummer.

	if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR is_php('5.0.0')))
	{
		$str = html_entity_decode($str, ENT_COMPAT, $charset);

		// Mop up the numeric entities that were left behind because they
		// lack the closing semicolon
		$str = preg_replace_callback('~&#x(0*[0-9a-f]{2,5})~i', array($this, '_entity_hex_to_chr'), $str);
		return preg_replace_callback('~&#([0-9]{2,4})~', array($this, '_entity_dec_to_chr'), $str);
	}

	// Numeric Entities
	$str = preg_replace_callback('~&#x(0*[0-9a-f]{2,5});{0,1}~i', array($this, '_entity_hex_to_chr'), $str);
	$str = preg_replace_callback('~&#([0-9]{2,4});{0,1}~', array($this, '_entity_dec_to_chr'), $str);

	// Literal Entities - Slightly slow so we do another check and only
	// translate when an ampersand is still present (the test used to be
	// inverted, which meant the translation could never find anything)
	if (strpos($str, '&') !== FALSE)
	{
		$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
	}

	return $str;
}

/**
 * Callback for entity_decode(): hexadecimal numeric entity to character
 *
 * @access	private
 * @param	array	match data: [1] the hexadecimal digits
 * @return	string	the result of chr() for that value
 */
function _entity_hex_to_chr($match)
{
	return chr(hexdec($match[1]));
}

/**
 * Callback for entity_decode(): decimal numeric entity to character
 *
 * @access	private
 * @param	array	match data: [1] the decimal digits
 * @return	string	the result of chr() for that value
 */
function _entity_dec_to_chr($match)
{
	// The cast reads the digits as a decimal number, leading zeros included
	return chr((int) $match[1]);
}
672
673 // --------------------------------------------------------------------
674
/**
 * Filename Security
 *
 * Removes path fragments, markup characters and a list of URL-encoded
 * characters from a file name, then strips backslashes.
 *
 * The removal is repeated until the string no longer changes. A single
 * pass is not enough, because deleting one sequence can splice a new
 * one together: "%3%3cc" turns into "%3c" once the inner "%3c" is gone.
 *
 * @access	public
 * @param	string	file name to clean
 * @return	string
 */
function sanitize_filename($str)
{
	$bad = array(
					"../",
					"./",
					"<!--",
					"-->",
					"<",
					">",
					"'",
					'"',
					'&',
					'$',
					'#',
					'{',
					'}',
					'[',
					']',
					'=',
					';',
					'?',
					'/',
					"%20",		// space
					"%22",		// "
					"%3c",		// <
					"%253c", 	// < (double encoded)
					"%3e", 		// >
					"%0e", 		// control character 0x0E
					"%28", 		// (
					"%29", 		// )
					"%2528", 	// ( (double encoded)
					"%26", 		// &
					"%24", 		// $
					"%3f", 		// ?
					"%3b", 		// ;
					"%3d"		// =
				);

	// Each pass can only shorten the string, so the loop always ends
	do
	{
		$previous = $str;
		$str = stripslashes(str_replace($bad, '', $str));
	}
	while ($previous !== $str);

	return $str;
}
722
723}
724// END Security Class
725
726/* End of file Security.php */
727/* Location: ./system/libraries/Security.php */