blob: cdae501688646ebf26ae84bf49f1ecf5dcce5af9 [file] [log] [blame]
Derek Jonese701d762010-03-02 18:17:01 -06001<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
2/**
3 * CodeIgniter
4 *
5 * An open source application development framework for PHP 4.3.2 or newer
6 *
7 * @package CodeIgniter
8 * @author ExpressionEngine Dev Team
9 * @copyright Copyright (c) 2008 - 2010, EllisLab, Inc.
10 * @license http://codeigniter.com/user_guide/license.html
11 * @link http://codeigniter.com
12 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * Security Class
20 *
21 * @package CodeIgniter
22 * @subpackage Libraries
23 * @category Security
24 * @author ExpressionEngine Dev Team
 * @link		http://codeigniter.com/user_guide/libraries/security.html
26 */
27class CI_Security {
// Random marker generated on first use by xss_hash(); xss_clean() swaps it in
// for '&' in query strings while entities are being normalised.
var $xss_hash			= '';

// Current CSRF token; filled in by _csrf_set_hash() and sent as the cookie value.
var $csrf_hash			= '';

// Lifetime of the CSRF cookie; added to time() in csrf_set_cookie().
var $csrf_expire		= 7200;  // Two hours (in seconds)

// Name of the POST field (and base name of the cookie) carrying the CSRF token.
var $csrf_token_name	= 'ci_csrf_token';

/* never allowed, string replacement */
// Literal search => replacement pairs applied with str_replace() in xss_clean().
var $never_allowed_str = array(
								'document.cookie'	=> '[removed]',
								'document.write'	=> '[removed]',
								'.parentNode'		=> '[removed]',
								'.innerHTML'		=> '[removed]',
								'window.location'	=> '[removed]',
								'-moz-binding'		=> '[removed]',
								'<!--'				=> '&lt;!--',
								'-->'				=> '--&gt;',
								'<![CDATA['			=> '&lt;![CDATA['
								);
/* never allowed, regex replacement */
// Pattern fragments; xss_clean() wraps each one as "#...#i" before preg_replace().
var $never_allowed_regex = array(
								"javascript\s*:"			=> '[removed]',
								"expression\s*(\(|&\#40;)"	=> '[removed]', // CSS and IE
								"vbscript\s*:"				=> '[removed]', // IE, surprise!
								"Redirect\s+302"			=> '[removed]'
								);
52
/**
 * Constructor
 *
 * Initialises the CSRF hash and writes a debug log entry.
 *
 * PHP 5 and later call __construct(); a method named after the class is
 * deprecated as a constructor in PHP 7 and is an ordinary method in PHP 8,
 * so relying on CI_Security() alone leaves the CSRF hash unset there.
 *
 * @access	public
 * @return	void
 */
function __construct()
{
	// Set the CSRF hash
	$this->_csrf_set_hash();

	log_message('debug', "Security Class Initialized");
}

/**
 * PHP 4 style constructor
 *
 * Kept so PHP 4 (which only knows class-named constructors) and any
 * code that calls CI_Security() explicitly continue to work.
 *
 * @access	public
 * @return	void
 */
function CI_Security()
{
	$this->__construct();
}
60
61 // --------------------------------------------------------------------
62
/**
 * Verify Cross Site Request Forgery Protection
 *
 * With an empty $_POST the CSRF cookie is (re)issued and nothing is
 * checked. Otherwise the token submitted in $_POST must be present and
 * identical to the one stored in the cookie; if not, csrf_show_error()
 * is called. On success the token is removed from $_POST.
 *
 * @access	public
 * @return	null
 */
function csrf_verify()
{
	// If no POST data exists we will set the CSRF cookie
	if (count($_POST) == 0)
	{
		return $this->csrf_set_cookie();
	}

	// csrf_set_cookie() writes the cookie under the configured cookie prefix,
	// so the same prefixed name must be used to read it back.
	$prefix = config_item('cookie_prefix');
	$cookie_name = (is_string($prefix) ? $prefix : '').$this->csrf_token_name;

	// Both tokens must exist and be identical. The comparison is strict:
	// with loose "!=" two different numeric-looking strings (for example
	// "0e1..." and "0e2...") compare as equal.
	if ( ! isset($_POST[$this->csrf_token_name])
		OR ! isset($_COOKIE[$cookie_name])
		OR $_POST[$this->csrf_token_name] !== $_COOKIE[$cookie_name])
	{
		$this->csrf_show_error();
	}

	// We kill this since we're done and we don't want to pollute the _POST array
	unset($_POST[$this->csrf_token_name]);

	log_message('debug', "CSRF token verified ");
}
94
95 // --------------------------------------------------------------------
96
/**
 * Set Cross Site Request Forgery Protection Cookie
 *
 * Sends the current CSRF hash to the browser. The cookie name is the
 * configured cookie prefix followed by the token name, and the cookie
 * expires $csrf_expire seconds from now.
 *
 * @access	public
 * @return	null
 */
function csrf_set_cookie()
{
	// A non-string prefix setting is treated as "no prefix"
	$cookie_prefix = config_item('cookie_prefix');

	if ( ! is_string($cookie_prefix))
	{
		$cookie_prefix = '';
	}

	setcookie(
		$cookie_prefix.$this->csrf_token_name,
		$this->csrf_hash,
		time() + $this->csrf_expire,
		config_item('cookie_path'),
		config_item('cookie_domain'),
		0
	);

	log_message('debug', "CRSF cookie Set");
}
113
114 // --------------------------------------------------------------------
115
/**
 * Set Cross Site Request Forgery Protection Hash
 *
 * Populates $csrf_hash once per instance. A token already held in the
 * visitor's cookie is reused; otherwise a fresh one is generated.
 *
 * @access	private
 * @return	string	the CSRF hash
 */
function _csrf_set_hash()
{
	if ($this->csrf_hash == '')
	{
		// csrf_set_cookie() writes the cookie under the configured cookie
		// prefix, so the same prefixed name must be used to read it back.
		$prefix = config_item('cookie_prefix');
		$cookie_name = (is_string($prefix) ? $prefix : '').$this->csrf_token_name;

		// If the cookie exists we will use its value. We don't necessarily want to regenerate it with
		// each page load since a page could contain embedded sub-pages causing this feature to fail.
		// The cookie is client-controlled, so it is only accepted when it has the
		// shape of a token generated below (32 hex characters from md5()).
		if (isset($_COOKIE[$cookie_name])
			AND is_string($_COOKIE[$cookie_name])
			AND preg_match('#^[0-9a-f]{32}$#iD', $_COOKIE[$cookie_name]))
		{
			$this->csrf_hash = $_COOKIE[$cookie_name];
		}
		else
		{
			// NOTE(review): rand()/uniqid() are not cryptographically strong;
			// consider a stronger source where the PHP version allows it.
			$this->csrf_hash = md5(uniqid(rand(), TRUE));
		}
	}

	return $this->csrf_hash;
}
140
141 // --------------------------------------------------------------------
142
/**
 * Show CSRF Error
 *
 * Hands a fixed "not allowed" message to show_error(). Called by
 * csrf_verify() when the submitted token is missing or wrong.
 *
 * @access	public
 * @return	null
 */
function csrf_show_error()
{
	$message = 'The action you have requested is not allowed.';

	show_error($message);
}
153
154 // --------------------------------------------------------------------
155
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting hacks can be prevented.
 * The input is normalised (invisible characters removed, entities and
 * URL encoding decoded, tabs converted) and then passed through a fixed
 * sequence of string and regex filters. The order of the steps matters:
 * later filters rely on the normalisation done by earlier ones.
 *
 * Note: This function should only be used to deal with data upon
 * submission. It's not something that should be used for general
 * runtime processing.
 *
 * This function was based in part on some code and ideas from
 * Bitflux: http://channel.bitflux.ch/wiki/XSS_Prevention
 *
 * The list of vulnerabilities at http://ha.ckers.org/xss.html was used
 * while developing it.
 *
 * @access	public
 * @param	mixed	string, or array of strings (cleaned recursively)
 * @param	bool	TRUE to run in image mode: instead of the cleaned
 *					string, return whether the filters left it untouched
 * @return	mixed	cleaned string or array; in image mode, TRUE when
 *					nothing had to be changed and FALSE otherwise
 */
function xss_clean($str, $is_image = FALSE)
{
	// Is the string an array? Clean every element recursively.
	// foreach over the keys replaces list()/each(): each() depends on the
	// array's internal pointer (elements are skipped if it was already
	// advanced) and no longer exists in PHP 8.
	if (is_array($str))
	{
		foreach (array_keys($str) as $key)
		{
			$str[$key] = $this->xss_clean($str[$key]);
		}

		return $str;
	}

	// Remove Invisible Characters
	$str = remove_invisible_characters($str);

	// Protect GET variables in URLs: the '&' of "&name=value" pairs is swapped
	// for a random marker so the entity fix-ups below do not treat it as the
	// start of an entity.

	// 901119URL5918AMP18930PROTECT8198

	$str = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-]+)|i', $this->xss_hash()."\\1=\\2", $str);

	// Validate standard character entities.
	// Add a semicolon if missing. We do this to enable
	// the conversion of entities to ASCII later.
	$str = preg_replace('#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i', "\\1;\\2", $str);

	// Validate UTF16 two byte encoding (x00).
	// Just as above, adds a semicolon if missing.
	$str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);

	// Un-Protect GET variables in URLs
	$str = str_replace($this->xss_hash(), '&', $str);

	// URL Decode
	//
	// Just in case stuff like this is submitted:
	//
	// <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	//
	// Note: Use rawurldecode() so it does not remove plus signs
	$str = rawurldecode($str);

	// Convert character entities to ASCII
	//
	// This permits our tests below to work reliably.
	// We only convert entities that are within tags since
	// these are the ones that will pose security problems.
	$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);

	$str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", array($this, '_decode_entity'), $str);

	// Remove Invisible Characters Again!
	$str = remove_invisible_characters($str);

	// Convert all tabs to spaces
	//
	// This prevents strings like this: ja	vascript
	// NOTE: we deal with spaces between characters later.
	// NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
	// so we use str_replace.
	if (strpos($str, "\t") !== FALSE)
	{
		$str = str_replace("\t", ' ', $str);
	}

	// Capture converted string for later comparison (image mode)
	$converted_string = $str;

	// Not Allowed Under Any Conditions
	foreach ($this->never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($this->never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	// Makes PHP tags safe
	//
	// Note: XML tags are inadvertently replaced too:
	//
	// <?xml
	//
	// But it doesn't seem to pose a problem.
	if ($is_image === TRUE)
	{
		// Images have a tendency to have the PHP short opening and closing tags every so often
		// so we skip those and only do the long opening tags.
		$str = preg_replace('/<\?(php)/i', "&lt;?\\1", $str);
	}
	else
	{
		$str = str_replace(array('<?', '?'.'>'),  array('&lt;?', '?&gt;'), $str);
	}

	// Compact any exploded words
	//
	// This corrects words like:  j a v a s c r i p t
	// These words are compacted back to their correct state.
	$words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
	foreach ($words as $word)
	{
		$temp = '';

		// Build "j\s*a\s*v\s*a..." - every letter followed by optional whitespace
		for ($i = 0, $wordlen = strlen($word); $i < $wordlen; $i++)
		{
			$temp .= substr($word, $i, 1)."\s*";
		}

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto"
		// (substr(..., 0, -3) drops the "\s*" appended after the last letter)
		$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	// Remove disallowed Javascript in links or img tags.
	// The cheap preg_match() guards avoid running the expensive callbacks
	// when the tag is not present at all. The loop repeats until a pass
	// changes nothing, so fragments that re-form after a removal are caught.
	do
	{
		$original = $str;

		if (preg_match("/<a/i", $str))
		{
			$str = preg_replace_callback("#<a\s+([^>]*?)(>|$)#si", array($this, '_js_link_removal'), $str);
		}

		if (preg_match("/<img/i", $str))
		{
			$str = preg_replace_callback("#<img\s+([^>]*?)(\s?/?>|$)#si", array($this, '_js_img_removal'), $str);
		}

		if (preg_match("/script/i", $str) OR preg_match("/xss/i", $str))
		{
			$str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
		}
	}
	while($original !== $str);

	unset($original);

	// Remove JavaScript Event Handlers
	//
	// Note: This code is a little blunt.  It removes
	// the event handler and anything up to the closing >,
	// but it's unlikely to be a problem.
	$event_handlers = array('[^a-z_\-]on\w*','xmlns');

	if ($is_image === TRUE)
	{
		// Adobe Photoshop puts XML metadata into JFIF images, including namespacing,
		// so we have to allow this for images. -Paul
		unset($event_handlers[array_search('xmlns', $event_handlers)]);
	}

	$str = preg_replace("#<([^><]+?)(".implode('|', $event_handlers).")(\s*=\s*[^><]*)([><]*)#i", "<\\1\\4", $str);

	// Sanitize naughty HTML elements
	//
	// If a tag containing any of the words in the list
	// below is found, the tag gets converted to entities.
	//
	// So this: <blink>
	// Becomes: &lt;blink&gt;
	$naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';
	$str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);

	// Sanitize naughty scripting elements
	//
	// Similar to above, only instead of looking for
	// tags it looks for PHP and JavaScript commands
	// that are disallowed.  Rather than removing the
	// code, it simply converts the parenthesis to entities
	// rendering the code un-executable.
	//
	// For example:	eval('some code')
	// Becomes:		eval&#40;'some code'&#41;
	$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

	// Final clean up
	//
	// This adds a bit of extra precaution in case
	// something got through the above filters
	foreach ($this->never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($this->never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	// Images are Handled in a Special Way
	// - Essentially, we want to know that after all of the character conversion is done whether
	// any unwanted, likely XSS, code was found.  If not, we return TRUE, as the image is clean.
	// However, if the string post-conversion does not match the string post-removal of XSS,
	// then it fails, as there was unwanted XSS code found and removed/changed during processing.
	if ($is_image === TRUE)
	{
		return ($str === $converted_string);
	}

	log_message('debug', "XSS Filtering completed");
	return $str;
}
457
458 // --------------------------------------------------------------------
459
/**
 * Random Hash for protecting URLs
 *
 * Generates, on first call, a random md5 string that xss_clean() uses as
 * a temporary stand-in for '&' in query strings; later calls return the
 * same value.
 *
 * @access	public
 * @return	string
 */
function xss_hash()
{
	if ($this->xss_hash == '')
	{
		// Compare versions with version_compare(): comparing the phpversion()
		// string against the float 4.2 relies on type juggling.
		if (version_compare(PHP_VERSION, '4.2.0', '>='))
		{
			// Seeds itself when called without an argument
			mt_srand();
		}
		else
		{
			mt_srand(hexdec(substr(md5(microtime()), -8)) & 0x7fffffff);
		}

		$this->xss_hash = md5(time() + mt_rand(0, 1999999999));
	}

	return $this->xss_hash;
}
480
481 // --------------------------------------------------------------------
482
/**
 * Compact Exploded Words
 *
 * Callback function for xss_clean() to remove whitespace from
 * things like j a v a s c r i p t
 *
 * @access	public
 * @param	array	regex match: [1] is the spaced-out word, [2] the
 *					non-word character that follows it
 * @return	string	the word with all whitespace removed, followed by [2]
 */
function _compact_exploded_words($matches)
{
	$exploded_word = $matches[1];
	$following_char = $matches[2];

	$compacted = preg_replace('/\s+/s', '', $exploded_word);

	return $compacted.$following_char;
}
497
498 // --------------------------------------------------------------------
499
/**
 * Sanitize Naughty HTML
 *
 * Callback function for xss_clean() to neutralise disallowed HTML
 * elements: the opening '<' becomes '&lt;' and any angle brackets
 * captured at the end of the tag are converted to entities as well.
 *
 * @access	private
 * @param	array	regex match: [1] optional slash/whitespace, [2] tag
 *					name, [3] rest of the tag, [4] trailing '<' / '>' run
 * @return	string
 */
function _sanitize_naughty_html($matches)
{
	// encode captured opening or closing brace to prevent recursive vectors
	$encoded_tail = strtr($matches[4], array('>' => '&gt;', '<' => '&lt;'));

	// encode opening brace and reassemble the tag
	return '&lt;'.$matches[1].$matches[2].$matches[3].$encoded_tail;
}
519
520 // --------------------------------------------------------------------
521
/**
 * JS Link Removal
 *
 * Callback function for xss_clean() to sanitize links.
 * Working on one matched <a> tag at a time limits PCRE backtracking,
 * which keeps PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on link-heavy strings.
 *
 * @access	private
 * @param	array	regex match: [0] the whole tag, [1] its attribute text
 * @return	string	the tag with its attribute text replaced by the
 *					filtered attributes, minus any suspicious href prefix
 */
function _js_link_removal($match)
{
	$raw_attributes = $match[1];

	// Keep only well-formed, quoted attributes
	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $raw_attributes));

	// Drop everything from "href=" up to and including a disallowed token
	$attributes = preg_replace("#href=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes);

	return str_replace($raw_attributes, $attributes, $match[0]);
}
539
/**
 * JS Image Removal
 *
 * Callback function for xss_clean() to sanitize image tags.
 * Working on one matched <img> tag at a time limits PCRE backtracking,
 * which keeps PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on image tag heavy strings.
 *
 * @access	private
 * @param	array	regex match: [0] the whole tag, [1] its attribute text
 * @return	string	the tag with its attribute text replaced by the
 *					filtered attributes, minus any suspicious src prefix
 */
function _js_img_removal($match)
{
	$raw_attributes = $match[1];

	// Keep only well-formed, quoted attributes
	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $raw_attributes));

	// Drop everything from "src=" up to and including a disallowed token
	$attributes = preg_replace("#src=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes);

	return str_replace($raw_attributes, $attributes, $match[0]);
}
557
558 // --------------------------------------------------------------------
559
/**
 * Attribute Conversion
 *
 * Used as a callback for XSS Clean. Within a matched attribute="value"
 * pair, angle brackets are turned into entities and backslashes are
 * doubled.
 *
 * @access	public
 * @param	array	regex match; [0] is the full attribute text
 * @return	string
 */
function _convert_attribute($match)
{
	$replacements = array(
		'>'		=> '&gt;',
		'<'		=> '&lt;',
		'\\'	=> '\\\\'
	);

	return strtr($match[0], $replacements);
}
573
574 // --------------------------------------------------------------------
575
/**
 * Filter Attributes
 *
 * Filters tag attributes for consistency and safety: only name="value"
 * or name='value' pairs are kept (unquoted attributes are dropped), and
 * any comment sequences inside them are stripped out.
 *
 * @access	public
 * @param	string	raw attribute text of a tag
 * @return	string	the retained attributes concatenated; '' if none match
 */
function _filter_attributes($str)
{
	// \042 and \047 are the double and single quote characters
	if ( ! preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches))
	{
		return '';
	}

	// preg_replace() accepts an array subject and returns the cleaned array
	$cleaned = preg_replace("#/\*.*?\*/#s", '', $matches[0]);

	return implode('', $cleaned);
}
599
600 // --------------------------------------------------------------------
601
/**
 * HTML Entity Decode Callback
 *
 * Used as a callback for XSS Clean. Decodes the entities in the matched
 * text using the application's configured character set.
 *
 * @access	public
 * @param	array	regex match; [0] is the text to decode
 * @return	string
 */
function _decode_entity($match)
{
	$charset = strtoupper(config_item('charset'));

	return $this->entity_decode($match[0], $charset);
}
615
616 // --------------------------------------------------------------------
617
/**
 * HTML Entities Decode
 *
 * This function is a replacement for html_entity_decode()
 *
 * In some versions of PHP the native function does not work
 * when UTF-8 is the specified character set, so this gives us
 * a work-around.  More info here:
 * http://bugs.php.net/bug.php?id=25670
 *
 * In addition, numeric entities written without the closing semicolon
 * (for example "&#x6A" or "&#106") are decoded, which the native
 * function does not do.
 *
 * @access	public
 * @param	string	text that may contain entities
 * @param	string	character set handed to html_entity_decode()
 * @return	string
 */
function entity_decode($str, $charset='UTF-8')
{
	if (stristr($str, '&') === FALSE) return $str;

	// The reason we are not using html_entity_decode() by itself is because
	// while it is not technically correct to leave out the semicolon
	// at the end of an entity most browsers will still interpret the entity
	// correctly.  html_entity_decode() does not convert entities without
	// semicolons, so we are left with our own little solution here. Bummer.

	$use_native = (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR is_php('5.0.0')));

	if ($use_native)
	{
		$str = html_entity_decode($str, ENT_COMPAT, $charset);
	}

	// After the native decode only semicolon-less entities remain, so the
	// patterns need not consume a ';'. On the fallback path an optional
	// trailing ';' belongs to the entity and is swallowed with it.
	$semicolon = $use_native ? '' : ';{0,1}';

	// Numeric Entities
	//
	// preg_split() with PREG_SPLIT_DELIM_CAPTURE returns the text between
	// entities at the even indexes and the captured digits at the odd ones,
	// so the digits can be converted without the eval-based /e modifier
	// (removed in PHP 7).
	$parts = preg_split('~&#x(0*[0-9a-f]{2,5})'.$semicolon.'~i', $str, -1, PREG_SPLIT_DELIM_CAPTURE);
	for ($i = 1, $count = count($parts); $i < $count; $i += 2)
	{
		$parts[$i] = chr(hexdec($parts[$i]));
	}
	$str = implode('', $parts);

	$parts = preg_split('~&#([0-9]{2,4})'.$semicolon.'~', $str, -1, PREG_SPLIT_DELIM_CAPTURE);
	for ($i = 1, $count = count($parts); $i < $count; $i += 2)
	{
		// The cast reads the digits as decimal. Evaluating them as PHP code
		// treated a leading zero as an octal literal ("&#065" became "5").
		$parts[$i] = chr((int) $parts[$i]);
	}
	$str = implode('', $parts);

	if ($use_native)
	{
		return $str;
	}

	// Literal Entities - Slightly slow so we only do it if an ampersand is
	// still present (the check used to be inverted, so it never ran when needed)
	if (stristr($str, '&') !== FALSE)
	{
		$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
	}

	return $str;
}
666
667 // --------------------------------------------------------------------
668
/**
 * Filename Security
 *
 * Strips path fragments, markup characters, shell/query metacharacters
 * and a set of URL-encoded equivalents from a file name, and removes
 * backslashes.
 *
 * The removal is repeated until the string stops changing. A single
 * pass can be defeated by nesting, because deleting one token may splice
 * a new one together ("%3%3cc" -> "%3c"), and stripping a backslash can
 * do the same ("%3\c" -> "%3c").
 *
 * @access	public
 * @param	string
 * @return	string
 */
function sanitize_filename($str)
{
	$bad = array(
					"../",
					"./",
					"<!--",
					"-->",
					"<",
					">",
					"'",
					'"',
					'&',
					'$',
					'#',
					'{',
					'}',
					'[',
					']',
					'=',
					';',
					'?',
					'/',
					"%20",
					"%22",
					"%3c",		// <
					"%253c", 	// < (double-encoded)
					"%3e", 		// >
					"%0e", 		// shift-out control character
					"%28", 		// (
					"%29", 		// )
					"%2528", 	// ( (double-encoded)
					"%26", 		// &
					"%24", 		// $
					"%3f", 		// ?
					"%3b", 		// ;
					"%3d"		// =
				);

	do
	{
		$previous = $str;
		$str = stripslashes(str_replace($bad, '', $str));
	}
	while ($previous !== $str);

	return $str;
}
716
717}
718// END Security Class
719
720/* End of file Security.php */
721/* Location: ./system/libraries/Security.php */