<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
2/**
3 * CodeIgniter
4 *
5 * An open source application development framework for PHP 4.3.2 or newer
6 *
7 * @package CodeIgniter
8 * @author ExpressionEngine Dev Team
9 * @copyright Copyright (c) 2008 - 2010, EllisLab, Inc.
10 * @license http://codeigniter.com/user_guide/license.html
11 * @link http://codeigniter.com
12 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
/**
 * Security Class
 *
 * @package		CodeIgniter
 * @subpackage	Libraries
 * @category	Security
 * @author		ExpressionEngine Dev Team
 * @link		http://codeigniter.com/user_guide/libraries/security.html
 */
27class CI_Security {
/* Random marker that temporarily replaces "&" in URLs while xss_clean() runs */
var $xss_hash = '';

/* CSRF token for the current request, filled in by _csrf_set_hash() */
var $csrf_hash = '';

/* Lifetime of the CSRF cookie in seconds (7200 = two hours) */
var $csrf_expire = 7200;

/* Name of the POST field, and base name of the cookie, carrying the CSRF token */
var $csrf_token_name = 'ci_csrf_token';

/*
 * Literal substrings that xss_clean() always rewrites (plain str_replace,
 * applied in the order listed here).
 */
var $never_allowed_str = array(
	'document.cookie'	=> '[removed]',
	'document.write'	=> '[removed]',
	'.parentNode'		=> '[removed]',
	'.innerHTML'		=> '[removed]',
	'window.location'	=> '[removed]',
	'-moz-binding'		=> '[removed]',
	'<!--'				=> '&lt;!--',
	'-->'				=> '--&gt;',
	'<![CDATA['			=> '&lt;![CDATA['
);

/*
 * Regular expression fragments that xss_clean() always rewrites. Each key is
 * wrapped as "#<key>#i" before use, so matching is case-insensitive.
 */
var $never_allowed_regex = array(
	'javascript\s*:'			=> '[removed]',
	'expression\s*(\(|&\#40;)'	=> '[removed]', // CSS expressions (IE)
	'vbscript\s*:'				=> '[removed]', // IE only
	'Redirect\s+302'			=> '[removed]'
);
52
/**
 * Constructor
 *
 * Prepares the CSRF token for this request and writes a debug log entry.
 *
 * @access	public
 * @return	void
 */
function CI_Security()
{
	// Establish the CSRF token before anything else can ask for it
	$this->_csrf_set_hash();

	log_message('debug', 'Security Class Initialized');
}
60
61 // --------------------------------------------------------------------
62
/**
 * Verify Cross Site Request Forgery Protection
 *
 * When the request carries no POST data this only (re)issues the CSRF
 * cookie. Otherwise the token field must be present in $_POST and be
 * identical to the token stored in the CSRF cookie; if it is missing or
 * different, csrf_show_error() is called. After a successful check the
 * token field is removed from $_POST.
 *
 * @access	public
 * @return	null
 */
function csrf_verify()
{
	// If no POST data exists we will set the CSRF cookie
	if (count($_POST) == 0)
	{
		return $this->csrf_set_cookie();
	}

	// csrf_set_cookie() writes the cookie under the configured cookie prefix,
	// so the same prefixed name must be used to read it back. Reading the
	// un-prefixed name made every POST fail as soon as a prefix was configured.
	$prefix = ( ! is_string(config_item('cookie_prefix'))) ? '' : config_item('cookie_prefix');
	$cookie_name = $prefix.$this->csrf_token_name;

	// Do the tokens exist in both the _POST and _COOKIE arrays?
	if ( ! isset($_POST[$this->csrf_token_name]) OR ! isset($_COOKIE[$cookie_name]))
	{
		$this->csrf_show_error();
		return;
	}

	// Do the tokens match? The comparison is strict on purpose: with a loose
	// comparison two different numeric-looking strings (e.g. "0e12" and
	// "0e99") are treated as equal numbers and would pass the check.
	if ($_POST[$this->csrf_token_name] !== $_COOKIE[$cookie_name])
	{
		$this->csrf_show_error();
		return;
	}

	// We kill this since we're done and we don't want to pollute the _POST array
	unset($_POST[$this->csrf_token_name]);

	log_message('debug', "CSRF token verified ");
}
94
95 // --------------------------------------------------------------------
96
/**
 * Set Cross Site Request Forgery Protection Cookie
 *
 * Sends a cookie named "<cookie_prefix><csrf_token_name>" holding the
 * current CSRF hash. It expires csrf_expire seconds from now and uses the
 * configured cookie path and domain.
 *
 * @access	public
 * @return	null
 */
function csrf_set_cookie()
{
	// A missing or non-string prefix setting means "no prefix"
	$cookie_prefix = config_item('cookie_prefix');

	if ( ! is_string($cookie_prefix))
	{
		$cookie_prefix = '';
	}

	$cookie_name = $cookie_prefix.$this->csrf_token_name;
	$expires_at = time() + $this->csrf_expire;

	setcookie($cookie_name, $this->csrf_hash, $expires_at, config_item('cookie_path'), config_item('cookie_domain'), 0);

	log_message('debug', "CRSF cookie Set");
}
113
114 // --------------------------------------------------------------------
115
/**
 * Set Cross Site Request Forgery Protection Hash
 *
 * Populates $this->csrf_hash if it is still empty. An existing, non-empty
 * token in the visitor's CSRF cookie is reused; otherwise a new token is
 * generated.
 *
 * @access	private
 * @return	string	the CSRF token for this request
 */
function _csrf_set_hash()
{
	if ($this->csrf_hash == '')
	{
		// The cookie is written by csrf_set_cookie() under the configured
		// cookie prefix, so it has to be looked up under that same name.
		$prefix = ( ! is_string(config_item('cookie_prefix'))) ? '' : config_item('cookie_prefix');
		$cookie_name = $prefix.$this->csrf_token_name;

		// If the cookie exists we will use its value. We don't necessarily want to regenerate it with
		// each page load since a page could contain embedded sub-pages causing this feature to fail.
		// Only a plain string is accepted: a cookie sent as "name[]=..." arrives as an array.
		if (isset($_COOKIE[$cookie_name]) AND is_string($_COOKIE[$cookie_name]) AND $_COOKIE[$cookie_name] != '')
		{
			$this->csrf_hash = $_COOKIE[$cookie_name];
		}
		else
		{
			// NOTE(review): rand()/uniqid() are not cryptographically strong sources;
			// left unchanged here to keep PHP 4 compatibility.
			$this->csrf_hash = md5(uniqid(rand(), TRUE));
		}
	}

	return $this->csrf_hash;
}
140
141 // --------------------------------------------------------------------
142
/**
 * Show CSRF Error
 *
 * Hands a fixed "not allowed" message to show_error().
 *
 * @access	public
 * @return	null
 */
function csrf_show_error()
{
	$message = 'The action you have requested is not allowed.';

	show_error($message);
}
153
154 // --------------------------------------------------------------------
155
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting Hacks can be
 * prevented. This function does a fair amount of work but
 * it is extremely thorough, designed to prevent even the
 * most obscure XSS attempts. Nothing is ever 100% foolproof,
 * of course, but I haven't been able to get anything past
 * the filter.
 *
 * Note: This function should only be used to deal with data
 * upon submission. It's not something that should
 * be used for general runtime processing.
 *
 * This function was based in part on some code and ideas I
 * got from Bitflux: http://channel.bitflux.ch/wiki/XSS_Prevention
 *
 * To help develop this script I used this great list of
 * vulnerabilities along with a few other hacks I've
 * harvested from examining vulnerabilities in other programs:
 * http://ha.ckers.org/xss.html
 *
 * @access	public
 * @param	mixed	string to clean, or an array whose elements are cleaned recursively
 * @param	bool	TRUE when $str is image data; the result is then a verdict, not a string
 * @return	mixed	cleaned string (or array); for images TRUE if nothing had to be altered, FALSE otherwise
 */
function xss_clean($str, $is_image = FALSE)
{
	/*
	 * Is the string an array?
	 *
	 * Every element is cleaned in turn. array_keys() + foreach is used
	 * instead of each(): each() is gone from current PHP versions and it
	 * also silently skipped elements when the array's internal pointer
	 * was not at the start.
	 */
	if (is_array($str))
	{
		foreach (array_keys($str) as $key)
		{
			$str[$key] = $this->xss_clean($str[$key]);
		}

		return $str;
	}

	/*
	 * Remove Invisible Characters
	 */
	$str = $this->_remove_invisible_characters($str);

	/*
	 * Protect GET variables in URLs
	 *
	 * The "&" in front of name=value pairs is swapped for the random
	 * xss_hash() marker so the entity fix-ups below do not touch it.
	 */
	$str = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-]+)|i', $this->xss_hash()."\\1=\\2", $str);

	/*
	 * Validate standard character entities
	 *
	 * Add a semicolon if missing. We do this to enable
	 * the conversion of entities to ASCII later.
	 */
	$str = preg_replace('#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i', "\\1;\\2", $str);

	/*
	 * Validate UTF16 two byte encoding (x00)
	 *
	 * Just as above, adds a semicolon if missing.
	 */
	$str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);

	/*
	 * Un-Protect GET variables in URLs
	 */
	$str = str_replace($this->xss_hash(), '&', $str);

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Use rawurldecode() so it does not remove plus signs
	 */
	$str = rawurldecode($str);

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 */
	$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);

	$str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", array($this, '_decode_entity'), $str);

	/*
	 * Remove Invisible Characters Again!
	 */
	$str = $this->_remove_invisible_characters($str);

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like this: ja	vascript
	 * NOTE: we deal with spaces between characters later.
	 * NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
	 * so we use str_replace.
	 */
	if (strpos($str, "\t") !== FALSE)
	{
		$str = str_replace("\t", ' ', $str);
	}

	/*
	 * Capture converted string for later comparison
	 */
	$converted_string = $str;

	/*
	 * Not Allowed Under Any Conditions
	 */
	foreach ($this->never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($this->never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	/*
	 * Makes PHP tags safe
	 *
	 * Note: XML tags are inadvertently replaced too:
	 *
	 * <?xml
	 *
	 * But it doesn't seem to pose a problem.
	 */
	if ($is_image === TRUE)
	{
		// Images have a tendency to have the PHP short opening and closing tags every so often
		// so we skip those and only do the long opening tags.
		$str = preg_replace('/<\?(php)/i', "&lt;?\\1", $str);
	}
	else
	{
		$str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
	}

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like: j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 */
	$words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
	foreach ($words as $word)
	{
		$temp = '';

		// Build "j\s*a\s*v\s*a\s*..." for the current word
		for ($i = 0, $wordlen = strlen($word); $i < $wordlen; $i++)
		{
			$temp .= substr($word, $i, 1)."\s*";
		}

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto"
		// (substr(..., 0, -3) drops the trailing "\s*" added by the loop)
		$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 * We used to do some version comparisons and use of stripos for PHP5, but it is dog slow compared
	 * to these simplified non-capturing preg_match(), especially if the pattern exists in the string
	 *
	 * The passes repeat until one of them leaves the string untouched, so
	 * that removing one fragment cannot assemble a new one.
	 */
	do
	{
		$original = $str;

		if (preg_match("/<a/i", $str))
		{
			$str = preg_replace_callback("#<a\s+([^>]*?)(>|$)#si", array($this, '_js_link_removal'), $str);
		}

		if (preg_match("/<img/i", $str))
		{
			$str = preg_replace_callback("#<img\s+([^>]*?)(\s?/?>|$)#si", array($this, '_js_img_removal'), $str);
		}

		if (preg_match("/script/i", $str) OR preg_match("/xss/i", $str))
		{
			$str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
		}
	}
	while ($original !== $str); // strict: numeric-looking strings must not compare equal by value

	unset($original);

	/*
	 * Remove JavaScript Event Handlers
	 *
	 * Note: This code is a little blunt. It removes
	 * the event handler and anything up to the closing >,
	 * but it's unlikely to be a problem.
	 */
	$event_handlers = array('[^a-z_\-]on\w*','xmlns');

	if ($is_image === TRUE)
	{
		/*
		 * Adobe Photoshop puts XML metadata into JFIF images, including namespacing,
		 * so we have to allow this for images. -Paul
		 */
		unset($event_handlers[array_search('xmlns', $event_handlers)]);
	}

	$str = preg_replace("#<([^><]+?)(".implode('|', $event_handlers).")(\s*=\s*[^><]*)([><]*)#i", "<\\1\\4", $str);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 */
	$naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';
	$str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed. Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:		eval&#40;'some code'&#41;
	 */
	$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

	/*
	 * Final clean up
	 *
	 * This adds a bit of extra precaution in case
	 * something got through the above filters
	 */
	foreach ($this->never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($this->never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	/*
	 * Images are Handled in a Special Way
	 * - Essentially, we want to know that after all of the character conversion is done whether
	 * any unwanted, likely XSS, code was found. If not, we return TRUE, as the image is clean.
	 * However, if the string post-conversion does not match the string post-removal of XSS,
	 * then it fails, as there was unwanted XSS code found and removed/changed during processing.
	 */
	if ($is_image === TRUE)
	{
		// Strict comparison: any byte-level difference means something was filtered
		return ($str === $converted_string);
	}

	log_message('debug', "XSS Filtering completed");
	return $str;
}
457
458 // --------------------------------------------------------------------
459
/**
 * Random Hash for protecting URLs
 *
 * Lazily creates, then returns, the random md5 marker that xss_clean()
 * uses as a temporary stand-in for "&" in URL query strings. The value is
 * generated once per instance and cached in $this->xss_hash.
 *
 * @access	public
 * @return	string
 */
function xss_hash()
{
	if ($this->xss_hash == '')
	{
		// Compare versions with version_compare(): comparing the version
		// string against the float 4.2 relies on numeric string conversion
		// and mis-handles versions such as "4.10.0" or "10.0.0".
		if (version_compare(phpversion(), '4.2.0', '>='))
		{
			mt_srand();
		}
		else
		{
			// Older engines need an explicit seed
			mt_srand(hexdec(substr(md5(microtime()), -8)) & 0x7fffffff);
		}

		$this->xss_hash = md5(time() + mt_rand(0, 1999999999));
	}

	return $this->xss_hash;
}
480
481 // --------------------------------------------------------------------
482
/**
 * Remove Invisible Characters
 *
 * This prevents sandwiching null characters
 * between ascii characters, like Java\0script.
 *
 * Strips every control character except newline (dec 10), carriage
 * return (dec 13) and horizontal tab (dec 09), both as raw bytes and in
 * their URL-encoded "%xx" form. The replacement is repeated until the
 * string stops changing, so a sequence that only appears after a removal
 * (e.g. "%0%000") is stripped as well.
 *
 * @access	public
 * @param	string
 * @return	string
 */
function _remove_invisible_characters($str)
{
	static $non_displayables;

	if ( ! isset($non_displayables))
	{
		// The URL-encoded patterns are case-insensitive: "%0B" and "%0b"
		// denote the same byte and both have to be removed.
		$non_displayables = array(
			'/%0[0-8bcef]/i',		// url encoded 00-08, 11, 12, 14, 15
			'/%1[0-9a-f]/i',		// url encoded 16-31
			'/[\x00-\x08]/',		// 00-08
			'/\x0b/', '/\x0c/',		// 11, 12
			'/[\x0e-\x1f]/'			// 14-31
		);
	}

	do
	{
		$cleaned = $str;
		$str = preg_replace($non_displayables, '', $str);
	}
	while ($cleaned !== $str); // strict: stop only when nothing at all was removed

	return $str;
}
518
519 // --------------------------------------------------------------------
520
/**
 * Compact Exploded Words
 *
 * preg_replace_callback() handler used by xss_clean(). Squeezes all
 * whitespace out of the captured word (turning "j a v a s c r i p t"
 * back into "javascript") and re-attaches the character that followed it.
 *
 * @access	public
 * @param	array	[1] the exploded word, [2] the non-word character after it
 * @return	string
 */
function _compact_exploded_words($matches)
{
	$exploded_word = $matches[1];
	$following_char = $matches[2];

	$compacted = preg_replace('/\s+/s', '', $exploded_word);

	return $compacted.$following_char;
}
535
536 // --------------------------------------------------------------------
537
/**
 * Sanitize Naughty HTML
 *
 * preg_replace_callback() handler used by xss_clean(). Rebuilds the
 * matched tag with its opening "<" turned into "&lt;" and with any
 * trailing "<" or ">" characters turned into entities as well.
 *
 * @access	private
 * @param	array	[1] optional "/" and whitespace, [2] tag name, [3] rest of the tag, [4] trailing brackets
 * @return	string
 */
function _sanitize_naughty_html($matches)
{
	// Encode the captured closing/opening brackets to prevent recursive vectors
	$encoded_tail = str_replace(array('>', '<'), array('&gt;', '&lt;'), $matches[4]);

	// Encode the opening bracket and put the tag back together
	return '&lt;'.$matches[1].$matches[2].$matches[3].$encoded_tail;
}
557
558 // --------------------------------------------------------------------
559
/**
 * JS Link Removal
 *
 * preg_replace_callback() handler used by xss_clean() for <a ...> tags.
 * The attribute section of the tag is normalised by _filter_attributes(),
 * any href leading up to a disallowed token is cut out of it, and the
 * result replaces the original attribute section inside the matched tag.
 * Working per tag limits PCRE backtracking and avoids
 * PREG_BACKTRACK_LIMIT_ERROR in PHP 5.2+ on link-heavy strings.
 *
 * @access	private
 * @param	array	[0] the whole tag, [1] its attribute section
 * @return	string
 */
function _js_link_removal($match)
{
	$raw_attributes = $match[1];

	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $raw_attributes));

	$safe_attributes = preg_replace("#href=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes);

	return str_replace($raw_attributes, $safe_attributes, $match[0]);
}
577
/**
 * JS Image Removal
 *
 * preg_replace_callback() handler used by xss_clean() for <img ...> tags.
 * The attribute section of the tag is normalised by _filter_attributes(),
 * any src leading up to a disallowed token is cut out of it, and the
 * result replaces the original attribute section inside the matched tag.
 * Working per tag limits PCRE backtracking and avoids
 * PREG_BACKTRACK_LIMIT_ERROR in PHP 5.2+ on image-heavy strings.
 *
 * @access	private
 * @param	array	[0] the whole tag, [1] its attribute section
 * @return	string
 */
function _js_img_removal($match)
{
	$raw_attributes = $match[1];

	$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $raw_attributes));

	$safe_attributes = preg_replace("#src=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes);

	return str_replace($raw_attributes, $safe_attributes, $match[0]);
}
595
596 // --------------------------------------------------------------------
597
/**
 * Attribute Conversion
 *
 * preg_replace_callback() handler used by xss_clean(). Inside a matched
 * name="value" attribute it turns ">" and "<" into entities and doubles
 * every backslash.
 *
 * @access	public
 * @param	array	[0] the complete attribute text
 * @return	string
 */
function _convert_attribute($match)
{
	$search = array('>', '<', '\\');
	$replace = array('&gt;', '&lt;', '\\\\');

	return str_replace($search, $replace, $match[0]);
}
611
612 // --------------------------------------------------------------------
613
/**
 * Filter Attributes
 *
 * Keeps only the quoted name="value" / name='value' pairs found in the
 * given tag fragment, strips slash-star comments from each of them and
 * returns the pairs concatenated. Returns an empty string when no quoted
 * attribute is found.
 *
 * @access	public
 * @param	string	attribute section of a tag
 * @return	string
 */
function _filter_attributes($str)
{
	// Nothing that looks like a quoted attribute: nothing to keep
	if ( ! preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches))
	{
		return '';
	}

	$kept = array();

	foreach ($matches[0] as $attribute)
	{
		$kept[] = preg_replace("#/\*.*?\*/#s", '', $attribute);
	}

	return implode('', $kept);
}
637
638 // --------------------------------------------------------------------
639
/**
 * HTML Entity Decode Callback
 *
 * preg_replace_callback() handler used by xss_clean(). Loads the
 * typography helper and passes the matched text to entity_decode()
 * together with the upper-cased 'charset' config item.
 *
 * @access	public
 * @param	array	[0] the matched text
 * @return	string
 */
function _decode_entity($match)
{
	$CI =& get_instance();
	$CI->load->helper('typography');

	$charset = strtoupper($CI->config->item('charset'));

	return entity_decode($match[0], $charset);
}
655
656 // --------------------------------------------------------------------
657
/**
 * Filename Security
 *
 * Removes a fixed list of character sequences from a file name (in list
 * order, each one replaced by nothing) and then runs stripslashes() on
 * the result.
 *
 * @access	public
 * @param	string	file name to clean
 * @return	string
 */
function sanitize_filename($str)
{
	// Directory traversal fragments and HTML comment delimiters
	$sequences = array('../', './', '<!--', '-->');

	// Single characters that are not wanted in a file name
	$characters = array(
		'<', '>', "'", '"', '&', '$', '#',
		'{', '}', '[', ']', '=', ';', '?', '/'
	);

	// URL-encoded forms
	$encoded = array(
		'%20',		// space
		'%22',		// "
		'%3c',		// <
		'%253c',	// < (double encoded)
		'%3e',		// >
		'%0e',		// control character 0x0E
		'%28',		// (
		'%29',		// )
		'%2528',	// ( (double encoded)
		'%26',		// &
		'%24',		// $
		'%3f',		// ?
		'%3b',		// ;
		'%3d'		// =
	);

	// The order matters because str_replace() works through the list one entry at a time
	$bad = array_merge($sequences, $characters, $encoded);

	$stripped = str_replace($bad, '', $str);

	return stripslashes($stripped);
}
705
706}
707// END Security Class
708
709/* End of file Security.php */
710/* Location: ./system/libraries/Security.php */