blob: f113cff71b14a854cfec2ed36777f36843aa61c4 [file] [log] [blame]
Derek Allarda72b60d2007-01-31 23:56:11 +00001<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
/**
 * CodeIgniter
 *
 * An open source application development framework for PHP 4.3.2 or newer
 *
 * @package		CodeIgniter
 * @author		Rick Ellis
 * @copyright	Copyright (c) 2006, EllisLab, Inc.
 * @license		http://www.codeigniter.com/user_guide/license.html
 * @link		http://www.codeigniter.com
 * @since		Version 1.0
 * @filesource
 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * Input Class
20 *
21 * Pre-processes global input data for security
22 *
23 * @package CodeIgniter
24 * @subpackage Libraries
25 * @category Input
26 * @author Rick Ellis
27 * @link http://www.codeigniter.com/user_guide/libraries/input.html
28 */
29class CI_Input {
30 var $use_xss_clean = FALSE;
31 var $ip_address = FALSE;
32 var $user_agent = FALSE;
33 var $allow_get_array = FALSE;
34
/**
 * Constructor (PHP 4 style)
 *
 * Kept so that PHP 4 still constructs the object and so that
 * child classes calling parent::CI_Input() keep working.
 * It is declared before __construct() on purpose: PHP 5 raises
 * an E_STRICT notice when the old-style constructor is declared
 * after __construct().
 *
 * @access	public
 */
function CI_Input()
{
	$this->__construct();
}

/**
 * Constructor
 *
 * Sets whether to globally enable the XSS processing
 * and whether to allow the $_GET array, based on the
 * "global_xss_filtering" and "enable_query_strings"
 * config items, then sanitizes the global input arrays.
 *
 * Old-style constructors are no longer called by "new" as of
 * PHP 8, so the real work lives here.
 *
 * @access	public
 */
function __construct()
{
	log_message('debug', "Input Class Initialized");

	$CFG =& load_class('Config');

	// Only a boolean TRUE in the config switches either feature on
	$this->use_xss_clean	= ($CFG->item('global_xss_filtering') === TRUE);
	$this->allow_get_array	= ($CFG->item('enable_query_strings') === TRUE);

	$this->_sanitize_globals();
}
52
53 // --------------------------------------------------------------------
54
/**
 * Sanitize Globals
 *
 * This function does the following:
 *
 * Sets to NULL every global variable that shares its name with
 * a GET, POST or COOKIE key (the effect of register_globals = off)
 *
 * Empties $_GET if query strings are not enabled, otherwise
 * cleans its keys and values
 *
 * Cleans the keys and values of $_POST and $_COOKIE
 *
 * @access	private
 * @return	void
 */
function _sanitize_globals()
{
	// Names a request must never be able to clear.
	// NOTE(review): globals created by the framework bootstrap probably
	// belong in this list too - confirm their names before relying on it.
	$protected = array('GLOBALS', '_GET', '_POST', '_COOKIE', '_SERVER', '_FILES', '_ENV', '_REQUEST', '_SESSION');

	// Unset globals for security.
	// This is effectively the same as register_globals = off
	foreach (array($_GET, $_POST, $_COOKIE) as $global)
	{
		if ( ! is_array($global))
		{
			continue;
		}

		foreach ($global as $key => $val)
		{
			// Going through $GLOBALS (rather than "global $$key") means a
			// request key named "key", "val", "global" or "this" cannot
			// rebind the variables this loop is using.
			if ( ! in_array((string) $key, $protected, TRUE))
			{
				$GLOBALS[$key] = NULL;
			}
		}
	}

	// Is $_GET data allowed? If not we'll set the $_GET to an empty array
	if ($this->allow_get_array == FALSE)
	{
		$_GET = array();
	}
	else
	{
		if (is_array($_GET) AND count($_GET) > 0)
		{
			foreach($_GET as $key => $val)
			{
				$_GET[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
			}
		}
	}

	// Clean $_POST Data
	if (is_array($_POST) AND count($_POST) > 0)
	{
		foreach($_POST as $key => $val)
		{
			$_POST[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	// Clean $_COOKIE Data
	if (is_array($_COOKIE) AND count($_COOKIE) > 0)
	{
		foreach($_COOKIE as $key => $val)
		{
			$_COOKIE[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	log_message('debug', "Global POST and COOKIE data sanitized");
}
126
127 // --------------------------------------------------------------------
128
/**
 * Clean Input Data
 *
 * This is a helper function. It strips slashes added by
 * magic quotes, optionally runs the XSS filter, and
 * standardizes newline characters to \n.  Arrays are
 * processed recursively and their keys are validated.
 *
 * @access	private
 * @param	mixed	a string, or an array of strings/arrays
 * @return	mixed	the cleaned string or array
 */
function _clean_input_data($str)
{
	if (is_array($str))
	{
		$new_array = array();
		foreach ($str as $key => $val)
		{
			$new_array[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
		return $new_array;
	}

	// We strip slashes if magic quotes is on to keep things consistent.
	// Magic quotes no longer exist as of PHP 5.4 and get_magic_quotes_gpc()
	// itself was removed in PHP 8, so it is only consulted on older versions.
	if (version_compare(phpversion(), '5.4.0', '<') AND get_magic_quotes_gpc())
	{
		$str = stripslashes($str);
	}

	// Should we filter the input data?
	if ($this->use_xss_clean === TRUE)
	{
		$str = $this->xss_clean($str);
	}

	// Standardize newlines
	return preg_replace("/\015\012|\015|\012/", "\n", $str);
}
166
167 // --------------------------------------------------------------------
168
/**
 * Clean Keys
 *
 * This is a helper function. To prevent malicious users
 * from trying to exploit keys we make sure that keys are
 * only named with alpha-numeric text and a few other items
 * (colon, underscore, forward slash and dash).
 *
 * The script is terminated with an error message when the
 * key contains anything else.
 *
 * @access	private
 * @param	string
 * @return	string	the key, unchanged
 */
function _clean_input_keys($str)
{
	// The D modifier makes "$" match only at the very end of the key;
	// without it a key ending in a newline would be accepted.
	if ( ! preg_match("/^[a-z0-9:_\/-]+$/iD", $str))
	{
		exit('Disallowed Key Characters.');
	}

	return $str;
}
Rick Ellis112569d2007-02-26 19:19:08 +0000189
190 // --------------------------------------------------------------------
191
/**
 * Fetch an item from the GET array
 *
 * When XSS filtering is requested for an array value, each
 * element is filtered into a new array; $_GET itself is left
 * untouched, matching what cookie() does.
 *
 * @access	public
 * @param	string	the key to look up in $_GET
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value, or FALSE when the key is not set
 */
function get($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_GET[$index]))
	{
		return FALSE;
	}

	if ($xss_clean !== TRUE)
	{
		return $_GET[$index];
	}

	if ( ! is_array($_GET[$index]))
	{
		return $this->xss_clean($_GET[$index]);
	}

	$cleaned = array();
	foreach ($_GET[$index] as $key => $val)
	{
		$cleaned[$key] = $this->xss_clean($val);
	}

	return $cleaned;
}
Derek Allarda72b60d2007-01-31 23:56:11 +0000224
225 // --------------------------------------------------------------------
226
/**
 * Fetch an item from the POST array
 *
 * When XSS filtering is requested for an array value, each
 * element is filtered into a new array; $_POST itself is left
 * untouched, matching what cookie() does.
 *
 * @access	public
 * @param	string	the key to look up in $_POST
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value, or FALSE when the key is not set
 */
function post($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_POST[$index]))
	{
		return FALSE;
	}

	if ($xss_clean !== TRUE)
	{
		return $_POST[$index];
	}

	if ( ! is_array($_POST[$index]))
	{
		return $this->xss_clean($_POST[$index]);
	}

	$cleaned = array();
	foreach ($_POST[$index] as $key => $val)
	{
		$cleaned[$key] = $this->xss_clean($val);
	}

	return $cleaned;
}
259
260 // --------------------------------------------------------------------
261
/**
 * Fetch an item from the COOKIE array
 *
 * Array values are filtered element by element into a
 * fresh array when XSS filtering is requested.
 *
 * @access	public
 * @param	string	the key to look up in $_COOKIE
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value, or FALSE when the key is not set
 */
function cookie($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_COOKIE[$index]))
	{
		return FALSE;
	}

	$value = $_COOKIE[$index];

	// No filtering requested: hand the raw value back
	if ($xss_clean !== TRUE)
	{
		return $value;
	}

	if ( ! is_array($value))
	{
		return $this->xss_clean($value);
	}

	$filtered = array();
	foreach ($value as $name => $item)
	{
		$filtered[$name] = $this->xss_clean($item);
	}

	return $filtered;
}
299
300 // --------------------------------------------------------------------
301
/**
 * Fetch an item from the SERVER array
 *
 * @access	public
 * @param	string	the key to look up in $_SERVER
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value, or FALSE when the key is not set
 */
function server($index = '', $xss_clean = FALSE)
{
	if (isset($_SERVER[$index]))
	{
		$value = $_SERVER[$index];

		return ($xss_clean === TRUE) ? $this->xss_clean($value) : $value;
	}

	return FALSE;
}
324
325 // --------------------------------------------------------------------
326
/**
 * Fetch the IP Address
 *
 * The result is worked out once and cached in $this->ip_address.
 * When no candidate is available, or the candidate fails
 * valid_ip(), '0.0.0.0' is returned.
 *
 * @access	public
 * @return	string
 */
function ip_address()
{
	if ($this->ip_address !== FALSE)
	{
		return $this->ip_address;
	}

	// NOTE(review): HTTP_CLIENT_IP and HTTP_X_FORWARDED_FOR are request
	// headers and can be set by the client; the existing precedence is kept.
	if ($this->server('REMOTE_ADDR') AND $this->server('HTTP_CLIENT_IP'))
	{
		$this->ip_address = $_SERVER['HTTP_CLIENT_IP'];
	}
	elseif ($this->server('REMOTE_ADDR'))
	{
		$this->ip_address = $_SERVER['REMOTE_ADDR'];
	}
	elseif ($this->server('HTTP_CLIENT_IP'))
	{
		$this->ip_address = $_SERVER['HTTP_CLIENT_IP'];
	}
	elseif ($this->server('HTTP_X_FORWARDED_FOR'))
	{
		$this->ip_address = $_SERVER['HTTP_X_FORWARDED_FOR'];
	}

	if ($this->ip_address === FALSE)
	{
		$this->ip_address = '0.0.0.0';
		return $this->ip_address;
	}

	// A comma separated list: use the last entry
	if (strstr($this->ip_address, ','))
	{
		$x = explode(',', $this->ip_address);
		$this->ip_address = end($x);
	}

	// List entries are normally written "a, b", so the chosen entry
	// carries a leading space that would make validation fail.
	$this->ip_address = trim($this->ip_address);

	if ( ! $this->valid_ip($this->ip_address))
	{
		$this->ip_address = '0.0.0.0';
	}

	return $this->ip_address;
}
376
377 // --------------------------------------------------------------------
378
/**
 * Validate IP Address
 *
 * Accepts dotted-quad IPv4 addresses only.  Every octet must be
 * 255 or less, the first octet must be between 1 and 223, and
 * the last octet must not be 0.
 *
 * @access	public
 * @param	string
 * @return	bool
 */
function valid_ip($ip)
{
	// The D modifier stops "$" from matching before a trailing newline
	if ( ! preg_match("/^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$/D", $ip))
	{
		return FALSE;
	}

	$octets = explode('.', $ip);

	// The pattern allows three digits, so each octet still needs a range check
	for ($i = 0; $i < 4; $i++)
	{
		$octets[$i] = intval($octets[$i]);

		if ($octets[$i] > 255)
		{
			return FALSE;
		}
	}

	// First octet: 1 through 223
	if ($octets[0] < 1 OR $octets[0] > 223)
	{
		return FALSE;
	}

	// Last octet: must not be zero
	if ($octets[3] < 1)
	{
		return FALSE;
	}

	return TRUE;
}
417
418 // --------------------------------------------------------------------
419
/**
 * User Agent
 *
 * Returns $_SERVER['HTTP_USER_AGENT'], remembering it in
 * $this->user_agent once it has been found.
 *
 * @access	public
 * @return	mixed	the user agent string, or FALSE when it is not set
 */
function user_agent()
{
	if ($this->user_agent === FALSE)
	{
		$this->user_agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : FALSE;
	}

	return $this->user_agent;
}
437
438 // --------------------------------------------------------------------
439
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting Hacks can be
 * prevented.  This function does a fair amount of work but
 * it is extremely thorough, designed to prevent even the
 * most obscure XSS attempts.  Nothing is ever 100% foolproof,
 * of course, but I haven't been able to get anything passed
 * the filter.
 *
 * Note: This function should only be used to deal with data
 * upon submission.  It's not something that should
 * be used for general runtime processing.
 *
 * This function was based in part on some code and ideas I
 * got from Bitflux: http://blog.bitflux.ch/wiki/XSS_Prevention
 *
 * To help develop this script I used this great list of
 * vulnerabilities along with a few other hacks I've
 * harvested from examining vulnerabilities in other programs:
 * http://ha.ckers.org/xss.html
 *
 * @access	public
 * @param	string	the data to filter
 * @param	string	character set passed on to _html_entity_decode()
 * @return	string
 */
function xss_clean($str, $charset = 'ISO-8859-1')
{
	/*
	 * Remove Null Characters
	 *
	 * This prevents sandwiching null characters
	 * between ascii characters, like Java\0script.
	 *
	 */
	$str = preg_replace('/\0+/', '', $str);
	$str = preg_replace('/(\\\\0)+/', '', $str);

	/*
	 * Validate standard character entities
	 *
	 * Add a semicolon if missing.  We do this to enable
	 * the conversion of entities to ASCII later.
	 *
	 * With the "u" modifier preg_replace() returns NULL when
	 * the subject is not valid UTF-8, which would throw the
	 * whole string away.  In that case the same pattern is
	 * applied again in byte mode.
	 *
	 */
	$fixed = preg_replace('#(&\#*\w+)[\x00-\x20]+;#u',"\\1;",$str);
	if ($fixed === NULL)
	{
		$fixed = preg_replace('#(&\#*\w+)[\x00-\x20]+;#',"\\1;",$str);
	}
	$str = $fixed;

	/*
	 * Validate UTF16 two byte encoding (x00)
	 *
	 * Just as above, adds a semicolon if missing,
	 * with the same byte mode fallback.
	 *
	 */
	$fixed = preg_replace('#(&\#x*)([0-9A-F]+);*#iu',"\\1\\2;",$str);
	if ($fixed === NULL)
	{
		$fixed = preg_replace('#(&\#x*)([0-9A-F]+);*#i',"\\1\\2;",$str);
	}
	$str = $fixed;

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Normally urldecode() would be easier but it removes plus signs
	 *
	 * Runs of %20 are swapped for a placeholder first so that
	 * they are restored as a single %20 instead of being decoded.
	 *
	 */
	$str = preg_replace("/(%20)+/", '9u3iovBnRThju941s89rKozm', $str);
	$str = preg_replace("/%u0([a-z0-9]{3})/i", "&#x\\1;", $str);
	$str = preg_replace("/%([a-z0-9]{2})/i", "&#x\\1;", $str);
	$str = str_replace('9u3iovBnRThju941s89rKozm', "%20", $str);

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 *
	 */
	if (preg_match_all("/<(.+?)>/si", $str, $matches))
	{
		for ($i = 0; $i < count($matches['0']); $i++)
		{
			$str = str_replace($matches['1'][$i],
								$this->_html_entity_decode($matches['1'][$i], $charset),
								$str);
		}
	}

	/*
	 * Not Allowed Under Any Conditions
	 *
	 * Each key is used as a regular expression.  The same
	 * table is applied a second time at the end of the function.
	 */
	$bad = array(
					'document.cookie'	=> '[removed]',
					'document.write'	=> '[removed]',
					'window.location'	=> '[removed]',
					"javascript\s*:"	=> '[removed]',
					"Redirect\s+302"	=> '[removed]',
					'<!--'				=> '&lt;!--',
					'-->'				=> '--&gt;'
				);

	foreach ($bad as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like this: ja	vascript
	 * Note: we deal with spaces between characters later.
	 *
	 */
	$str = preg_replace("#\t+#", " ", $str);

	/*
	 * Makes PHP tags safe
	 *
	 * Note: XML tags are inadvertently replaced too:
	 *
	 *	<?xml
	 *
	 * But it doesn't seem to pose a problem.
	 *
	 */
	$str = str_replace(array('<?php', '<?PHP', '<?', '?>'),  array('&lt;?php', '&lt;?PHP', '&lt;?', '?&gt;'), $str);

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like:  j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 *
	 */
	$words = array('javascript', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
	foreach ($words as $word)
	{
		// Build "j\s*a\s*v\s*a..." - every letter followed by optional whitespace
		$temp = '';
		for ($i = 0; $i < strlen($word); $i++)
		{
			$temp .= substr($word, $i, 1)."\s*";
		}

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto"
		// (substr() drops the "\s*" that trails the last letter)
		$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 */
	$str = preg_replace_callback("#<a.*?</a>#si", array($this, '_js_link_removal'), $str);
	$str = preg_replace_callback("#<img.*?>#si", array($this, '_js_img_removal'), $str);
	$str = preg_replace("#<(script|xss).*?\>#si", "", $str);

	/*
	 * Remove JavaScript Event Handlers
	 *
	 * Note: This code is a little blunt.  It converts the
	 * angle brackets of any tag containing one of the names
	 * below to entities, but it's unlikely to be a problem.
	 *
	 */
	$event_handlers = array('onblur','onchange','onclick','onfocus','onload','onmouseover','onmouseup','onmousedown','onselect','onsubmit','onunload','onkeypress','onkeydown','onkeyup','onresize', 'xmlns');
	$str = preg_replace("#<([^>]+)(".implode('|', $event_handlers).")([^>]*)>#iU", "&lt;\\1\\2\\3&gt;", $str);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 *
	 */
	$str = preg_replace('#<(/*\s*)(alert|applet|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|layer|link|meta|object|plaintext|style|script|textarea|title|xml|xss)([^>]*)>#is', "&lt;\\1\\2\\3&gt;", $str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed.  Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:		eval&#40;'some code'&#41;
	 *
	 */
	$str = preg_replace('#(alert|cmd|passthru|eval|exec|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

	/*
	 * Final clean up
	 *
	 * This adds a bit of extra precaution in case
	 * something got through the above filters
	 *
	 */
	foreach ($bad as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	log_message('debug', "XSS Filtering completed");
	return $str;
}

/**
 * Compact Exploded Words
 *
 * Callback function for xss_clean().  Removes the whitespace
 * from inside a matched word and puts back the non-word
 * character that followed it.
 *
 * @access	private
 * @param	array	[1] the word, possibly spaced out; [2] the character after it
 * @return	string
 */
function _compact_exploded_words($matches)
{
	return preg_replace('/\s+/s', '', $matches[1]).$matches[2];
}
659
660 // --------------------------------------------------------------------
Derek Jones01f72ca2007-05-04 18:19:17 +0000661
/**
 * JS Link Removal
 *
 * Callback function for xss_clean() to sanitize links.
 * Working on one anchor at a time limits the PCRE backtracks,
 * making it more performance friendly and preventing
 * PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on link-heavy strings.
 *
 * @access	private
 * @param	array	[0] holds the anchor element that was matched
 * @return	string	the anchor, or an empty string when it was removed
 */
function _js_link_removal($match)
{
	$anchor = $match[0];

	// An href followed by any of these markers gets the whole anchor dropped
	$disallowed = "#<a.+?href=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>.*?</a>#si";

	return preg_replace($disallowed, "", $anchor);
}
678
/**
 * JS Image Removal
 *
 * Callback function for xss_clean() to sanitize image tags.
 * Working on one tag at a time limits the PCRE backtracks,
 * making it more performance friendly and preventing
 * PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on image tag heavy strings.
 *
 * @access	private
 * @param	array	[0] holds the image tag that was matched
 * @return	string	the tag, or an empty string when it was removed
 */
function _js_img_removal($match)
{
	$image = $match[0];

	// A src followed by any of these markers gets the whole tag dropped
	$disallowed = "#<img.+?src=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>#si";

	return preg_replace($disallowed, "", $image);
}
Derek Allarda72b60d2007-01-31 23:56:11 +0000695
Derek Jones01f72ca2007-05-04 18:19:17 +0000696 // --------------------------------------------------------------------
697
/**
 * HTML Entities Decode
 *
 * This function is a replacement for html_entity_decode()
 *
 * In some versions of PHP the native function does not work
 * when UTF-8 is the specified character set, so this gives us
 * a work-around.  More info here:
 * http://bugs.php.net/bug.php?id=25670
 *
 * NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
 * character set, and the PHP developers said they were not back porting the
 * fix to versions other than PHP 5.x.
 *
 * Numeric entities are turned into single bytes with chr(), so
 * only the low eight bits of the code are kept.
 *
 * @access	private
 * @param	string	the text to decode
 * @param	string	character set handed to html_entity_decode()
 * @return	string
 */
function _html_entity_decode($str, $charset='ISO-8859-1')
{
	if (stristr($str, '&') === FALSE) return $str;

	// The reason we are not using html_entity_decode() by itself is because
	// while it is not technically correct to leave out the semicolon
	// at the end of an entity most browsers will still interpret the entity
	// correctly.  html_entity_decode() does not convert entities without
	// semicolons, so we are left with our own little solution here. Bummer.

	$use_native = (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>=')));

	if ($use_native)
	{
		$str = html_entity_decode($str, ENT_COMPAT, $charset);
	}

	// After the native call only semicolon-less entities can be left.
	// Without it we have to swallow an optional semicolon ourselves.
	$tail = $use_native ? '' : ';{0,1}';

	// Hexadecimal entities.  With PREG_SPLIT_DELIM_CAPTURE the captured
	// digits sit at the odd indexes, between the untouched pieces of text.
	$parts = preg_split('~&#x([0-9a-f]{2,5})'.$tail.'~i', $str, -1, PREG_SPLIT_DELIM_CAPTURE);
	for ($i = 1; $i < count($parts); $i += 2)
	{
		$parts[$i] = chr(hexdec($parts[$i]) & 0xFF);
	}
	$str = implode('', $parts);

	// Decimal entities.  The digits are always read in base 10, so a
	// leading zero does not turn the value into an octal number.
	$parts = preg_split('~&#([0-9]{2,4})'.$tail.'~', $str, -1, PREG_SPLIT_DELIM_CAPTURE);
	for ($i = 1; $i < count($parts); $i += 2)
	{
		$parts[$i] = chr(intval($parts[$i], 10) & 0xFF);
	}
	$str = implode('', $parts);

	if ($use_native)
	{
		return $str;
	}

	// Literal Entities - Slightly slow so we only do it when
	// there is still an ampersand left in the string
	if (stristr($str, '&') !== FALSE)
	{
		$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
	}

	return $str;
}
751
752}
753// END Input class
adminb0dd10f2006-08-25 17:25:49 +0000754?>