blob: e493f6d33daeb104daa98e9fa75912fd381f27c5 [file] [log] [blame]
Derek Allarda72b60d2007-01-31 23:56:11 +00001<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
2/**
Derek Allardd2df9bc2007-04-15 17:41:17 +00003 * CodeIgniter
Derek Allarda72b60d2007-01-31 23:56:11 +00004 *
5 * An open source application development framework for PHP 4.3.2 or newer
6 *
7 * @package CodeIgniter
8 * @author Rick Ellis
Derek Allardd2df9bc2007-04-15 17:41:17 +00009 * @copyright Copyright (c) 2006, EllisLab, Inc.
Derek Allarda72b60d2007-01-31 23:56:11 +000010 * @license http://www.codeigniter.com/user_guide/license.html
11 * @link http://www.codeigniter.com
12 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * Input Class
20 *
21 * Pre-processes global input data for security
22 *
23 * @package CodeIgniter
24 * @subpackage Libraries
25 * @category Input
26 * @author Rick Ellis
27 * @link http://www.codeigniter.com/user_guide/libraries/input.html
28 */
29class CI_Input {
30 var $use_xss_clean = FALSE;
31 var $ip_address = FALSE;
32 var $user_agent = FALSE;
33 var $allow_get_array = FALSE;
34
/**
 * Constructor
 *
 * Reads the 'global_xss_filtering' and 'enable_query_strings' config
 * items to decide whether XSS filtering is applied to all input and
 * whether the $_GET array is kept, then sanitizes the input globals.
 *
 * @access	public
 */
function CI_Input()
{
	log_message('debug', "Input Class Initialized");

	$CFG =& load_class('Config');

	// Either feature is switched on only by a strict boolean TRUE
	$xss_setting = $CFG->item('global_xss_filtering');
	$this->use_xss_clean = ($xss_setting === TRUE);

	$query_string_setting = $CFG->item('enable_query_strings');
	$this->allow_get_array = ($query_string_setting === TRUE);

	$this->_sanitize_globals();
}
52
53 // --------------------------------------------------------------------
54
/**
 * Sanitize Globals
 *
 * This function does the following:
 *
 * Sets to NULL every global variable named after a GET, POST or
 * COOKIE key (effectively the same as register_globals = off),
 * except PHP's own superglobals and $this
 *
 * Empties $_GET if query strings are not enabled
 *
 * Validates the keys and cleans the values of $_GET, $_POST and
 * $_COOKIE through _clean_input_keys() and _clean_input_data()
 *
 * @access	private
 * @return	void
 */
function _sanitize_globals()
{
	// Names that request data must never be able to overwrite.  Without
	// this list a parameter called "_SERVER" or "GLOBALS" would wipe the
	// matching superglobal.
	// NOTE(review): framework-level globals may need adding here as
	// well - confirm against the bootstrap code.
	$protected = array('GLOBALS', '_GET', '_POST', '_COOKIE', '_SERVER', '_FILES', '_ENV', '_REQUEST', '_SESSION', 'this');

	// Unset globals for security.
	// This is effectively the same as register_globals = off
	foreach (array($_GET, $_POST, $_COOKIE) as $global)
	{
		if ( ! is_array($global))
		{
			continue;
		}

		foreach (array_keys($global) as $key)
		{
			// Keys are cast to string and compared strictly so that a
			// numeric key can never loosely equal a protected name
			if ( ! in_array((string) $key, $protected, TRUE))
			{
				// Going through $GLOBALS (rather than "global $$key")
				// keeps a key such as "key" or "protected" from
				// rebinding this method's own local variables
				$GLOBALS[$key] = NULL;
			}
		}
	}

	// Is $_GET data allowed? If not we'll set the $_GET to an empty array
	if ($this->allow_get_array == FALSE)
	{
		$_GET = array();
	}
	elseif (is_array($_GET) AND count($_GET) > 0)
	{
		foreach ($_GET as $key => $val)
		{
			$_GET[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	// Clean $_POST Data
	if (is_array($_POST) AND count($_POST) > 0)
	{
		foreach ($_POST as $key => $val)
		{
			$_POST[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	// Clean $_COOKIE Data
	if (is_array($_COOKIE) AND count($_COOKIE) > 0)
	{
		foreach ($_COOKIE as $key => $val)
		{
			$_COOKIE[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	log_message('debug', "Global POST and COOKIE data sanitized");
}
126
127 // --------------------------------------------------------------------
128
/**
 * Clean Input Data
 *
 * This is a helper function. It strips the slashes added by
 * magic quotes (where that feature still exists), optionally runs
 * the XSS filter, and standardizes newline characters to \n.
 * Arrays are processed recursively and their keys are validated
 * with _clean_input_keys().
 *
 * @access	private
 * @param	mixed	a string, or an array of strings/arrays
 * @return	mixed	the cleaned string or array
 */
function _clean_input_data($str)
{
	if (is_array($str))
	{
		$new_array = array();
		foreach ($str as $key => $val)
		{
			$new_array[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
		return $new_array;
	}

	// We strip slashes if magic quotes is on to keep things consistent.
	// get_magic_quotes_gpc() always returns FALSE as of PHP 5.4, is
	// deprecated in 7.4 and removed in 8.0, so it is only called on
	// versions where it can still report TRUE.
	if (version_compare(phpversion(), '5.4.0', '<') AND get_magic_quotes_gpc())
	{
		$str = stripslashes($str);
	}

	// Should we filter the input data?
	if ($this->use_xss_clean === TRUE)
	{
		$str = $this->xss_clean($str);
	}

	// Standardize newlines
	return preg_replace("/\015\012|\015|\012/", "\n", $str);
}
166
167 // --------------------------------------------------------------------
168
/**
 * Clean Keys
 *
 * This is a helper function. To prevent malicious users
 * from trying to exploit keys we make sure that keys are
 * only named with alpha-numeric text and a few other items
 * (colon, underscore, forward slash and dash).
 *
 * Execution is halted with exit() when a key contains
 * anything else.
 *
 * @access	private
 * @param	string	the key to validate
 * @return	string	the same key, unchanged
 */
function _clean_input_keys($str)
{
	// The D modifier makes "$" match only at the very end of the key.
	// Without it a key ending in a newline would pass the whitelist.
	if ( ! preg_match('/^[a-z0-9:_\/-]+$/iD', $str))
	{
		exit('Disallowed Key Characters.');
	}

	return $str;
}
Rick Ellis112569d2007-02-26 19:19:08 +0000189
190 // --------------------------------------------------------------------
191
/**
 * Fetch an item from the GET array
 *
 * When XSS filtering is requested the filtered value is returned
 * as a copy; the $_GET array itself is not modified.
 *
 * @access	public
 * @param	string	key to look up in $_GET
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE if the key is not set
 */
function get($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_GET[$index]))
	{
		return FALSE;
	}

	$value = $_GET[$index];

	if ($xss_clean !== TRUE)
	{
		return $value;
	}

	if ( ! is_array($value))
	{
		return $this->xss_clean($value);
	}

	// Filter each element into a fresh array so that the superglobal
	// keeps its original contents, as cookie() already does
	$cleaned = array();
	foreach ($value as $key => $val)
	{
		$cleaned[$key] = $this->xss_clean($val);
	}

	return $cleaned;
}
Derek Allarda72b60d2007-01-31 23:56:11 +0000224
225 // --------------------------------------------------------------------
226
/**
 * Fetch an item from the POST array
 *
 * When XSS filtering is requested the filtered value is returned
 * as a copy; the $_POST array itself is not modified.
 *
 * @access	public
 * @param	string	key to look up in $_POST
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE if the key is not set
 */
function post($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_POST[$index]))
	{
		return FALSE;
	}

	$value = $_POST[$index];

	if ($xss_clean !== TRUE)
	{
		return $value;
	}

	if ( ! is_array($value))
	{
		return $this->xss_clean($value);
	}

	// Filter each element into a fresh array so that the superglobal
	// keeps its original contents, as cookie() already does
	$cleaned = array();
	foreach ($value as $key => $val)
	{
		$cleaned[$key] = $this->xss_clean($val);
	}

	return $cleaned;
}
259
260 // --------------------------------------------------------------------
261
/**
 * Fetch an item from the COOKIE array
 *
 * @access	public
 * @param	string	key to look up in $_COOKIE
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE if the key is not set
 */
function cookie($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_COOKIE[$index]))
	{
		return FALSE;
	}

	$stored = $_COOKIE[$index];

	// No filtering requested: hand the raw value back
	if ($xss_clean !== TRUE)
	{
		return $stored;
	}

	if ( ! is_array($stored))
	{
		return $this->xss_clean($stored);
	}

	// Array cookie: filter every element into a new array
	$filtered = array();
	foreach ($stored as $name => $item)
	{
		$filtered[$name] = $this->xss_clean($item);
	}

	return $filtered;
}
299
300 // --------------------------------------------------------------------
301
/**
 * Fetch an item from the SERVER array
 *
 * @access	public
 * @param	string	key to look up in $_SERVER
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value, or FALSE if the key is not set
 */
function server($index = '', $xss_clean = FALSE)
{
	if (isset($_SERVER[$index]))
	{
		$value = $_SERVER[$index];

		return ($xss_clean === TRUE) ? $this->xss_clean($value) : $value;
	}

	return FALSE;
}
324
325 // --------------------------------------------------------------------
326
/**
 * Fetch the IP Address
 *
 * The result is cached in $this->ip_address after the first call.
 * '0.0.0.0' is returned when no address is available or when the
 * candidate does not pass valid_ip().
 *
 * @access	public
 * @return	string
 */
function ip_address()
{
	if ($this->ip_address !== FALSE)
	{
		return $this->ip_address;
	}

	// Precedence: HTTP_CLIENT_IP, then REMOTE_ADDR, then
	// HTTP_X_FORWARDED_FOR.
	// NOTE(review): the two HTTP_* values are request headers, so a
	// client can set them freely; do not rely on this address for
	// access control without confirming the proxy setup.
	if ($this->server('HTTP_CLIENT_IP'))
	{
		$this->ip_address = $_SERVER['HTTP_CLIENT_IP'];
	}
	elseif ($this->server('REMOTE_ADDR'))
	{
		$this->ip_address = $_SERVER['REMOTE_ADDR'];
	}
	elseif ($this->server('HTTP_X_FORWARDED_FOR'))
	{
		$this->ip_address = $_SERVER['HTTP_X_FORWARDED_FOR'];
	}

	if ($this->ip_address === FALSE)
	{
		$this->ip_address = '0.0.0.0';
		return $this->ip_address;
	}

	// A comma separated list: keep the last entry.  Lists are commonly
	// written as "a, b", so the entry is trimmed - otherwise its leading
	// space would make valid_ip() reject an otherwise good address.
	if (strpos($this->ip_address, ',') !== FALSE)
	{
		$x = explode(',', $this->ip_address);
		$this->ip_address = trim(end($x));
	}

	if ( ! $this->valid_ip($this->ip_address))
	{
		$this->ip_address = '0.0.0.0';
	}

	return $this->ip_address;
}
376
377 // --------------------------------------------------------------------
378
/**
 * Validate IP Address
 *
 * Checks a dotted-quad address: exactly four segments, the first
 * one not beginning with "0", and every segment made only of
 * digits, at most three characters long and no greater than 255.
 *
 * Updated version suggested by Geert De Deckere
 *
 * @access	public
 * @param	string	the address to check
 * @return	bool	TRUE when the address passes every check
 */
function valid_ip($ip)
{
	$octets = explode('.', $ip);

	// Always 4 segments needed, and the IP cannot start with 0
	if (count($octets) != 4 OR substr($octets[0], 0, 1) === '0')
	{
		return FALSE;
	}

	for ($i = 0; $i < 4; $i++)
	{
		$octet = $octets[$i];

		// Digits only, no greater than 255, no longer than 3 characters
		$acceptable = (ctype_digit($octet) AND $octet <= 255 AND strlen($octet) <= 3);

		if ( ! $acceptable)
		{
			return FALSE;
		}
	}

	return TRUE;
}
415
416 // --------------------------------------------------------------------
417
/**
 * User Agent
 *
 * Returns $_SERVER['HTTP_USER_AGENT'], remembering it in
 * $this->user_agent once it has been found.
 *
 * @access	public
 * @return	mixed	the user agent string, or FALSE if none is set
 */
function user_agent()
{
	// Nothing cached yet: look the header up
	if ($this->user_agent === FALSE)
	{
		if (isset($_SERVER['HTTP_USER_AGENT']))
		{
			$this->user_agent = $_SERVER['HTTP_USER_AGENT'];
		}
	}

	return $this->user_agent;
}
435
436 // --------------------------------------------------------------------
437
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting Hacks can be
 * prevented.  This function does a fair amount of work but
 * it is extremely thorough, designed to prevent even the
 * most obscure XSS attempts.  Nothing is ever 100% foolproof,
 * of course, but I haven't been able to get anything past
 * the filter.
 *
 * Note: This function should only be used to deal with data
 * upon submission.  It's not something that should
 * be used for general runtime processing.
 *
 * This function was based in part on some code and ideas I
 * got from Bitflux: http://blog.bitflux.ch/wiki/XSS_Prevention
 *
 * To help develop this script I used this great list of
 * vulnerabilities along with a few other hacks I've
 * harvested from examining vulnerabilities in other programs:
 * http://ha.ckers.org/xss.html
 *
 * @access	public
 * @param	string	the data to filter
 * @param	string	character set passed on to _html_entity_decode()
 * @return	string
 */
function xss_clean($str, $charset = 'ISO-8859-1')
{
	/*
	 * Remove Null Characters
	 *
	 * This prevents sandwiching null characters
	 * between ascii characters, like Java\0script.
	 *
	 */
	$str = preg_replace('/\0+/', '', $str);
	$str = preg_replace('/(\\\\0)+/', '', $str);

	/*
	 * Validate standard character entities, then
	 * validate UTF16 two byte encoding (x00)
	 *
	 * Add a semicolon if missing.  We do this to enable
	 * the conversion of entities to ASCII later.
	 *
	 * The patterns are tried with the "u" modifier first.  With
	 * that modifier preg_replace() returns NULL when the subject is
	 * not valid UTF-8, which would otherwise discard the whole
	 * input, so in that case the same pattern is re-run in byte mode.
	 *
	 */
	$entity_fixes = array(
						'#(&\#*\w+)[\x00-\x20]+;#'	=> "\\1;",
						'#(&\#x*)([0-9A-F]+);*#i'	=> "\\1\\2;"
					);

	foreach ($entity_fixes as $pattern => $replacement)
	{
		$fixed = preg_replace($pattern.'u', $replacement, $str);

		if ($fixed === NULL)
		{
			$fixed = preg_replace($pattern, $replacement, $str);
		}

		$str = $fixed;
	}

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Normally urldecode() would be easier but it removes plus signs
	 *
	 * %20 is swapped for a placeholder first so that it is not
	 * turned into an entity, and restored afterwards.
	 *
	 */
	$str = preg_replace("/(%20)+/", '9u3iovBnRThju941s89rKozm', $str);
	$str = preg_replace("/%u0([a-z0-9]{3})/i", "&#x\\1;", $str);
	$str = preg_replace("/%([a-z0-9]{2})/i", "&#x\\1;", $str);
	$str = str_replace('9u3iovBnRThju941s89rKozm', "%20", $str);

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 *
	 */
	if (preg_match_all("/<(.+?)>/si", $str, $matches))
	{
		$total = count($matches['0']);

		for ($i = 0; $i < $total; $i++)
		{
			$str = str_replace($matches['1'][$i],
								$this->_html_entity_decode($matches['1'][$i], $charset),
								$str);
		}
	}

	/*
	 * Not Allowed Under Any Conditions
	 *
	 * The keys are used as regular expressions.  The same list
	 * is applied a second time at the end of this function.
	 */
	$bad = array(
					'document.cookie'	=> '[removed]',
					'document.write'	=> '[removed]',
					'window.location'	=> '[removed]',
					"javascript\s*:"	=> '[removed]',
					"Redirect\s+302"	=> '[removed]',
					'<!--'				=> '&lt;!--',
					'-->'				=> '--&gt;'
				);

	foreach ($bad as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like this: ja	vascript
	 * Note: we deal with spaces between characters later.
	 *
	 */
	$str = preg_replace("#\t+#", " ", $str);

	/*
	 * Makes PHP tags safe
	 *
	 * Note: XML tags are inadvertently replaced too:
	 *
	 *	<?xml
	 *
	 * But it doesn't seem to pose a problem.
	 *
	 */
	$str = str_replace(array('<?php', '<?PHP', '<?', '?>'), array('&lt;?php', '&lt;?PHP', '&lt;?', '?&gt;'), $str);

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like:  j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 *
	 */
	$words = array('javascript', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');

	foreach ($words as $word)
	{
		$temp = '';
		$length = strlen($word);

		for ($i = 0; $i < $length; $i++)
		{
			$temp .= substr($word, $i, 1)."\s*";
		}

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto"
		//
		// A callback is used instead of the "e" modifier: that modifier
		// no longer exists in PHP 7, and it passed the matched text
		// through addslashes(), leaving a stray backslash in front of a
		// double quote that followed the word.
		$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 */
	$str = preg_replace_callback("#<a.*?</a>#si", array($this, '_js_link_removal'), $str);
	$str = preg_replace_callback("#<img.*?>#si", array($this, '_js_img_removal'), $str);
	$str = preg_replace("#<(script|xss).*?\>#si", "", $str);

	/*
	 * Remove JavaScript Event Handlers
	 *
	 * Note: This code is a little blunt.  A tag containing one
	 * of these attribute names has its angle brackets converted
	 * to entities, but it's unlikely to be a problem.
	 *
	 */
	$event_handlers = array('onblur','onchange','onclick','onfocus','onload','onmouseover','onmouseup','onmousedown','onselect','onsubmit','onunload','onkeypress','onkeydown','onkeyup','onresize', 'xmlns');
	$str = preg_replace("#<([^>]+)(".implode('|', $event_handlers).")([^>]*)>#iU", "&lt;\\1\\2\\3&gt;", $str);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 *
	 */
	$str = preg_replace('#<(/*\s*)(alert|applet|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|layer|link|meta|object|plaintext|style|script|textarea|title|xml|xss)([^>]*)>#is', "&lt;\\1\\2\\3&gt;", $str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed.  Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:		eval&#40;'some code'&#41;
	 *
	 */
	$str = preg_replace('#(alert|cmd|passthru|eval|exec|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

	/*
	 * Final clean up
	 *
	 * This adds a bit of extra precaution in case
	 * something got through the above filters
	 *
	 */
	foreach ($bad as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	log_message('debug', "XSS Filtering completed");
	return $str;
}

/**
 * Compact Exploded Words
 *
 * Callback function for xss_clean() to remove whitespace from
 * things like j a v a s c r i p t
 *
 * @access	private
 * @param	array	$matches[1] is the exploded word, $matches[2] the non-word character after it
 * @return	string	the word without whitespace, followed by the untouched trailing character
 */
function _compact_exploded_words($matches)
{
	return preg_replace('/\s+/s', '', $matches[1]).$matches[2];
}
657
658 // --------------------------------------------------------------------
Derek Jones01f72ca2007-05-04 18:19:17 +0000659
/**
 * JS Link Removal
 *
 * Callback function for xss_clean() to sanitize links.
 * Receives one <a ...>...</a> match at a time and returns an empty
 * string when its href contains one of the disallowed script
 * fragments, otherwise the link unchanged.
 *
 * Working on one link at a time limits the PCRE backtracks, making
 * it more performance friendly and prevents
 * PREG_BACKTRACK_LIMIT_ERROR from being triggered in PHP 5.2+ on
 * link-heavy strings
 *
 * @access	private
 * @param	array	regex match; element 0 is the full link
 * @return	string
 */
function _js_link_removal($match)
{
	$link = $match[0];
	$disallowed = "#<a.+?href=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>.*?</a>#si";

	return preg_replace($disallowed, "", $link);
}
676
/**
 * JS Image Removal
 *
 * Callback function for xss_clean() to sanitize image tags.
 * Receives one <img ...> match at a time and returns an empty
 * string when its src contains one of the disallowed script
 * fragments, otherwise the tag unchanged.
 *
 * Working on one tag at a time limits the PCRE backtracks, making
 * it more performance friendly and prevents
 * PREG_BACKTRACK_LIMIT_ERROR from being triggered in PHP 5.2+ on
 * image tag heavy strings
 *
 * @access	private
 * @param	array	regex match; element 0 is the full image tag
 * @return	string
 */
function _js_img_removal($match)
{
	$tag = $match[0];
	$disallowed = "#<img.+?src=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>#si";

	return preg_replace($disallowed, "", $tag);
}
Derek Allarda72b60d2007-01-31 23:56:11 +0000693
Derek Jones01f72ca2007-05-04 18:19:17 +0000694 // --------------------------------------------------------------------
695
/**
 * HTML Entities Decode
 *
 * This function is a replacement for html_entity_decode()
 *
 * In some versions of PHP the native function does not work
 * when UTF-8 is the specified character set, so this gives us
 * a work-around.  More info here:
 * http://bugs.php.net/bug.php?id=25670
 *
 * NOTE: html_entity_decode() has a bug in some PHP versions when
 * UTF-8 is the character set, and the PHP developers said they
 * were not back porting the fix to versions other than PHP 5.x.
 *
 * @access	private
 * @param	string	text that may contain entities
 * @param	string	character set handed to html_entity_decode()
 * @return	string
 */
function _html_entity_decode($str, $charset='ISO-8859-1')
{
	if (stristr($str, '&') === FALSE) return $str;

	// The reason we are not using html_entity_decode() by itself is because
	// while it is not technically correct to leave out the semicolon
	// at the end of an entity most browsers will still interpret the entity
	// correctly.  html_entity_decode() does not convert entities without
	// semicolons, so we are left with our own little solution here. Bummer.
	//
	// The numeric entities are converted through callbacks rather than the
	// "e" pattern modifier, which no longer exists in PHP 7.

	if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>=')))
	{
		$str = html_entity_decode($str, ENT_COMPAT, $charset);
		$str = preg_replace_callback('~&#x([0-9a-f]{2,5})~i', array($this, '_decode_hex_entity'), $str);
		return preg_replace_callback('~&#([0-9]{2,4})~', array($this, '_decode_dec_entity'), $str);
	}

	// Numeric Entities
	$str = preg_replace_callback('~&#x([0-9a-f]{2,5});{0,1}~i', array($this, '_decode_hex_entity'), $str);
	$str = preg_replace_callback('~&#([0-9]{2,4});{0,1}~', array($this, '_decode_dec_entity'), $str);

	// Literal Entities - Slightly slow so we do another check and only
	// translate when an ampersand is still present
	if (stristr($str, '&') !== FALSE)
	{
		$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
	}

	return $str;
}

/**
 * Decode Hexadecimal Entity
 *
 * Callback function for _html_entity_decode()
 *
 * @access	private
 * @param	array	regex match; element 1 holds the hexadecimal digits
 * @return	string	the character chr() gives for that value
 */
function _decode_hex_entity($match)
{
	return chr(hexdec($match[1]));
}

/**
 * Decode Decimal Entity
 *
 * Callback function for _html_entity_decode()
 *
 * The digits are always read as a decimal number, so a zero-padded
 * entity such as &#065 yields the same character as &#65.
 *
 * @access	private
 * @param	array	regex match; element 1 holds the decimal digits
 * @return	string	the character chr() gives for that value
 */
function _decode_dec_entity($match)
{
	return chr((int) $match[1]);
}
749
750}
751// END Input class
adminb0dd10f2006-08-25 17:25:49 +0000752?>