blob: 0d3c87b49f87c40cfdcbd936799a3b76546c0183 [file] [log] [blame]
adminb0dd10f2006-08-25 17:25:49 +00001<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
2/**
3 * Code Igniter
4 *
5 * An open source application development framework for PHP 4.3.2 or newer
6 *
7 * @package CodeIgniter
8 * @author Rick Ellis
9 * @copyright Copyright (c) 2006, pMachine, Inc.
10 * @license http://www.codeigniter.com/user_guide/license.html
11 * @link http://www.codeigniter.com
12 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * Input Class
20 *
21 * Pre-processes global input data for security
22 *
23 * @package CodeIgniter
24 * @subpackage Libraries
25 * @category Input
26 * @author Rick Ellis
27 * @link http://www.codeigniter.com/user_guide/libraries/input.html
28 */
class CI_Input {

	// TRUE when the 'global_xss_filtering' config item is strictly TRUE;
	// makes _clean_input_data() run xss_clean() on every POST/COOKIE value.
	var $use_xss_clean		= FALSE;

	// Cached result of ip_address(); FALSE means "not resolved yet".
	var $ip_address			= FALSE;

	// Cached result of user_agent(); FALSE means "not resolved yet"
	// (or that no User-Agent header was sent).
	var $user_agent			= FALSE;

	// TRUE when the 'enable_query_strings' config item is strictly TRUE;
	// otherwise $_GET is emptied during construction.
	var $allow_get_array	= FALSE;

	/**
	 * Constructor (PHP 4 entry point)
	 *
	 * Kept so that PHP 4, which only recognises a constructor named
	 * after the class, still initialises the object.  It is declared
	 * before __construct() because some PHP 5 releases raise an
	 * E_STRICT notice when the declarations appear in the other order.
	 *
	 * @access	public
	 */
	function CI_Input()
	{
		$this->__construct();
	}

	// --------------------------------------------------------------------

	/**
	 * Constructor
	 *
	 * Reads the XSS-filtering and query-string settings from the Config
	 * class and then sanitizes the request super-globals.
	 *
	 * @access	public
	 */
	function __construct()
	{
		log_message('debug', "Input Class Initialized");

		// _load_class() is the framework's class loader (defined outside this file).
		$CFG =& _load_class('Config');
		$this->use_xss_clean	= ($CFG->item('global_xss_filtering') === TRUE);
		$this->allow_get_array	= ($CFG->item('enable_query_strings') === TRUE);
		$this->_sanitize_globals();
	}

	// --------------------------------------------------------------------

	/**
	 * Sanitize Globals
	 *
	 * This function does the following:
	 *
	 * Unsets globals injected by register_globals (if it is enabled)
	 *
	 * Empties $_GET (if query strings are not enabled)
	 *
	 * Validates the keys of $_POST and $_COOKIE, optionally XSS-filters
	 * their values and standardizes newline characters to \n
	 *
	 * @access	private
	 * @return	void
	 */
	function _sanitize_globals()
	{
		// Must run before $_GET is emptied so that GET-injected globals are seen.
		$this->_unset_registered_globals();

		// Is $_GET data allowed? If not we'll set the $_GET to an empty array
		if ($this->allow_get_array == FALSE)
		{
			$_GET = array();
		}

		// Clean $_POST Data
		if (is_array($_POST) AND count($_POST) > 0)
		{
			foreach ($_POST as $key => $val)
			{
				$_POST[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
			}
		}

		// Clean $_COOKIE Data
		if (is_array($_COOKIE) AND count($_COOKIE) > 0)
		{
			foreach ($_COOKIE as $key => $val)
			{
				$_COOKIE[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
			}
		}

		log_message('debug', "Global POST and COOKIE data sanitized");
	}

	// --------------------------------------------------------------------

	/**
	 * Unset globals created by register_globals
	 *
	 * Variable-variables (unset($$key)) inside a method only affect the
	 * method's local scope, so the global symbol table has to be edited
	 * through $GLOBALS.  A global is only removed when it still holds
	 * exactly the value that came in with the request; this keeps a
	 * request parameter from deleting a variable the application has
	 * since assigned itself.  Super-globals are never touched.
	 *
	 * @access	private
	 * @return	void
	 */
	function _unset_registered_globals()
	{
		// ini_get() yields "1"/"On" when enabled, ""/"0"/"Off" when disabled,
		// and FALSE on PHP versions where the directive no longer exists.
		$flag = strtolower((string) ini_get('register_globals'));
		if ( ! in_array($flag, array('1', 'on', 'true', 'yes'), TRUE))
		{
			return;
		}

		$protected = array('GLOBALS', '_GET', '_POST', '_COOKIE', '_SERVER', '_FILES', '_ENV', '_REQUEST', '_SESSION');

		foreach (array($_GET, $_POST, $_COOKIE) as $source)
		{
			if ( ! is_array($source))
			{
				continue;
			}

			foreach ($source as $key => $val)
			{
				// Cast + strict compare: a loose in_array() would treat the
				// integer key 0 as equal to every name in the list.
				if (in_array((string) $key, $protected, TRUE))
				{
					continue;
				}

				if (isset($GLOBALS[$key]) AND $GLOBALS[$key] === $val)
				{
					unset($GLOBALS[$key]);
				}
			}
		}
	}

	// --------------------------------------------------------------------

	/**
	 * Clean Input Data
	 *
	 * This is a helper function. It validates array keys, runs the
	 * XSS filter when global filtering is enabled, and standardizes
	 * newline characters to \n.  Arrays are processed recursively.
	 *
	 * @access	private
	 * @param	mixed	string or (nested) array of strings
	 * @return	mixed	cleaned value of the same shape
	 */
	function _clean_input_data($str)
	{
		if (is_array($str))
		{
			$new_array = array();
			foreach ($str as $key => $val)
			{
				$new_array[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
			}
			return $new_array;
		}

		if ($this->use_xss_clean === TRUE)
		{
			$str = $this->xss_clean($str);
		}

		// Standardize newlines: CRLF first so it collapses to one \n, then lone CR
		return str_replace(array("\r\n", "\r"), "\n", $str);
	}

	// --------------------------------------------------------------------

	/**
	 * Clean Keys
	 *
	 * This is a helper function. To prevent malicious users
	 * from trying to exploit keys we make sure that keys are
	 * only named with alpha-numeric text and a few other items
	 * (colon, underscore, slash and dash).  The script is halted
	 * when a key contains anything else.
	 *
	 * @access	private
	 * @param	string
	 * @return	string	the key, unchanged
	 */
	function _clean_input_keys($str)
	{
		// \z instead of $ so that a trailing newline is rejected as well.
		if ( ! preg_match('/^[a-z0-9:_\/-]+\z/i', $str))
		{
			// The offending key is deliberately not echoed back: it is
			// attacker-controlled and would be printed unescaped.
			exit('Disallowed Key Characters.');
		}

		// No addslashes() needed: none of the permitted characters can be
		// escaped by it, so the key is returned as-is.
		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Fetch from array
	 *
	 * Shared implementation for post(), cookie() and server().
	 * The source array is never modified.
	 *
	 * @access	private
	 * @param	array	the array to read from (passed by reference to avoid a copy)
	 * @param	string	the key to fetch
	 * @param	bool	whether to run the value through xss_clean()
	 * @return	mixed	the value, or FALSE when the key is not set
	 */
	function _fetch_from_array(&$array, $index = '', $xss_clean = FALSE)
	{
		if ( ! isset($array[$index]))
		{
			return FALSE;
		}

		if ($xss_clean === TRUE)
		{
			// xss_clean() recurses into arrays on its own
			return $this->xss_clean($array[$index]);
		}

		return $array[$index];
	}

	// --------------------------------------------------------------------

	/**
	 * Fetch an item from the POST array
	 *
	 * @access	public
	 * @param	string
	 * @param	bool
	 * @return	mixed	string or array, FALSE when the item does not exist
	 */
	function post($index = '', $xss_clean = FALSE)
	{
		return $this->_fetch_from_array($_POST, $index, $xss_clean);
	}

	// --------------------------------------------------------------------

	/**
	 * Fetch an item from the COOKIE array
	 *
	 * @access	public
	 * @param	string
	 * @param	bool
	 * @return	mixed	string or array, FALSE when the item does not exist
	 */
	function cookie($index = '', $xss_clean = FALSE)
	{
		return $this->_fetch_from_array($_COOKIE, $index, $xss_clean);
	}

	// --------------------------------------------------------------------

	/**
	 * Fetch an item from the SERVER array
	 *
	 * @access	public
	 * @param	string
	 * @param	bool
	 * @return	mixed	string, FALSE when the item does not exist
	 */
	function server($index = '', $xss_clean = FALSE)
	{
		return $this->_fetch_from_array($_SERVER, $index, $xss_clean);
	}

	// --------------------------------------------------------------------

	/**
	 * Fetch the IP Address
	 *
	 * The result is cached in $this->ip_address.  '0.0.0.0' is returned
	 * when no address is available or the candidate is not a valid
	 * dotted-quad.
	 *
	 * NOTE(review): HTTP_CLIENT_IP and HTTP_X_FORWARDED_FOR are request
	 * headers and can be forged by the client.  The original precedence
	 * (HTTP_CLIENT_IP wins over REMOTE_ADDR) is preserved here for
	 * backward compatibility; do not use the result for access control.
	 *
	 * @access	public
	 * @return	string
	 */
	function ip_address()
	{
		if ($this->ip_address !== FALSE)
		{
			return $this->ip_address;
		}

		// First non-empty source wins
		foreach (array('HTTP_CLIENT_IP', 'REMOTE_ADDR', 'HTTP_X_FORWARDED_FOR') as $source)
		{
			$candidate = $this->server($source);
			if ($candidate)
			{
				$this->ip_address = $candidate;
				break;
			}
		}

		if ($this->ip_address === FALSE)
		{
			return $this->ip_address = '0.0.0.0';
		}

		if (strpos($this->ip_address, ',') !== FALSE)
		{
			// Lists are usually written "a, b, c": take the last entry and
			// strip the padding, otherwise " c" would fail validation.
			$x = explode(',', $this->ip_address);
			$this->ip_address = trim(end($x));
		}

		if ( ! $this->valid_ip($this->ip_address))
		{
			$this->ip_address = '0.0.0.0';
		}

		return $this->ip_address;
	}

	// --------------------------------------------------------------------

	/**
	 * Validate IP Address
	 *
	 * Accepts dotted-quad IPv4 notation only; every octet must be
	 * in the range 0-255.
	 *
	 * @access	public
	 * @param	string
	 * @return	bool
	 */
	function valid_ip($ip)
	{
		if ( ! preg_match('/^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\z/', $ip, $octets))
		{
			return FALSE;
		}

		for ($i = 1; $i <= 4; $i++)
		{
			if ((int) $octets[$i] > 255)
			{
				return FALSE;
			}
		}

		return TRUE;
	}

	// --------------------------------------------------------------------

	/**
	 * User Agent
	 *
	 * @access	public
	 * @return	mixed	the User-Agent string, or FALSE when none was sent
	 */
	function user_agent()
	{
		if ($this->user_agent !== FALSE)
		{
			return $this->user_agent;
		}

		$this->user_agent = ( ! isset($_SERVER['HTTP_USER_AGENT'])) ? FALSE : $_SERVER['HTTP_USER_AGENT'];

		return $this->user_agent;
	}

	// --------------------------------------------------------------------

	/**
	 * XSS Clean
	 *
	 * Sanitizes data so that Cross Site Scripting Hacks can be
	 * prevented.  This function does a fair amount of work, but
	 * it is a pattern-based filter and nothing of this kind is
	 * ever 100% foolproof.
	 *
	 * Note: This function should only be used to deal with data
	 * upon submission.  It's not something that should
	 * be used for general runtime processing.
	 *
	 * This function was based in part on some code and ideas I
	 * got from Bitflux: http://blog.bitflux.ch/wiki/XSS_Prevention
	 *
	 * To help develop this script I used this great list of
	 * vulnerabilities along with a few other hacks I've
	 * harvested from examining vulnerabilities in other programs:
	 * http://ha.ckers.org/xss.html
	 *
	 * @access	public
	 * @param	mixed	string, or (nested) array of strings
	 * @param	string	character set used when decoding entities
	 * @return	mixed	cleaned value of the same shape
	 */
	function xss_clean($str, $charset = 'ISO-8859-1')
	{
		/*
		 * Arrays are cleaned element by element; keys are left alone.
		 */
		if (is_array($str))
		{
			$cleaned = array();
			foreach ($str as $key => $val)
			{
				$cleaned[$key] = $this->xss_clean($val, $charset);
			}
			return $cleaned;
		}

		/*
		 * Remove Null Characters
		 *
		 * This prevents sandwiching null characters
		 * between ascii characters, like Java\0script.
		 *
		 */
		$str = preg_replace('/\0+/', '', $str);
		$str = preg_replace('/(\\\\0)+/', '', $str);

		/*
		 * Validate standard character entities
		 *
		 * Removes whitespace/control characters that were inserted
		 * between an entity and its semicolon. We do this to enable
		 * the conversion of entities to ASCII later.
		 *
		 * The patterns are pure ASCII, so they are run in byte mode:
		 * with the "u" modifier PCRE returns NULL for any input that
		 * is not valid UTF-8 and the whole string would be lost.
		 *
		 */
		$str = preg_replace('#(&\#*\w+)[\x00-\x20]+;#', "\\1;", $str);

		/*
		 * Validate UTF16 two byte encoding (x00)
		 *
		 * Adds a semicolon to numeric entities if missing.
		 *
		 */
		$str = preg_replace('#(&\#x*)([0-9A-F]+);*#i', "\\1\\2;", $str);

		/*
		 * URL Decode
		 *
		 * Just in case stuff like this is submitted:
		 *
		 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
		 *
		 * Note: Normally urldecode() would be easier but it removes plus signs
		 *
		 * Only real hex digits are accepted, so text such as "50%of"
		 * is not rewritten into a bogus entity.
		 *
		 */
		$str = preg_replace("/%u0([a-f0-9]{3})/i", "&#x\\1;", $str);
		$str = preg_replace("/%([a-f0-9]{2})/i", "&#x\\1;", $str);

		/*
		 * Convert character entities to ASCII
		 *
		 * This permits our tests below to work reliably.
		 * Entities are looked up inside tags since these are the
		 * ones that will pose security problems; note that the
		 * replacement is applied wherever the same text occurs.
		 *
		 */
		if (preg_match_all("/<(.+?)>/si", $str, $matches))
		{
			$total = count($matches[0]);
			for ($i = 0; $i < $total; $i++)
			{
				$str = str_replace($matches[1][$i],
									$this->_html_entity_decode($matches[1][$i], $charset),
									$str);
			}
		}

		/*
		 * Convert all tabs to spaces
		 *
		 * This prevents strings like this: ja	vascript
		 * Note: we deal with spaces between characters later.
		 *
		 */
		$str = preg_replace("#\t+#", " ", $str);

		/*
		 * Makes PHP tags safe
		 *
		 * Note: XML tags are inadvertently replaced too:
		 *
		 *	<?xml
		 *
		 * But it doesn't seem to pose a problem.
		 *
		 */
		$str = str_replace(array('<?php', '<?PHP', '<?', '?>'), array('&lt;?php', '&lt;?PHP', '&lt;?', '?&gt;'), $str);

		/*
		 * Compact any exploded words
		 *
		 * This corrects words like:  j a v a s c r i p t
		 * The whitespace is removed and the letters keep their
		 * original case, so every spelling (JAVASCRIPT, JavaScript,
		 * ...) is caught by the case-insensitive filters below.
		 *
		 */
		$words = array('javascript', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
		foreach ($words as $word)
		{
			// "abc" -> "a\s*b\s*c\s*"; the trailing "\s*" (3 chars) is cut off
			$spaced = substr(chunk_split($word, 1, '\s*'), 0, -3);
			$str = preg_replace_callback('#'.$spaced.'#is', array($this, '_compact_exploded_word'), $str);
		}

		/*
		 * Remove disallowed Javascript in links or img tags
		 */
		$str = preg_replace("#<a.+?href=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>.*?</a>#si", "", $str);
		$str = preg_replace("#<img.+?src=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>#si", "", $str);
		$str = preg_replace("#<(script|xss).*?\>#si", "", $str);

		/*
		 * Remove JavaScript Event Handlers
		 *
		 * Note: This code is a little blunt.  It removes
		 * the event handler and anything up to the closing >,
		 * but it's unlikely to be a problem.
		 *
		 */
		$str = preg_replace('#(<[^>]+.*?)(onblur|onchange|onclick|onfocus|onload|onmouseover|onmouseup|onmousedown|onselect|onsubmit|onunload|onkeypress|onkeydown|onkeyup|onresize)[^>]*>#iU', "\\1>", $str);

		/*
		 * Sanitize naughty HTML elements
		 *
		 * If a tag containing any of the words in the list
		 * below is found, the tag gets converted to entities.
		 *
		 * So this: <blink>
		 * Becomes: &lt;blink&gt;
		 *
		 */
		$str = preg_replace('#<(/*\s*)(alert|applet|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|layer|link|meta|object|plaintext|style|script|textarea|title|xml|xss)([^>]*)>#is', "&lt;\\1\\2\\3&gt;", $str);

		/*
		 * Sanitize naughty scripting elements
		 *
		 * Similar to above, only instead of looking for
		 * tags it looks for PHP and JavaScript commands
		 * that are disallowed.  Rather than removing the
		 * code, it simply converts the parenthesis to entities
		 * rendering the code unexecutable.
		 *
		 * For example:	eval('some code')
		 * Becomes:		eval&#40;'some code'&#41;
		 *
		 */
		$str = preg_replace('#(alert|cmd|passthru|eval|exec|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

		/*
		 * Final clean up
		 *
		 * This adds a bit of extra precaution in case
		 * something got through the above filters.
		 * Keys are regular expressions, values their replacement.
		 *
		 */
		$bad = array(
						'document.cookie'	=> '',
						'document.write'	=> '',
						'window.location'	=> '',
						"javascript\s*:"	=> '',
						"Redirect\s+302"	=> '',
						'<!--'				=> '&lt;!--',
						'-->'				=> '--&gt;'
					);

		foreach ($bad as $key => $val)
		{
			$str = preg_replace("#".$key."#i", $val, $str);
		}

		log_message('debug', "XSS Filtering completed");
		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Compact an exploded word
	 *
	 * preg_replace_callback() handler used by xss_clean(): strips all
	 * whitespace from the matched text, leaving the letters untouched.
	 *
	 * @access	private
	 * @param	array	regex match; element 0 is the full match
	 * @return	string
	 */
	function _compact_exploded_word($match)
	{
		return preg_replace('/\s+/', '', $match[0]);
	}

	// --------------------------------------------------------------------

	/**
	 * HTML Entities Decode
	 *
	 * This function is a replacement for html_entity_decode()
	 *
	 * In some versions of PHP the native function does not work
	 * when UTF-8 is the specified character set (the fix was only
	 * made in PHP 5.x), so this gives us a work-around. More info here:
	 * http://bugs.php.net/bug.php?id=25670
	 *
	 * It also decodes numeric entities that lack the closing
	 * semicolon: that is not technically correct markup, but most
	 * browsers still interpret such entities, whereas
	 * html_entity_decode() leaves them alone.
	 *
	 * Numeric entities are converted with chr(), i.e. to a single byte.
	 *
	 * @access	private
	 * @param	string
	 * @param	string
	 * @return	string
	 */
	function _html_entity_decode($str, $charset = 'ISO-8859-1')
	{
		if (strpos($str, '&') === FALSE)
		{
			return $str;
		}

		if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>=')))
		{
			$str = html_entity_decode($str, ENT_COMPAT, $charset);

			// Whatever is left is missing its semicolon
			$str = preg_replace_callback('~&#x([0-9a-f]{2,5})~i', array($this, '_hex_entity_to_char'), $str);
			return preg_replace_callback('~&#([0-9]{2,4})~', array($this, '_dec_entity_to_char'), $str);
		}

		// Numeric Entities
		$str = preg_replace_callback('~&#x([0-9a-f]{2,5});{0,1}~i', array($this, '_hex_entity_to_char'), $str);
		$str = preg_replace_callback('~&#([0-9]{2,4});{0,1}~', array($this, '_dec_entity_to_char'), $str);

		// Literal Entities - Slightly slow so we only do it if an
		// ampersand is still present
		if (strpos($str, '&') !== FALSE)
		{
			$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
		}

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Hexadecimal entity to character
	 *
	 * preg_replace_callback() handler for _html_entity_decode().
	 *
	 * @access	private
	 * @param	array	regex match; element 1 holds the hex digits
	 * @return	string
	 */
	function _hex_entity_to_char($match)
	{
		return chr(hexdec($match[1]));
	}

	// --------------------------------------------------------------------

	/**
	 * Decimal entity to character
	 *
	 * preg_replace_callback() handler for _html_entity_decode().
	 * The digits are always read as base 10, so a leading zero
	 * (&#065) is not mistaken for an octal literal.
	 *
	 * @access	private
	 * @param	array	regex match; element 1 holds the decimal digits
	 * @return	string
	 */
	function _dec_entity_to_char($match)
	{
		return chr((int) $match[1]);
	}

}
619// END Input class
620?>