blob: 8408b16fc3877eda6cfaedaa908a707f6359a265 [file] [log] [blame]
<?php
// Refuse to run when the file is requested directly instead of being
// included by code that has defined BASEPATH.
if ( ! defined('BASEPATH'))
{
	exit('No direct script access allowed');
}
/**
 * Code Igniter
 *
 * An open source application development framework for PHP 4.3.2 or newer
 *
 * @package		CodeIgniter
 * @author		Rick Ellis
 * @copyright	Copyright (c) 2006, pMachine, Inc.
 * @license		http://www.codeigniter.com/user_guide/license.html
 * @link		http://www.codeigniter.com
 * @since		Version 1.0
 * @filesource
 */

// ------------------------------------------------------------------------

/**
 * Input Class
 *
 * Pre-processes global input data for security
 *
 * @package		CodeIgniter
 * @subpackage	Libraries
 * @category	Input
 * @author		Rick Ellis
 * @link		http://www.codeigniter.com/user_guide/libraries/input.html
 */
class CI_Input {
	var $use_xss_clean		= FALSE;	// TRUE when the 'global_xss_filtering' config item is TRUE
	var $ip_address			= FALSE;	// Cached result of ip_address(), FALSE until first call
	var $user_agent			= FALSE;	// Cached result of user_agent(), FALSE until first call
	var $allow_get_array	= FALSE;	// TRUE when the 'enable_query_strings' config item is TRUE

	/**
	 * Constructor (PHP 4 style)
	 *
	 * Kept so that PHP 4 and code calling parent::CI_Input() keep working.
	 * All of the work is done by __construct().
	 *
	 * @access	public
	 */
	function CI_Input()
	{
		$this->__construct();
	}

	// --------------------------------------------------------------------

	/**
	 * Constructor
	 *
	 * Reads the XSS-filtering and query-string settings from the Config
	 * class and then sanitizes the request globals.
	 *
	 * @access	public
	 */
	function __construct()
	{
		log_message('debug', "Input Class Initialized");

		$CFG =& load_class('Config');
		$this->use_xss_clean	= ($CFG->item('global_xss_filtering') === TRUE) ? TRUE : FALSE;
		$this->allow_get_array	= ($CFG->item('enable_query_strings') === TRUE) ? TRUE : FALSE;
		$this->_sanitize_globals();
	}

	// --------------------------------------------------------------------

	/**
	 * Sanitize Globals
	 *
	 * This function does the following:
	 *
	 * Unsets request-injected global variables if register_globals is enabled
	 *
	 * Empties $_GET (if query strings are not enabled)
	 *
	 * Validates the keys of $_POST and $_COOKIE, optionally XSS-filters
	 * their values and standardizes newline characters to \n
	 *
	 * @access	private
	 * @return	void
	 */
	function _sanitize_globals()
	{
		// ini_get() may return '1', 'On', '' or FALSE depending on how (and
		// whether) the directive is set, so normalise before testing it.
		$register_globals = strtolower((string) ini_get('register_globals'));

		if (in_array($register_globals, array('1', 'on', 'true', 'yes'), TRUE))
		{
			// These must never be removed, whatever the request contains
			$protected = array('GLOBALS', '_GET', '_POST', '_COOKIE', '_FILES', '_SERVER', '_ENV', '_REQUEST', '_SESSION');

			foreach (array($_GET, $_POST, $_COOKIE) as $global)
			{
				if ( ! is_array($global))
				{
					continue;
				}

				foreach ($global as $key => $val)
				{
					$key = (string) $key;

					// The variables have to be removed from $GLOBALS: a
					// variable-variable unset() inside a method would only
					// affect this method's local scope.  A global is only
					// dropped while it still holds the value taken from the
					// request, so variables assigned by the application
					// after start-up are left alone.
					if ( ! in_array($key, $protected, TRUE) AND isset($GLOBALS[$key]) AND $GLOBALS[$key] === $val)
					{
						unset($GLOBALS[$key]);
					}
				}
			}
		}

		// Is $_GET data allowed? If not we'll set the $_GET to an empty array
		if ($this->allow_get_array == FALSE)
		{
			$_GET = array();
		}

		// Clean $_POST Data
		if (is_array($_POST) AND count($_POST) > 0)
		{
			foreach ($_POST as $key => $val)
			{
				$_POST[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
			}
		}

		// Clean $_COOKIE Data
		if (is_array($_COOKIE) AND count($_COOKIE) > 0)
		{
			foreach ($_COOKIE as $key => $val)
			{
				$_COOKIE[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
			}
		}

		log_message('debug', "Global POST and COOKIE data sanitized");
	}

	// --------------------------------------------------------------------

	/**
	 * Clean Input Data
	 *
	 * This is a helper function. It validates array keys, runs the
	 * XSS filter when global filtering is enabled and standardizes
	 * newline characters to \n.  Arrays are processed recursively.
	 *
	 * @access	private
	 * @param	mixed	string or (nested) array of strings
	 * @return	mixed	cleaned value of the same shape
	 */
	function _clean_input_data($str)
	{
		if (is_array($str))
		{
			$new_array = array();
			foreach ($str as $key => $val)
			{
				$new_array[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
			}
			return $new_array;
		}

		if ($this->use_xss_clean === TRUE)
		{
			$str = $this->xss_clean($str);
		}

		// Standardize newlines (CRLF, CR and LF all become LF)
		return preg_replace("/\015\012|\015|\012/", "\n", $str);
	}

	// --------------------------------------------------------------------

	/**
	 * Clean Keys
	 *
	 * This is a helper function. To prevent malicious users
	 * from trying to exploit keys we make sure that keys are
	 * only named with alpha-numeric text and a few other items
	 * (colon, underscore, slash and dash).  The script is halted
	 * when a key contains anything else.
	 *
	 * @access	private
	 * @param	string
	 * @return	string	the key, unchanged
	 */
	function _clean_input_keys($str)
	{
		// The D modifier stops "$" from matching before a trailing newline
		if ( ! preg_match('/^[a-z0-9:_\/-]+$/iD', $str))
		{
			// The key is attacker-controlled, so escape it before echoing it
			exit('Disallowed Key Characters: '.htmlspecialchars((string) $str, ENT_QUOTES));
		}

		// No addslashes() is needed here: the whitelist above does not admit
		// any character that addslashes() would escape.
		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Fetch from array
	 *
	 * Shared implementation of post(), cookie() and server().  The source
	 * array is never modified; XSS filtering is applied to a copy.
	 *
	 * @access	private
	 * @param	array	the array to read from
	 * @param	string	the key to look up
	 * @param	bool	whether to run the value through xss_clean()
	 * @return	mixed	the value, or FALSE when the key is not set
	 */
	function _fetch_from_array($array, $index = '', $xss_clean = FALSE)
	{
		if ( ! is_array($array) OR ! isset($array[$index]))
		{
			return FALSE;
		}

		if ($xss_clean === TRUE)
		{
			return $this->_xss_clean_value($array[$index]);
		}

		return $array[$index];
	}

	// --------------------------------------------------------------------

	/**
	 * XSS-filter a string or every string in a (nested) array
	 *
	 * @access	private
	 * @param	mixed
	 * @return	mixed	filtered copy of the same shape
	 */
	function _xss_clean_value($value)
	{
		if ( ! is_array($value))
		{
			return $this->xss_clean($value);
		}

		$clean = array();
		foreach ($value as $key => $val)
		{
			$clean[$key] = $this->_xss_clean_value($val);
		}

		return $clean;
	}

	// --------------------------------------------------------------------

	/**
	 * Fetch an item from the POST array
	 *
	 * @access	public
	 * @param	string	key in $_POST
	 * @param	bool	whether to XSS-filter the returned value
	 * @return	mixed	the value, or FALSE when the key is not set
	 */
	function post($index = '', $xss_clean = FALSE)
	{
		return $this->_fetch_from_array($_POST, $index, $xss_clean);
	}

	// --------------------------------------------------------------------

	/**
	 * Fetch an item from the COOKIE array
	 *
	 * @access	public
	 * @param	string	key in $_COOKIE
	 * @param	bool	whether to XSS-filter the returned value
	 * @return	mixed	the value, or FALSE when the key is not set
	 */
	function cookie($index = '', $xss_clean = FALSE)
	{
		return $this->_fetch_from_array($_COOKIE, $index, $xss_clean);
	}

	// --------------------------------------------------------------------

	/**
	 * Fetch an item from the SERVER array
	 *
	 * @access	public
	 * @param	string	key in $_SERVER
	 * @param	bool	whether to XSS-filter the returned value
	 * @return	mixed	the value, or FALSE when the key is not set
	 */
	function server($index = '', $xss_clean = FALSE)
	{
		return $this->_fetch_from_array($_SERVER, $index, $xss_clean);
	}

	// --------------------------------------------------------------------

	/**
	 * Fetch the IP Address
	 *
	 * The result is cached in $this->ip_address.  '0.0.0.0' is returned
	 * when no address is available or the candidate fails valid_ip().
	 *
	 * @access	public
	 * @return	string
	 */
	function ip_address()
	{
		if ($this->ip_address !== FALSE)
		{
			return $this->ip_address;
		}

		// NOTE(review): HTTP_CLIENT_IP and HTTP_X_FORWARDED_FOR come from
		// request headers and can be set by the client.  The precedence
		// below is unchanged from the original implementation.
		if ($this->server('HTTP_CLIENT_IP'))
		{
			$this->ip_address = $_SERVER['HTTP_CLIENT_IP'];
		}
		elseif ($this->server('REMOTE_ADDR'))
		{
			$this->ip_address = $_SERVER['REMOTE_ADDR'];
		}
		elseif ($this->server('HTTP_X_FORWARDED_FOR'))
		{
			$this->ip_address = $_SERVER['HTTP_X_FORWARDED_FOR'];
		}

		if ($this->ip_address === FALSE)
		{
			$this->ip_address = '0.0.0.0';
			return $this->ip_address;
		}

		if (strstr($this->ip_address, ','))
		{
			// Lists are usually written "a, b": without trim() the last
			// entry keeps its leading space and fails validation.
			$x = explode(',', $this->ip_address);
			$this->ip_address = trim(end($x));
		}

		if ( ! $this->valid_ip($this->ip_address))
		{
			$this->ip_address = '0.0.0.0';
		}

		return $this->ip_address;
	}

	// --------------------------------------------------------------------

	/**
	 * Validate IP Address
	 *
	 * Accepts dotted-quad IPv4 addresses whose four octets are each
	 * in the range 0-255.
	 *
	 * @access	public
	 * @param	string
	 * @return	bool
	 */
	function valid_ip($ip)
	{
		// The D modifier stops "$" from matching before a trailing newline
		if ( ! preg_match('/^[0-9]{1,3}(\.[0-9]{1,3}){3}$/D', $ip))
		{
			return FALSE;
		}

		foreach (explode('.', $ip) as $octet)
		{
			if ((int) $octet > 255)
			{
				return FALSE;
			}
		}

		return TRUE;
	}

	// --------------------------------------------------------------------

	/**
	 * User Agent
	 *
	 * The result is cached in $this->user_agent.
	 *
	 * @access	public
	 * @return	mixed	the User-Agent header, or FALSE when it is not set
	 */
	function user_agent()
	{
		if ($this->user_agent !== FALSE)
		{
			return $this->user_agent;
		}

		$this->user_agent = ( ! isset($_SERVER['HTTP_USER_AGENT'])) ? FALSE : $_SERVER['HTTP_USER_AGENT'];

		return $this->user_agent;
	}

	// --------------------------------------------------------------------

	/**
	 * XSS Clean
	 *
	 * Sanitizes data so that Cross Site Scripting Hacks can be
	 * prevented.  The filter works through a fixed sequence of
	 * replacements; the order of the steps below matters.  Nothing
	 * is ever 100% foolproof, so do not treat the result as a
	 * substitute for escaping output.
	 *
	 * Note: This function should only be used to deal with data
	 * upon submission.  It's not something that should
	 * be used for general runtime processing.
	 *
	 * This function was based in part on some code and ideas from
	 * Bitflux: http://blog.bitflux.ch/wiki/XSS_Prevention
	 *
	 * The list of vulnerabilities used while developing it:
	 * http://ha.ckers.org/xss.html
	 *
	 * @access	public
	 * @param	string	the data to filter
	 * @param	string	character set passed on to _html_entity_decode()
	 * @return	string
	 */
	function xss_clean($str, $charset = 'ISO-8859-1')
	{
		/*
		 * Remove Null Characters
		 *
		 * This prevents sandwiching null characters
		 * between ascii characters, like Java\0script.
		 *
		 */
		$str = preg_replace('/\0+/', '', $str);
		$str = preg_replace('/(\\\\0)+/', '', $str);

		/*
		 * Validate standard character entities
		 *
		 * Removes whitespace between an entity and its semicolon.
		 * We do this to enable the conversion of entities to ASCII later.
		 *
		 * The pattern is deliberately run without the "u" modifier: it only
		 * contains ASCII, and with "u" any input that is not valid UTF-8
		 * (the default charset here is ISO-8859-1) makes preg_replace()
		 * return NULL, which would discard the whole string.
		 *
		 */
		$str = preg_replace('#(&\#*\w+)[\x00-\x20]+;#', "\\1;", $str);

		/*
		 * Validate UTF16 two byte encoding (x00)
		 *
		 * Adds a semicolon to numeric entities if missing.
		 * No "u" modifier, for the reason given above.
		 *
		 */
		$str = preg_replace('#(&\#x*)([0-9A-F]+);*#i', "\\1\\2;", $str);

		/*
		 * URL Decode
		 *
		 * Just in case stuff like this is submitted:
		 *
		 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
		 *
		 * Note: Normally urldecode() would be easier but it removes plus signs
		 *
		 */
		$str = preg_replace("/%u0([a-z0-9]{3})/i", "&#x\\1;", $str);
		$str = preg_replace("/%([a-z0-9]{2})/i", "&#x\\1;", $str);

		/*
		 * Convert character entities to ASCII
		 *
		 * This permits our tests below to work reliably.
		 * We only convert entities that are within tags since
		 * these are the ones that will pose security problems.
		 *
		 */
		if (preg_match_all("/<(.+?)>/si", $str, $matches))
		{
			foreach ($matches[1] as $tag_content)
			{
				$str = str_replace($tag_content,
									$this->_html_entity_decode($tag_content, $charset),
									$str);
			}
		}

		/*
		 * Convert all tabs to spaces
		 *
		 * This prevents strings like this: ja	vascript
		 * Note: we deal with spaces between characters later.
		 *
		 */
		$str = preg_replace("#\t+#", " ", $str);

		/*
		 * Makes PHP tags safe
		 *
		 * Note: XML tags are inadvertently replaced too:
		 *
		 *	<?xml
		 *
		 * But it doesn't seem to pose a problem.
		 *
		 */
		$str = str_replace(array('<?php', '<?PHP', '<?', '?>'), array('&lt;?php', '&lt;?PHP', '&lt;?', '?&gt;'), $str);

		/*
		 * Compact any exploded words
		 *
		 * This corrects words like:  j a v a s c r i p t
		 * These words are compacted back to their correct state.
		 *
		 */
		$words = array('javascript', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
		foreach ($words as $word)
		{
			// Build "j\s*a\s*v\s*a..." : each letter followed by optional whitespace
			$temp = '';
			for ($i = 0; $i < strlen($word); $i++)
			{
				$temp .= substr($word, $i, 1).'\s*';
			}

			// Drop the trailing '\s*' (three characters)
			$temp = substr($temp, 0, -3);
			$str = preg_replace('#'.$temp.'#s', $word, $str);
			$str = preg_replace('#'.ucfirst($temp).'#s', ucfirst($word), $str);
		}

		/*
		 * Remove disallowed Javascript in links or img tags
		 */
		$str = preg_replace("#<a.+?href=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>.*?</a>#si", "", $str);
		$str = preg_replace("#<img.+?src=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>#si", "", $str);
		$str = preg_replace("#<(script|xss).*?\>#si", "", $str);

		/*
		 * Remove JavaScript Event Handlers
		 *
		 * Note: This code is a little blunt.  It removes
		 * the event handler and anything up to the closing >,
		 * but it's unlikely to be a problem.
		 *
		 */
		$str = preg_replace('#(<[^>]+.*?)(onblur|onchange|onclick|onfocus|onload|onmouseover|onmouseup|onmousedown|onselect|onsubmit|onunload|onkeypress|onkeydown|onkeyup|onresize)[^>]*>#iU', "\\1>", $str);

		/*
		 * Sanitize naughty HTML elements
		 *
		 * If a tag containing any of the words in the list
		 * below is found, the tag gets converted to entities.
		 *
		 * So this: <blink>
		 * Becomes: &lt;blink&gt;
		 *
		 */
		$str = preg_replace('#<(/*\s*)(alert|applet|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|layer|link|meta|object|plaintext|style|script|textarea|title|xml|xss)([^>]*)>#is', "&lt;\\1\\2\\3&gt;", $str);

		/*
		 * Sanitize naughty scripting elements
		 *
		 * Similar to above, only instead of looking for
		 * tags it looks for PHP and JavaScript commands
		 * that are disallowed.  Rather than removing the
		 * code, it simply converts the parenthesis to entities
		 * rendering the code un-executable.
		 *
		 * For example:	eval('some code')
		 * Becomes:		eval&#40;'some code'&#41;
		 *
		 */
		$str = preg_replace('#(alert|cmd|passthru|eval|exec|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

		/*
		 * Final clean up
		 *
		 * This adds a bit of extra precaution in case
		 * something got through the above filters.
		 * The keys are used as regular expressions.
		 *
		 */
		$bad = array(
						'document.cookie'	=> '',
						'document.write'	=> '',
						'window.location'	=> '',
						"javascript\s*:"	=> '',
						"Redirect\s+302"	=> '',
						'<!--'				=> '&lt;!--',
						'-->'				=> '--&gt;'
					);

		foreach ($bad as $key => $val)
		{
			$str = preg_replace("#".$key."#i", $val, $str);
		}

		log_message('debug', "XSS Filtering completed");
		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * HTML Entities Decode
	 *
	 * This function is a replacement for html_entity_decode()
	 *
	 * In some versions of PHP the native function does not work
	 * when UTF-8 is the specified character set, so this gives us
	 * a work-around.  More info here:
	 * http://bugs.php.net/bug.php?id=25670
	 *
	 * Unlike the native function it also converts numeric entities
	 * that are missing their trailing semicolon.
	 *
	 * @access	private
	 * @param	string	text that may contain entities
	 * @param	string	character set handed to html_entity_decode()
	 * @return	string
	 */
	function _html_entity_decode($str, $charset='ISO-8859-1')
	{
		if (stristr($str, '&') === FALSE) return $str;

		// The reason we are not using html_entity_decode() by itself is because
		// while it is not technically correct to leave out the semicolon
		// at the end of an entity most browsers will still interpret the entity
		// correctly.  html_entity_decode() does not convert entities without
		// semicolons, so we are left with our own little solution here.
		//
		// Callbacks are used instead of the "e" pattern modifier: that modifier
		// evaluated the digits as PHP source (so "0106" was read as an octal
		// literal) and it is no longer available in current PHP versions.

		if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>=')))
		{
			$str = html_entity_decode($str, ENT_COMPAT, $charset);
			$str = preg_replace_callback('~&#x([0-9a-f]{2,5})~i', array($this, '_hex_entity_to_char'), $str);
			return preg_replace_callback('~&#([0-9]{2,4})~', array($this, '_dec_entity_to_char'), $str);
		}

		// Numeric Entities
		$str = preg_replace_callback('~&#x([0-9a-f]{2,5});{0,1}~i', array($this, '_hex_entity_to_char'), $str);
		$str = preg_replace_callback('~&#([0-9]{2,4});{0,1}~', array($this, '_dec_entity_to_char'), $str);

		// Literal Entities - Slightly slow so we only do it when an
		// ampersand is still present after the numeric pass
		if (stristr($str, '&') !== FALSE)
		{
			$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
		}

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * preg_replace_callback() handler: hexadecimal entity to character
	 *
	 * @access	private
	 * @param	array	match array; index 1 holds the hex digits
	 * @return	string	single byte, chr() of the value
	 */
	function _hex_entity_to_char($match)
	{
		return chr(hexdec($match[1]));
	}

	// --------------------------------------------------------------------

	/**
	 * preg_replace_callback() handler: decimal entity to character
	 *
	 * @access	private
	 * @param	array	match array; index 1 holds the decimal digits
	 * @return	string	single byte, chr() of the value
	 */
	function _dec_entity_to_char($match)
	{
		// Cast explicitly so leading zeros are read as decimal
		return chr((int) $match[1]);
	}

}
// END Input class
621?>