blob: 9b012d320e233eb1fbde5a5d4c37f8d449629ddc [file] [log] [blame]
Derek Allarda72b60d2007-01-31 23:56:11 +00001<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
2/**
Derek Allardd2df9bc2007-04-15 17:41:17 +00003 * CodeIgniter
Derek Allarda72b60d2007-01-31 23:56:11 +00004 *
5 * An open source application development framework for PHP 4.3.2 or newer
6 *
7 * @package CodeIgniter
Derek Allard3d879d52008-01-18 19:41:32 +00008 * @author ExpressionEngine Dev Team
Derek Allardd2df9bc2007-04-15 17:41:17 +00009 * @copyright Copyright (c) 2006, EllisLab, Inc.
Derek Jones7a9193a2008-01-21 18:39:20 +000010 * @license http://codeigniter.com/user_guide/license.html
11 * @link http://codeigniter.com
Derek Allarda72b60d2007-01-31 23:56:11 +000012 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * Input Class
20 *
21 * Pre-processes global input data for security
22 *
23 * @package CodeIgniter
24 * @subpackage Libraries
25 * @category Input
Derek Allard3d879d52008-01-18 19:41:32 +000026 * @author ExpressionEngine Dev Team
Derek Jones7a9193a2008-01-21 18:39:20 +000027 * @link http://codeigniter.com/user_guide/libraries/input.html
Derek Allarda72b60d2007-01-31 23:56:11 +000028 */
29class CI_Input {
30 var $use_xss_clean = FALSE;
Derek Jones53437de2008-05-12 18:07:08 +000031 var $xss_hash = '';
Derek Allarda72b60d2007-01-31 23:56:11 +000032 var $ip_address = FALSE;
33 var $user_agent = FALSE;
34 var $allow_get_array = FALSE;
35
/**
 * Constructor
 *
 * Reads the "global_xss_filtering" and "enable_query_strings" config
 * items, stores each one as a strict boolean on the object, and then
 * sanitizes the request globals.
 *
 * @access	public
 */
function CI_Input()
{
	log_message('debug', "Input Class Initialized");

	$config =& load_class('Config');

	// Only a literal boolean TRUE in the config switches a feature on;
	// the strict comparison already yields the boolean we want to store.
	$this->use_xss_clean	= ($config->item('global_xss_filtering') === TRUE);
	$this->allow_get_array	= ($config->item('enable_query_strings') === TRUE);

	$this->_sanitize_globals();
}
53
54 // --------------------------------------------------------------------
55
/**
 * Sanitize Globals
 *
 * This function does the following:
 *
 * Unsets every global variable whose name matches a key found in the
 * request arrays (this happens unconditionally, regardless of the
 * register_globals setting), except for a list of protected names
 *
 * Empties $_GET if query strings are not enabled, otherwise cleans it
 *
 * Cleans the keys and values of $_POST and $_COOKIE (key whitelist,
 * magic-quote stripping, optional XSS filtering, newline normalization)
 *
 * @access	private
 * @return	void
 */
function _sanitize_globals()
{
	// Would kind of be "wrong" to unset any of these GLOBALS.
	// '_COOKIE' is listed with the other superglobals: without it a request
	// key named "_COOKIE" would unset the very array that is cleaned below.
	$protected = array('_SERVER', '_GET', '_POST', '_COOKIE', '_FILES', '_REQUEST', '_SESSION', '_ENV', 'GLOBALS', 'HTTP_RAW_POST_DATA',
						'system_folder', 'application_folder', 'BM', 'EXT', 'CFG', 'URI', 'RTR', 'OUT', 'IN');

	// Unset globals for security.
	// This is effectively the same as register_globals = off
	foreach (array($_GET, $_POST, $_COOKIE, $_SERVER, $_FILES, $_ENV, (isset($_SESSION) && is_array($_SESSION)) ? $_SESSION : array()) as $global)
	{
		if (! is_array($global))
		{
			if (! in_array($global, $protected))
			{
				unset($GLOBALS[$global]);
			}
		}
		else
		{
			foreach ($global as $key => $val)
			{
				if (! in_array($key, $protected))
				{
					unset($GLOBALS[$key]);
				}

				// One level of nested keys is treated the same way
				if (is_array($val))
				{
					foreach ($val as $k => $v)
					{
						if (! in_array($k, $protected))
						{
							unset($GLOBALS[$k]);
						}
					}
				}
			}
		}
	}

	// Is $_GET data allowed? If not we'll set the $_GET to an empty array
	if ($this->allow_get_array == FALSE)
	{
		$_GET = array();
	}
	else
	{
		if (is_array($_GET) AND count($_GET) > 0)
		{
			foreach ($_GET as $key => $val)
			{
				$_GET[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
			}
		}
	}

	// Clean $_POST Data
	if (is_array($_POST) AND count($_POST) > 0)
	{
		foreach ($_POST as $key => $val)
		{
			$_POST[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	// Clean $_COOKIE Data
	if (is_array($_COOKIE) AND count($_COOKIE) > 0)
	{
		foreach ($_COOKIE as $key => $val)
		{
			$_COOKIE[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	log_message('debug', "Global POST and COOKIE data sanitized");
}
146
147 // --------------------------------------------------------------------
148
/**
 * Clean Input Data
 *
 * This is a helper function. For a scalar it strips magic-quote
 * slashes (when that feature exists and is on), optionally runs the
 * XSS filter, and standardizes newline characters to \n.
 * For an array it returns a new array in which every key has been
 * validated and every value cleaned recursively.
 *
 * @access	private
 * @param	mixed	string or (nested) array of strings
 * @return	mixed	cleaned value of the same shape
 */
function _clean_input_data($str)
{
	if (is_array($str))
	{
		$new_array = array();
		foreach ($str as $key => $val)
		{
			$new_array[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
		return $new_array;
	}

	// We strip slashes if magic quotes is on to keep things consistent.
	// The function no longer exists in PHP 8, so check for it first
	// instead of dying with a fatal "undefined function" error.
	if (function_exists('get_magic_quotes_gpc') AND get_magic_quotes_gpc())
	{
		$str = stripslashes($str);
	}

	// Should we filter the input data?
	if ($this->use_xss_clean === TRUE)
	{
		$str = $this->xss_clean($str);
	}

	// Standardize newlines (CRLF, lone CR and lone LF all become \n)
	return preg_replace("/\015\012|\015|\012/", "\n", $str);
}
186
187 // --------------------------------------------------------------------
188
/**
 * Clean Keys
 *
 * This is a helper function. To prevent malicious users
 * from trying to exploit keys we make sure that keys are
 * only named with alpha-numeric text and a few other items
 * (colon, underscore, forward slash and dash).
 *
 * The script is terminated with exit() when the key contains
 * any other character.
 *
 * @access	private
 * @param	string
 * @return	string	the key, unchanged
 */
function _clean_input_keys($str)
{
	// The "D" (dollar end only) modifier matters: without it "$" also
	// matches just before a trailing newline, so a key such as "name\n"
	// would slip through the whitelist.
	if (! preg_match('/^[a-z0-9:_\/-]+$/iD', $str))
	{
		exit('Disallowed Key Characters.');
	}

	return $str;
}
Rick Ellis112569d2007-02-26 19:19:08 +0000209
210 // --------------------------------------------------------------------
211
/**
 * Fetch an item from the GET array
 *
 * Returns FALSE when the index is not set. When $xss_clean is TRUE
 * a filtered copy of the value is returned; the $_GET array itself
 * is left untouched, so a later call without filtering still sees
 * the data as it was submitted.
 *
 * @access	public
 * @param	string	key to look up in $_GET
 * @param	bool	whether to run the value through xss_clean()
 * @return	mixed	string or array value, FALSE if not set
 */
function get($index = '', $xss_clean = FALSE)
{
	if (! isset($_GET[$index]))
	{
		return FALSE;
	}

	if ($xss_clean === TRUE)
	{
		if (is_array($_GET[$index]))
		{
			// Build a filtered copy rather than writing back into $_GET
			$clean = array();
			foreach ($_GET[$index] as $key => $val)
			{
				$clean[$key] = $this->xss_clean($val);
			}

			return $clean;
		}

		return $this->xss_clean($_GET[$index]);
	}

	return $_GET[$index];
}
Derek Allarda72b60d2007-01-31 23:56:11 +0000244
245 // --------------------------------------------------------------------
246
/**
 * Fetch an item from the POST array
 *
 * Returns FALSE when the index is not set. When $xss_clean is TRUE
 * a filtered copy of the value is returned; the $_POST array itself
 * is left untouched, so a later call without filtering still sees
 * the data as it was submitted.
 *
 * @access	public
 * @param	string	key to look up in $_POST
 * @param	bool	whether to run the value through xss_clean()
 * @return	mixed	string or array value, FALSE if not set
 */
function post($index = '', $xss_clean = FALSE)
{
	if (! isset($_POST[$index]))
	{
		return FALSE;
	}

	if ($xss_clean === TRUE)
	{
		if (is_array($_POST[$index]))
		{
			// Build a filtered copy rather than writing back into $_POST
			$clean = array();
			foreach ($_POST[$index] as $key => $val)
			{
				$clean[$key] = $this->xss_clean($val);
			}

			return $clean;
		}

		return $this->xss_clean($_POST[$index]);
	}

	return $_POST[$index];
}
279
280 // --------------------------------------------------------------------
281
/**
 * Fetch an item from the COOKIE array
 *
 * Returns FALSE when the index is not set. With $xss_clean set to
 * TRUE the value (or each top-level element of an array value) is
 * passed through xss_clean() and the filtered copy is returned.
 *
 * @access	public
 * @param	string	key to look up in $_COOKIE
 * @param	bool	whether to run the value through xss_clean()
 * @return	mixed	string or array value, FALSE if not set
 */
function cookie($index = '', $xss_clean = FALSE)
{
	if (! isset($_COOKIE[$index]))
	{
		return FALSE;
	}

	$value = $_COOKIE[$index];

	// No filtering requested: hand the stored value back as it is
	if ($xss_clean !== TRUE)
	{
		return $value;
	}

	if (! is_array($value))
	{
		return $this->xss_clean($value);
	}

	$filtered = array();
	foreach ($value as $name => $item)
	{
		$filtered[$name] = $this->xss_clean($item);
	}

	return $filtered;
}
319
320 // --------------------------------------------------------------------
321
/**
 * Fetch an item from the SERVER array
 *
 * @access	public
 * @param	string	key to look up in $_SERVER
 * @param	bool	whether to run the value through xss_clean()
 * @return	mixed	the stored value, FALSE if not set
 */
function server($index = '', $xss_clean = FALSE)
{
	if (isset($_SERVER[$index]))
	{
		return ($xss_clean === TRUE) ? $this->xss_clean($_SERVER[$index]) : $_SERVER[$index];
	}

	return FALSE;
}
344
345 // --------------------------------------------------------------------
346
/**
 * Fetch the IP Address
 *
 * Picks the address from REMOTE_ADDR, HTTP_CLIENT_IP or
 * HTTP_X_FORWARDED_FOR (see the order below), validates it with
 * valid_ip() and caches the result on the object. '0.0.0.0' is
 * returned when no source is available or validation fails.
 *
 * @access	public
 * @return	string
 */
function ip_address()
{
	if ($this->ip_address !== FALSE)
	{
		return $this->ip_address;
	}

	// NOTE(review): HTTP_CLIENT_IP and HTTP_X_FORWARDED_FOR are request
	// headers, so the client controls them; HTTP_CLIENT_IP even wins
	// over REMOTE_ADDR here. Do not rely on this value for access control.
	if ($this->server('REMOTE_ADDR') AND $this->server('HTTP_CLIENT_IP'))
	{
		$this->ip_address = $_SERVER['HTTP_CLIENT_IP'];
	}
	elseif ($this->server('REMOTE_ADDR'))
	{
		$this->ip_address = $_SERVER['REMOTE_ADDR'];
	}
	elseif ($this->server('HTTP_CLIENT_IP'))
	{
		$this->ip_address = $_SERVER['HTTP_CLIENT_IP'];
	}
	elseif ($this->server('HTTP_X_FORWARDED_FOR'))
	{
		$this->ip_address = $_SERVER['HTTP_X_FORWARDED_FOR'];
	}

	if ($this->ip_address === FALSE)
	{
		$this->ip_address = '0.0.0.0';
		return $this->ip_address;
	}

	if (strstr($this->ip_address, ','))
	{
		// Lists are normally written "a, b, c". Without trim() the last
		// entry keeps its leading space and can never pass valid_ip().
		$x = explode(',', $this->ip_address);
		$this->ip_address = trim(end($x));
	}

	if (! $this->valid_ip($this->ip_address))
	{
		$this->ip_address = '0.0.0.0';
	}

	return $this->ip_address;
}
396
397 // --------------------------------------------------------------------
398
/**
 * Validate IP Address
 *
 * Checks that the string is a dotted-quad IPv4 address: exactly four
 * segments, each made of one to three digits and not greater than 255,
 * with a first segment that does not start with 0.
 *
 * Updated version suggested by Geert De Deckere
 *
 * @access	public
 * @param	string
 * @return	bool
 */
function valid_ip($ip)
{
	$ip_segments = explode('.', $ip);

	// Always 4 segments needed
	if (count($ip_segments) != 4)
	{
		return FALSE;
	}

	// IP can not start with 0
	if (substr($ip_segments[0], 0, 1) == '0')
	{
		return FALSE;
	}

	// Check each segment
	foreach ($ip_segments as $segment)
	{
		// IP segments must be digits and can not be
		// longer than 3 digits or greater than 255.
		// An empty segment (as in "1..2.3") contains no non-digit
		// character, so it has to be rejected explicitly.
		if ($segment === '' OR preg_match("/[^0-9]/", $segment) OR strlen($segment) > 3 OR $segment > 255)
		{
			return FALSE;
		}
	}

	return TRUE;
}
435
436 // --------------------------------------------------------------------
437
/**
 * User Agent
 *
 * Returns the HTTP_USER_AGENT server value, or FALSE when the
 * request did not carry one. A value that was found is cached
 * on the object.
 *
 * @access	public
 * @return	mixed	user agent string, or FALSE if not available
 */
function user_agent()
{
	if ($this->user_agent === FALSE)
	{
		// Nothing cached yet: look the header up
		$this->user_agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : FALSE;
	}

	return $this->user_agent;
}
455
456 // --------------------------------------------------------------------
457
/**
 * Filename Security
 *
 * Removes directory traversal sequences, markup characters, a set of
 * URL-encoded characters and backslashes from a file name.
 *
 * The removal is repeated until the string no longer changes, because
 * deleting one sequence can join the characters around it into a new
 * forbidden sequence (a single pass turns '.....///' into '../').
 *
 * @access	public
 * @param	string
 * @return	string
 */
function filename_security($str)
{
	$bad = array(
					"../",
					"./",
					"<!--",
					"-->",
					"<",
					">",
					"'",
					'"',
					'&',
					'$',
					'#',
					'{',
					'}',
					'[',
					']',
					'=',
					';',
					'?',
					"%20",		// space
					"%22",		// "
					"%3c",		// <
					"%253c",	// < (double encoded)
					"%3e",		// >
					"%0e",		// shift-out control character
					"%28",		// (
					"%29",		// )
					"%2528",	// ( (double encoded)
					"%26",		// &
					"%24",		// $
					"%3f",		// ?
					"%3b",		// ;
					"%3d"		// =
				);

	// stripslashes() is part of the loop as well: '..\/' only becomes
	// '../' once the backslash is gone, so it needs another pass.
	// Every pass can only shorten the string, so the loop terminates.
	do
	{
		$old = $str;
		$str = stripslashes(str_replace($bad, '', $str));
	}
	while ($old !== $str);

	return $str;
}
504
505 // --------------------------------------------------------------------
506
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting Hacks can be
 * prevented.  This function does a fair amount of work: it
 * normalizes and decodes entities, removes or neutralizes a
 * blacklist of strings, tags, attributes and function calls,
 * and re-joins blacklisted words that were split by whitespace.
 * Nothing is ever 100% foolproof, so treat the result as a
 * best-effort filter, not as a guarantee.
 *
 * Note: This function should only be used to deal with data
 * upon submission.  It's not something that should
 * be used for general runtime processing.
 *
 * This function was based in part on some code and ideas
 * from Bitflux: http://blog.bitflux.ch/wiki/XSS_Prevention
 *
 * The list of vulnerabilities used during development:
 * http://ha.ckers.org/xss.html
 *
 * @access	public
 * @param	mixed	string, or (nested) array of strings
 * @return	mixed	filtered value of the same shape
 */
function xss_clean($str)
{
	/*
	 * Is the string an array?
	 *
	 * Arrays are filtered element by element. foreach is used
	 * because each() is no longer available in PHP 8.
	 */
	if (is_array($str))
	{
		foreach ($str as $key => $val)
		{
			$str[$key] = $this->xss_clean($val);
		}

		return $str;
	}

	/*
	 * Not Allowed Under Any Conditions
	 *
	 * Both lists are applied twice: once after decoding and once
	 * more as a final clean up at the end.
	 */
	$never_allowed_str = array(
								'document.cookie'	=> '[removed]',
								'document.write'	=> '[removed]',
								'.parentNode'		=> '[removed]',
								'.innerHTML'		=> '[removed]',
								'window.location'	=> '[removed]',
								'-moz-binding'		=> '[removed]',
								'<!--'				=> '&lt;!--',
								'-->'				=> '--&gt;',
								'<![CDATA['			=> '&lt;![CDATA['
							);

	$never_allowed_regex = array(
								"javascript\s*:"	=> '[removed]',
								"expression\s*\("	=> '[removed]', // CSS and IE
								"Redirect\s+302"	=> '[removed]'
							);

	/*
	 * Remove Null Characters
	 *
	 * This prevents sandwiching null characters
	 * between ascii characters, like Java\0script.
	 *
	 */
	$str = preg_replace('/\0+/', '', $str);
	$str = preg_replace('/(\\\\0)+/', '', $str);

	/*
	 * Protect GET variables in URLs
	 *
	 * The "&" of "&name=value" pairs is swapped for a random
	 * marker so the entity validation below does not touch it.
	 */
	$str = preg_replace('|\&([a-z\_0-9]+)\=([a-z\_0-9]+)|i', $this->xss_hash()."\\1=\\2", $str);

	/*
	 * Validate standard character entities
	 *
	 * Add a semicolon if missing.  We do this to enable
	 * the conversion of entities to ASCII later.
	 *
	 */
	$str = preg_replace('#(&\#?[0-9a-z]+)[\x00-\x20]*;?#i', "\\1;", $str);

	/*
	 * Validate UTF16 two byte encoding (x00)
	 *
	 * Just as above, adds a semicolon if missing.
	 *
	 */
	$str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);

	/*
	 * Un-Protect GET variables in URLs
	 */
	$str = str_replace($this->xss_hash(), '&', $str);

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Use rawurldecode() so it does not remove plus signs
	 *
	 */
	$str = rawurldecode($str);

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 *
	 */
	$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_attribute_conversion'), $str);

	$str = preg_replace_callback("/<([\w]+)[^>]*>/si", array($this, '_html_entity_decode_callback'), $str);

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like this: ja	vascript
	 * NOTE: we deal with spaces between characters later.
	 * NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
	 * so we use str_replace.
	 *
	 */
	$str = str_replace("\t", " ", $str);

	/*
	 * Not Allowed Under Any Conditions (first pass)
	 */
	foreach ($never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	/*
	 * Makes PHP tags safe
	 *
	 * Note: XML tags are inadvertently replaced too:
	 *
	 *	<?xml
	 *
	 * But it doesn't seem to pose a problem.
	 *
	 */
	$str = str_replace(array('<?php', '<?PHP', '<?', '?'.'>'),  array('&lt;?php', '&lt;?PHP', '&lt;?', '?&gt;'), $str);

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like:  j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 *
	 */
	$words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
	foreach ($words as $word)
	{
		$temp = '';
		for ($i = 0; $i < strlen($word); $i++)
		{
			$temp .= substr($word, $i, 1)."\s*";
		}

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto".
		// A callback replaces the former /e (eval) modifier, which PHP 7
		// removed and which added a backslash in front of quote characters.
		$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 *
	 * Repeated until nothing changes, since removing one
	 * piece can expose another.
	 */
	do
	{
		$original = $str;

		if ((version_compare(PHP_VERSION, '5.0', '>=') === TRUE && stripos($str, '</a>') !== FALSE) OR
			 preg_match("/<\/a>/i", $str))
		{
			$str = preg_replace_callback("#<a.*?</a>#si", array($this, '_js_link_removal'), $str);
		}

		if ((version_compare(PHP_VERSION, '5.0', '>=') === TRUE && stripos($str, '<img') !== FALSE) OR
			 preg_match("/img/i", $str))
		{
			$str = preg_replace_callback("#<img.*?".">#si", array($this, '_js_img_removal'), $str);
		}

		if ((version_compare(PHP_VERSION, '5.0', '>=') === TRUE && (stripos($str, 'script') !== FALSE OR stripos($str, 'xss') !== FALSE)) OR
			 preg_match("/(script|xss)/i", $str))
		{
			$str = preg_replace("#</*(script|xss).*?\>#si", "", $str);
		}
	}
	while($original != $str);

	unset($original);

	/*
	 * Remove JavaScript Event Handlers
	 *
	 * Note: This code is a little blunt.  It converts the whole
	 * tag that holds the event handler to entities,
	 * but it's unlikely to be a problem.
	 *
	 */
	$event_handlers = array('onblur','onchange','onclick','onfocus','onload','onmouseover','onmouseup','onmousedown','onselect','onsubmit','onunload','onkeypress','onkeydown','onkeyup','onresize', 'xmlns');
	$str = preg_replace("#<([^>]+)(".implode('|', $event_handlers).")([^>]*)>#iU", "&lt;\\1\\2\\3&gt;", $str);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 *
	 */
	$str = preg_replace('#<(/*\s*)(alert|applet|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|layer|link|meta|object|plaintext|style|script|textarea|title|xml|xss)([^>]*)>#is', "&lt;\\1\\2\\3&gt;", $str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed.  Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:		eval&#40;'some code'&#41;
	 *
	 */
	$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

	/*
	 * Final clean up
	 *
	 * This adds a bit of extra precaution in case
	 * something got through the above filters
	 *
	 */
	foreach ($never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	log_message('debug', "XSS Filtering completed");
	return $str;
}

// --------------------------------------------------------------------

/**
 * Compact Exploded Words
 *
 * Callback function for xss_clean() to remove whitespace from
 * things like j a v a s c r i p t
 *
 * @access	private
 * @param	array	[1] is the spaced-out word, [2] the non-word character after it
 * @return	string
 */
function _compact_exploded_words($matches)
{
	return preg_replace('/\s+/s', '', $matches[1]).$matches[2];
}
825
826 // --------------------------------------------------------------------
Derek Jones01f72ca2007-05-04 18:19:17 +0000827
/**
 * Random Hash for protecting URLs
 *
 * Creates an md5 marker string on first use and returns the same
 * string on every later call for this object. xss_clean() uses it
 * as a temporary stand-in for the "&" of query string pairs.
 *
 * @access	public
 * @return	string
 */
function xss_hash()
{
	// Already generated: reuse it
	if ($this->xss_hash != '')
	{
		return $this->xss_hash;
	}

	// Seed the generator; only pre-4.2 versions get an explicit seed
	if (phpversion() >= 4.2)
	{
		mt_srand();
	}
	else
	{
		mt_srand(hexdec(substr(md5(microtime()), -8)) & 0x7fffffff);
	}

	$this->xss_hash = md5(time() + mt_rand(0, 1999999999));

	return $this->xss_hash;
}
848
849 // --------------------------------------------------------------------
850
/**
 * JS Link Removal
 *
 * Callback function for xss_clean() to sanitize links.
 * Receives one complete <a>...</a> match and returns an empty string
 * when its href contains one of the disallowed script fragments;
 * otherwise the link is returned unchanged.
 * Working on one link at a time limits the PCRE backtracks, making it
 * more performance friendly and prevents PREG_BACKTRACK_LIMIT_ERROR
 * from being triggered in PHP 5.2+ on link-heavy strings
 *
 * @access	private
 * @param	array	regex match; element 0 is the full link
 * @return	string
 */
function _js_link_removal($match)
{
	$scripted_link = "#<a.+?href=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>.*?</a>#si";

	return preg_replace($scripted_link, "", $match[0]);
}
867
/**
 * JS Image Removal
 *
 * Callback function for xss_clean() to sanitize image tags.
 * Receives one complete <img ...> match and returns an empty string
 * when its src contains one of the disallowed script fragments;
 * otherwise the tag is returned unchanged.
 * Working on one tag at a time limits the PCRE backtracks, making it
 * more performance friendly and prevents PREG_BACKTRACK_LIMIT_ERROR
 * from being triggered in PHP 5.2+ on image tag heavy strings
 *
 * @access	private
 * @param	array	regex match; element 0 is the full tag
 * @return	string
 */
function _js_img_removal($match)
{
	$scripted_image = "#<img.+?src=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>#si";

	return preg_replace($scripted_image, "", $match[0]);
}
Derek Allarda72b60d2007-01-31 23:56:11 +0000884
Derek Jones01f72ca2007-05-04 18:19:17 +0000885 // --------------------------------------------------------------------
Derek Jones303c9cb2007-07-12 19:12:37 +0000886
/**
 * Attribute Conversion
 *
 * Used as a callback for XSS Clean. Receives one attribute="value"
 * match and encodes any ">" inside it, so that a ">" in an attribute
 * value is not mistaken for the end of the tag by later patterns.
 *
 * @access	public
 * @param	array	regex match; element 0 is the full attribute
 * @return	string
 */
function _attribute_conversion($match)
{
	// ">" must become "&gt;"; "&lt;" is the entity for "<" and would
	// turn the character into a different one.
	return str_replace('>', '&gt;', $match[0]);
}
900
901 // --------------------------------------------------------------------
902
/**
 * HTML Entity Decode Callback
 *
 * Used as a callback for XSS Clean. Decodes the entities of one
 * matched tag, using the upper-cased "charset" config item as the
 * character set.
 *
 * @access	public
 * @param	array	regex match; element 0 is the full tag
 * @return	string
 */
function _html_entity_decode_callback($match)
{
	global $CFG;

	return $this->_html_entity_decode($match[0], strtoupper($CFG->item('charset')));
}
919
920 // --------------------------------------------------------------------
921
/**
 * HTML Entities Decode
 *
 * This function is a replacement for html_entity_decode()
 *
 * In some versions of PHP the native function does not work
 * when UTF-8 is the specified character set, so this gives us
 * a work-around.  More info here:
 * http://bugs.php.net/bug.php?id=25670
 *
 * NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
 * character set, and the PHP developers said they were not back porting the
 * fix to versions other than PHP 5.x.
 *
 * @access	private
 * @param	string	text that may contain entities
 * @param	string	character set passed to html_entity_decode()
 * @return	string
 */
function _html_entity_decode($str, $charset='UTF-8')
{
	if (stristr($str, '&') === FALSE) return $str;

	// The reason we are not using html_entity_decode() by itself is because
	// while it is not technically correct to leave out the semicolon
	// at the end of an entity most browsers will still interpret the entity
	// correctly.  html_entity_decode() does not convert entities without
	// semicolons, so we are left with our own little solution here. Bummer.
	//
	// Callbacks are used instead of the /e (eval) modifier: PHP 7 removed
	// /e, and evaluating "chr(0065)" read the number as octal.

	if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>=')))
	{
		$str = html_entity_decode($str, ENT_COMPAT, $charset);
		$str = preg_replace_callback('~&#x([0-9a-f]{2,5})~i', array($this, '_hex_entity_to_char'), $str);
		return preg_replace_callback('~&#([0-9]{2,4})~', array($this, '_dec_entity_to_char'), $str);
	}

	// Numeric Entities
	$str = preg_replace_callback('~&#x([0-9a-f]{2,5});{0,1}~i', array($this, '_hex_entity_to_char'), $str);
	$str = preg_replace_callback('~&#([0-9]{2,4});{0,1}~', array($this, '_dec_entity_to_char'), $str);

	// Literal Entities - Slightly slow so we only do it when an
	// ampersand is still present (without one there is nothing to decode)
	if (stristr($str, '&') !== FALSE)
	{
		$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
	}

	return $str;
}

// --------------------------------------------------------------------

/**
 * Hexadecimal Entity To Character
 *
 * Callback for _html_entity_decode(). Converts the hex digits of a
 * "&#x..." entity with chr(); code points above 255 are not
 * representable as a single byte this way.
 *
 * @access	private
 * @param	array	regex match; element 1 holds the hex digits
 * @return	string
 */
function _hex_entity_to_char($match)
{
	return chr(hexdec($match[1]));
}

// --------------------------------------------------------------------

/**
 * Decimal Entity To Character
 *
 * Callback for _html_entity_decode(). Converts the digits of a
 * "&#..." entity with chr(). The digits are always read as a decimal
 * number, so leading zeros ("&#0065") are harmless.
 *
 * @access	private
 * @param	array	regex match; element 1 holds the decimal digits
 * @return	string
 */
function _dec_entity_to_char($match)
{
	return chr((int) $match[1]);
}
975
976}
977// END Input class
Derek Jones53437de2008-05-12 18:07:08 +0000978
979/* End of file Input.php */
Derek Jonesa3ffbbb2008-05-11 18:18:29 +0000980/* Location: ./system/libraries/Input.php */