blob: 978d1ff349a9c8905d0dab81ae9e34661a7d5941 [file] [log] [blame]
Derek Jones0b59f272008-05-13 04:22:33 +00001<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
Derek Allarda72b60d2007-01-31 23:56:11 +00002/**
Derek Allardd2df9bc2007-04-15 17:41:17 +00003 * CodeIgniter
Derek Allarda72b60d2007-01-31 23:56:11 +00004 *
5 * An open source application development framework for PHP 4.3.2 or newer
6 *
7 * @package CodeIgniter
Derek Allard3d879d52008-01-18 19:41:32 +00008 * @author ExpressionEngine Dev Team
Derek Allardd2df9bc2007-04-15 17:41:17 +00009 * @copyright Copyright (c) 2006, EllisLab, Inc.
Derek Jones7a9193a2008-01-21 18:39:20 +000010 * @license http://codeigniter.com/user_guide/license.html
11 * @link http://codeigniter.com
Derek Allarda72b60d2007-01-31 23:56:11 +000012 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * Input Class
20 *
21 * Pre-processes global input data for security
22 *
23 * @package CodeIgniter
24 * @subpackage Libraries
25 * @category Input
Derek Allard3d879d52008-01-18 19:41:32 +000026 * @author ExpressionEngine Dev Team
Derek Jones7a9193a2008-01-21 18:39:20 +000027 * @link http://codeigniter.com/user_guide/libraries/input.html
Derek Allarda72b60d2007-01-31 23:56:11 +000028 */
29class CI_Input {
30 var $use_xss_clean = FALSE;
Derek Jones53437de2008-05-12 18:07:08 +000031 var $xss_hash = '';
Derek Allarda72b60d2007-01-31 23:56:11 +000032 var $ip_address = FALSE;
33 var $user_agent = FALSE;
34 var $allow_get_array = FALSE;
Derek Allard15dcf492008-05-12 21:37:04 +000035
/**
 * Constructor
 *
 * Reads the 'global_xss_filtering' and 'enable_query_strings' config
 * items (each flag is switched on only when the item is strictly TRUE)
 * and then sanitizes the global input arrays.
 *
 * @access	public
 */
function CI_Input()
{
	log_message('debug', "Input Class Initialized");

	$config =& load_class('Config');

	$xss_setting = $config->item('global_xss_filtering');
	$this->use_xss_clean = ($xss_setting === TRUE);

	$query_string_setting = $config->item('enable_query_strings');
	$this->allow_get_array = ($query_string_setting === TRUE);

	$this->_sanitize_globals();
}
Derek Allard15dcf492008-05-12 21:37:04 +000053
Derek Allarda72b60d2007-01-31 23:56:11 +000054 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +000055
/**
 * Sanitize Globals
 *
 * Called from the constructor. This function does the following:
 *
 * Unsets every entry of $GLOBALS whose name matches a key found in
 * $_GET, $_POST, $_COOKIE, $_SERVER, $_FILES, $_ENV or $_SESSION
 * (one level of nested keys included), except for a list of protected
 * names. This happens unconditionally; the register_globals setting
 * is not consulted.
 *
 * Empties $_GET when query strings are not enabled, otherwise cleans
 * its keys and values.
 *
 * Cleans the keys and values of $_POST and $_COOKIE. Cleaning a value
 * includes standardizing its newline characters to \n.
 *
 * @access	private
 * @return	void
 */
function _sanitize_globals()
{
	// Global names that must survive the purge below.
	// NOTE(review): '_COOKIE' is not in this list - confirm that is intended.
	$protected = array(
		'_SERVER', '_GET', '_POST', '_FILES', '_REQUEST', '_SESSION', '_ENV', 'GLOBALS', 'HTTP_RAW_POST_DATA',
		'system_folder', 'application_folder', 'BM', 'EXT', 'CFG', 'URI', 'RTR', 'OUT', 'IN'
	);

	// $_SESSION only takes part when it exists and is an array
	$session = (isset($_SESSION) && is_array($_SESSION)) ? $_SESSION : array();
	$sources = array($_GET, $_POST, $_COOKIE, $_SERVER, $_FILES, $_ENV, $session);

	// Unset globals for security.
	// This is effectively the same as register_globals = off
	foreach ($sources as $source)
	{
		if ( ! is_array($source))
		{
			if ( ! in_array($source, $protected))
			{
				unset($GLOBALS[$source]);
			}

			continue;
		}

		foreach ($source as $name => $value)
		{
			if ( ! in_array($name, $protected))
			{
				unset($GLOBALS[$name]);
			}

			if ( ! is_array($value))
			{
				continue;
			}

			// One level of nested keys is purged as well
			foreach ($value as $inner_name => $inner_value)
			{
				if ( ! in_array($inner_name, $protected))
				{
					unset($GLOBALS[$inner_name]);
				}
			}
		}
	}

	// Is $_GET data allowed? If not it becomes an empty array,
	// otherwise every key and value is cleaned.
	if ( ! $this->allow_get_array)
	{
		$_GET = array();
	}
	elseif (is_array($_GET))
	{
		foreach ($_GET as $field => $data)
		{
			$_GET[$this->_clean_input_keys($field)] = $this->_clean_input_data($data);
		}
	}

	// Clean $_POST Data
	if (is_array($_POST))
	{
		foreach ($_POST as $field => $data)
		{
			$_POST[$this->_clean_input_keys($field)] = $this->_clean_input_data($data);
		}
	}

	// Clean $_COOKIE Data
	if (is_array($_COOKIE))
	{
		foreach ($_COOKIE as $field => $data)
		{
			$_COOKIE[$this->_clean_input_keys($field)] = $this->_clean_input_data($data);
		}
	}

	log_message('debug', "Global POST and COOKIE data sanitized");
}
146
Derek Allarda72b60d2007-01-31 23:56:11 +0000147 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000148
/**
 * Clean Input Data
 *
 * Helper used while sanitizing the input arrays. Arrays are processed
 * recursively (their keys are validated too). For a single value it
 * strips slashes when magic quotes is on, runs the XSS filter when
 * global filtering is enabled, and standardizes newlines to \n.
 *
 * @access	private
 * @param	mixed	a single value or an array of values
 * @return	mixed	the cleaned value or array
 */
function _clean_input_data($str)
{
	if (is_array($str))
	{
		$cleaned = array();

		foreach ($str as $name => $item)
		{
			$cleaned[$this->_clean_input_keys($name)] = $this->_clean_input_data($item);
		}

		return $cleaned;
	}

	// Strip slashes if magic quotes is on so the data looks the same either way
	$str = get_magic_quotes_gpc() ? stripslashes($str) : $str;

	// Apply the XSS filter only when it is enabled globally
	if ($this->use_xss_clean === TRUE)
	{
		$str = $this->xss_clean($str);
	}

	// Standardize newlines; values without a carriage return are returned as they are
	return (strpos($str, "\r") === FALSE) ? $str : str_replace(array("\r\n", "\r"), "\n", $str);
}
Derek Allard15dcf492008-05-12 21:37:04 +0000191
Derek Allarda72b60d2007-01-31 23:56:11 +0000192 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000193
/**
 * Clean Keys
 *
 * Helper that guards against malicious input keys: a key may only
 * consist of letters, digits, colons, underscores, slashes and dashes.
 * Any other key terminates the script.
 *
 * @access	private
 * @param	string	the key to validate
 * @return	string	the unchanged key when it is valid
 */
function _clean_input_keys($str)
{
	// The D modifier makes "$" match only at the very end of the subject.
	// Without it "$" also matches just before a trailing newline, so a key
	// such as "name\n" would be accepted.
	if ( ! preg_match("/^[a-z0-9:_\/-]+$/iD", $str))
	{
		exit('Disallowed Key Characters.');
	}

	return $str;
}
Rick Ellis112569d2007-02-26 19:19:08 +0000214
215 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000216
/**
 * Fetch an item from the GET array
 *
 * When XSS filtering is requested the filtered data is returned as a
 * copy; $_GET itself is never modified by this method.
 *
 * @access	public
 * @param	string	the key to look up in $_GET
 * @param	bool	whether to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE when the key is not set
 */
function get($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_GET[$index]))
	{
		return FALSE;
	}

	if ($xss_clean !== TRUE)
	{
		return $_GET[$index];
	}

	if ( ! is_array($_GET[$index]))
	{
		return $this->xss_clean($_GET[$index]);
	}

	// Build a filtered copy rather than writing the filtered values back
	// into the superglobal, which is how cookie() behaves as well.
	$clean = array();

	foreach ($_GET[$index] as $key => $val)
	{
		$clean[$key] = $this->xss_clean($val);
	}

	return $clean;
}
Derek Allard15dcf492008-05-12 21:37:04 +0000249
Derek Allarda72b60d2007-01-31 23:56:11 +0000250 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000251
/**
 * Fetch an item from the POST array
 *
 * When XSS filtering is requested the filtered data is returned as a
 * copy; $_POST itself is never modified by this method.
 *
 * @access	public
 * @param	string	the key to look up in $_POST
 * @param	bool	whether to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE when the key is not set
 */
function post($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_POST[$index]))
	{
		return FALSE;
	}

	if ($xss_clean !== TRUE)
	{
		return $_POST[$index];
	}

	if ( ! is_array($_POST[$index]))
	{
		return $this->xss_clean($_POST[$index]);
	}

	// Build a filtered copy rather than writing the filtered values back
	// into the superglobal, which is how cookie() behaves as well.
	$clean = array();

	foreach ($_POST[$index] as $key => $val)
	{
		$clean[$key] = $this->xss_clean($val);
	}

	return $clean;
}
Derek Allard15dcf492008-05-12 21:37:04 +0000284
Derek Allarda72b60d2007-01-31 23:56:11 +0000285 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000286
/**
 * Fetch an item from the COOKIE array
 *
 * With XSS filtering requested, an array value is filtered element by
 * element into a new array; $_COOKIE itself is left as it is.
 *
 * @access	public
 * @param	string	the key to look up in $_COOKIE
 * @param	bool	whether to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE when the key is not set
 */
function cookie($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_COOKIE[$index]))
	{
		return FALSE;
	}

	$value = $_COOKIE[$index];

	// No filtering requested: hand the stored value back as it is
	if ($xss_clean !== TRUE)
	{
		return $value;
	}

	if ( ! is_array($value))
	{
		return $this->xss_clean($value);
	}

	$filtered = array();

	foreach ($value as $name => $item)
	{
		$filtered[$name] = $this->xss_clean($item);
	}

	return $filtered;
}
324
325 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000326
/**
 * Fetch an item from the SERVER array
 *
 * @access	public
 * @param	string	the key to look up in $_SERVER
 * @param	bool	whether to run the value through xss_clean()
 * @return	mixed	the value, or FALSE when the key is not set
 */
function server($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_SERVER[$index]))
	{
		return FALSE;
	}

	return ($xss_clean === TRUE) ? $this->xss_clean($_SERVER[$index]) : $_SERVER[$index];
}
Derek Allard15dcf492008-05-12 21:37:04 +0000349
Derek Allarda72b60d2007-01-31 23:56:11 +0000350 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000351
/**
 * Fetch the IP Address
 *
 * The result is computed once and cached in $this->ip_address.
 * HTTP_CLIENT_IP is preferred, then REMOTE_ADDR, then
 * HTTP_X_FORWARDED_FOR. When the chosen value is a comma separated
 * list its last entry is used. '0.0.0.0' is returned when no address
 * is available or the address does not pass valid_ip().
 *
 * @access	public
 * @return	string
 */
function ip_address()
{
	if ($this->ip_address !== FALSE)
	{
		return $this->ip_address;
	}

	// NOTE(review): HTTP_CLIENT_IP and HTTP_X_FORWARDED_FOR come from request
	// headers, so a client can set them to any value. The precedence below is
	// unchanged from the original code; confirm it is acceptable wherever the
	// address is used for security decisions.
	if ($this->server('HTTP_CLIENT_IP'))
	{
		$this->ip_address = $_SERVER['HTTP_CLIENT_IP'];
	}
	elseif ($this->server('REMOTE_ADDR'))
	{
		$this->ip_address = $_SERVER['REMOTE_ADDR'];
	}
	elseif ($this->server('HTTP_X_FORWARDED_FOR'))
	{
		$this->ip_address = $_SERVER['HTTP_X_FORWARDED_FOR'];
	}

	if ($this->ip_address === FALSE)
	{
		$this->ip_address = '0.0.0.0';
		return $this->ip_address;
	}

	if (strstr($this->ip_address, ','))
	{
		$x = explode(',', $this->ip_address);

		// List entries are commonly separated by ", ". Without trim() the
		// leading space makes valid_ip() reject an otherwise valid address.
		$this->ip_address = trim(end($x));
	}

	if ( ! $this->valid_ip($this->ip_address))
	{
		$this->ip_address = '0.0.0.0';
	}

	return $this->ip_address;
}
Derek Allard15dcf492008-05-12 21:37:04 +0000401
Derek Allarda72b60d2007-01-31 23:56:11 +0000402 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000403
/**
 * Validate IP Address
 *
 * Checks that the string is a dotted-quad address: exactly four
 * segments, the first one not starting with 0, and every segment made
 * of one to three digits with a value no greater than 255.
 *
 * Updated version suggested by Geert De Deckere
 *
 * @access	public
 * @param	string	the address to check
 * @return	bool	TRUE when the address is valid, FALSE otherwise
 */
function valid_ip($ip)
{
	$ip_segments = explode('.', $ip);

	// Always 4 segments needed
	if (count($ip_segments) != 4)
	{
		return FALSE;
	}

	// IP can not start with 0
	if (substr($ip_segments[0], 0, 1) == '0')
	{
		return FALSE;
	}

	// Check each segment
	foreach ($ip_segments as $segment)
	{
		// An empty segment (as in "1..2.3" or "1.2.3.") contains no
		// non-digit and is not greater than 255, so it has to be
		// rejected explicitly. Otherwise segments must be digits only,
		// at most 3 characters long and no greater than 255.
		if ($segment === '' OR preg_match("/[^0-9]/", $segment) OR $segment > 255 OR strlen($segment) > 3)
		{
			return FALSE;
		}
	}

	return TRUE;
}
Derek Allard15dcf492008-05-12 21:37:04 +0000440
Derek Allarda72b60d2007-01-31 23:56:11 +0000441 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000442
/**
 * User Agent
 *
 * Returns $_SERVER['HTTP_USER_AGENT'] and remembers it in
 * $this->user_agent. FALSE is returned when the server entry is not set.
 *
 * @access	public
 * @return	mixed	the user agent string, or FALSE when unavailable
 */
function user_agent()
{
	// Only look the value up while nothing has been stored yet
	if ($this->user_agent === FALSE)
	{
		$this->user_agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : FALSE;
	}

	return $this->user_agent;
}
Derek Allard15dcf492008-05-12 21:37:04 +0000460
Derek Allarda72b60d2007-01-31 23:56:11 +0000461 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000462
/**
 * Filename Security
 *
 * Removes directory traversal sequences, markup characters, several
 * punctuation characters and a set of URL-encoded characters from the
 * string, then strips slashes from the result.
 *
 * @access	public
 * @param	string	the filename to sanitize
 * @return	string	the sanitized filename
 */
function filename_security($str)
{
	$bad = array(
					"../",
					"./",
					"<!--",
					"-->",
					"<",
					">",
					"'",
					'"',
					'&',
					'$',
					'#',
					'{',
					'}',
					'[',
					']',
					'=',
					';',
					'?',
					"%20",
					"%22",
					"%3c",		// <
					"%253c", 	// < (double encoded)
					"%3e", 		// >
					"%0e", 		// encoded 0x0E control character
					"%28", 		// (
					"%29", 		// )
					"%2528", 	// ( (double encoded)
					"%26", 		// &
					"%24", 		// $
					"%3f", 		// ?
					"%3b", 		// ;
					"%3d"		// =
				);

	// A single pass is not enough: removing one entry can create another
	// (for example "..;/" turns into "../" once the ";" is removed).
	// Repeat until nothing changes. Every pass that changes the string
	// makes it shorter, so the loop always ends.
	do
	{
		$previous = $str;
		$str = str_replace($bad, '', $str);
	}
	while ($previous != $str);

	return stripslashes($str);
}
Derek Allard15dcf492008-05-12 21:37:04 +0000509
paulburdick763064b2007-06-27 23:25:55 +0000510 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000511
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting Hacks can be
 * prevented.  This function does a fair amount of work but
 * it is extremely thorough, designed to prevent even the
 * most obscure XSS attempts.  Nothing is ever 100% foolproof,
 * of course, but I haven't been able to get anything past
 * the filter.
 *
 * Note: This function should only be used to deal with data
 * upon submission.  It's not something that should
 * be used for general runtime processing.
 *
 * This function was based in part on some code and ideas I
 * got from Bitflux: http://blog.bitflux.ch/wiki/XSS_Prevention
 *
 * To help develop this script I used this great list of
 * vulnerabilities along with a few other hacks I've
 * harvested from examining vulnerabilities in other programs:
 * http://ha.ckers.org/xss.html
 *
 * @access	public
 * @param	mixed	a string, or an array whose elements are cleaned recursively
 * @return	mixed	the cleaned string or array
 */
function xss_clean($str)
{
	/*
	 * Is the string an array?
	 *
	 * foreach is used instead of each() so the result does not
	 * depend on where the array's internal pointer happens to be.
	 */
	if (is_array($str))
	{
		foreach ($str as $key => $val)
		{
			$str[$key] = $this->xss_clean($val);
		}

		return $str;
	}

	/*
	 * Remove Null Characters
	 *
	 * This prevents sandwiching null characters
	 * between ascii characters, like Java\0script.
	 *
	 */
	$str = preg_replace('/\0+/', '', $str);
	$str = preg_replace('/(\\\\0)+/', '', $str);

	/*
	 * Protect GET variables in URLs
	 *
	 * The "&" in front of "name=value" pairs is swapped for a hash
	 * so the entity validation below does not treat it as an entity.
	 */
	$str = preg_replace('|\&([a-z\_0-9]+)\=([a-z\_0-9]+)|i', $this->xss_hash()."\\1=\\2", $str);

	/*
	 * Validate standard character entities
	 *
	 * Add a semicolon if missing.  We do this to enable
	 * the conversion of entities to ASCII later.
	 *
	 */
	$str = preg_replace('#(&\#?[0-9a-z]+)[\x00-\x20]*;?#i', "\\1;", $str);

	/*
	 * Validate UTF16 two byte encoding (x00)
	 *
	 * Just as above, adds a semicolon if missing.
	 *
	 */
	$str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);

	/*
	 * Un-Protect GET variables in URLs
	 */
	$str = str_replace($this->xss_hash(), '&', $str);

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Use rawurldecode() so it does not remove plus signs
	 *
	 */
	$str = rawurldecode($str);

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 *
	 * (An earlier preg_match_all()/str_replace() version of this
	 * step was replaced by the callbacks below because they are
	 * more efficient memory-wise.)
	 *
	 */
	$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_attribute_conversion'), $str);

	$str = preg_replace_callback("/<([\w]+)[^>]*>/si", array($this, '_html_entity_decode_callback'), $str);

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like this: ja	vascript
	 * NOTE: we deal with spaces between characters later.
	 * NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
	 * so we use str_replace.
	 *
	 */
	if (strpos($str, "\t") !== FALSE)
	{
		$str = str_replace("\t", ' ', $str);
	}

	/*
	 * Not Allowed Under Any Conditions
	 *
	 * Both lists are applied here and once more at the very end.
	 * They are defined a single time so the two passes cannot
	 * drift apart.
	 */
	$never_allowed_str = array(
					'document.cookie'	=> '[removed]',
					'document.write'	=> '[removed]',
					'.parentNode'		=> '[removed]',
					'.innerHTML'		=> '[removed]',
					'window.location'	=> '[removed]',
					'-moz-binding'		=> '[removed]',
					'<!--'				=> '&lt;!--',
					'-->'				=> '--&gt;',
					'<![CDATA['			=> '&lt;![CDATA['
				);

	$never_allowed_regex = array(
					"javascript\s*:"	=> '[removed]',
					"expression\s*\("	=> '[removed]', // CSS and IE
					"Redirect\s+302"	=> '[removed]'
				);

	foreach ($never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	/*
	 * Makes PHP tags safe
	 *
	 * Note: XML tags are inadvertently replaced too:
	 *
	 *	<?xml
	 *
	 * But it doesn't seem to pose a problem.
	 *
	 */
	$str = str_replace(array('<?php', '<?PHP', '<?', '?'.'>'),  array('&lt;?php', '&lt;?PHP', '&lt;?', '?&gt;'), $str);

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like:  j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 *
	 */
	$words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
	foreach ($words as $word)
	{
		$temp = '';
		for ($i = 0; $i < strlen($word); $i++)
		{
			$temp .= substr($word, $i, 1)."\s*";
		}

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto".
		// substr() drops the trailing "\s*" of the pattern. A callback is used
		// instead of the /e modifier, which backslash-escapes quotes found in
		// the matched text and so altered the input.
		$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 *
	 * Repeated until a pass leaves the string unchanged.
	 */
	do
	{
		$original = $str;

		if ((version_compare(PHP_VERSION, '5.0', '>=') === TRUE && stripos($str, '</a>') !== FALSE) OR
			 preg_match("/<\/a>/i", $str))
		{
			$str = preg_replace_callback("#<a.*?</a>#si", array($this, '_js_link_removal'), $str);
		}

		if ((version_compare(PHP_VERSION, '5.0', '>=') === TRUE && stripos($str, '<img') !== FALSE) OR
			 preg_match("/img/i", $str))
		{
			$str = preg_replace_callback("#<img.*?".">#si", array($this, '_js_img_removal'), $str);
		}

		if ((version_compare(PHP_VERSION, '5.0', '>=') === TRUE && (stripos($str, 'script') !== FALSE OR stripos($str, 'xss') !== FALSE)) OR
			 preg_match("/(script|xss)/i", $str))
		{
			$str = preg_replace("#</*(script|xss).*?\>#si", "", $str);
		}
	}
	while($original != $str);

	unset($original);

	/*
	 * Neutralize JavaScript Event Handlers
	 *
	 * Note: This code is a little blunt.  Any tag that contains
	 * one of the names below has its angle brackets converted
	 * to entities, but it's unlikely to be a problem.
	 *
	 */
	$event_handlers = array('onblur','onchange','onclick','onfocus','onload','onmouseover','onmouseup','onmousedown','onselect','onsubmit','onunload','onkeypress','onkeydown','onkeyup','onresize', 'xmlns');
	$str = preg_replace("#<([^>]+)(".implode('|', $event_handlers).")([^>]*)>#iU", "&lt;\\1\\2\\3&gt;", $str);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 *
	 */
	$str = preg_replace('#<(/*\s*)(alert|applet|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|layer|link|meta|object|plaintext|style|script|textarea|title|xml|xss)([^>]*)>#is', "&lt;\\1\\2\\3&gt;", $str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed.  Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:		eval&#40;'some code'&#41;
	 *
	 */
	$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

	/*
	 * Final clean up
	 *
	 * This adds a bit of extra precaution in case
	 * something got through the above filters
	 *
	 */
	foreach ($never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	log_message('debug', "XSS Filtering completed");
	return $str;
}

// --------------------------------------------------------------------

/**
 * Compact Exploded Words
 *
 * Callback function for xss_clean() that removes the whitespace
 * from inside a matched, exploded word.
 *
 * @access	private
 * @param	array	[1] is the exploded word, [2] the non-word character after it
 * @return	string	the compacted word followed by the unchanged character
 */
function _compact_exploded_words($matches)
{
	return preg_replace('/\s+/s', '', $matches[1]).$matches[2];
}
833
834 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000835
/**
 * Random Hash for protecting URLs
 *
 * Returns the hash stored in $this->xss_hash, generating it on the
 * first call and reusing the stored value on every later call.
 *
 * @access	public
 * @return	string	32 character hexadecimal string
 */
function xss_hash()
{
	if ($this->xss_hash == '')
	{
		if (function_exists('random_bytes'))
		{
			// Prefer the CSPRNG when the runtime offers it; md5() keeps the
			// token in the same 32 character hex format as the legacy path.
			$this->xss_hash = md5(random_bytes(16));
		}
		else
		{
			// version_compare() instead of a loose string/float comparison,
			// matching the version test used by _html_entity_decode().
			if (version_compare(phpversion(), '4.2.0', '>='))
			{
				mt_srand();
			}
			else
			{
				mt_srand(hexdec(substr(md5(microtime()), -8)) & 0x7fffffff);
			}

			$this->xss_hash = md5(time() + mt_rand(0, 1999999999));
		}
	}

	return $this->xss_hash;
}
856
857 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000858
/**
 * JS Link Removal
 *
 * Callback for xss_clean(). Receives one matched anchor at a time so the
 * pattern below only ever runs against a short string, which keeps PCRE
 * backtracking low and avoids PREG_BACKTRACK_LIMIT_ERROR in PHP 5.2+
 * on link-heavy input.
 *
 * An anchor whose text after href= contains one of the listed script
 * markers is removed entirely; anything else is returned unchanged.
 *
 * @access	private
 * @param	array	regex match; element 0 is the text to filter
 * @return	string
 */
function _js_link_removal($match)
{
	$unsafe_anchor = "#<a.+?href=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>.*?</a>#si";
	$anchor = $match[0];

	return preg_replace($unsafe_anchor, "", $anchor);
}
Derek Allard15dcf492008-05-12 21:37:04 +0000875
/**
 * JS Image Removal
 *
 * Callback for xss_clean(). Receives one matched image tag at a time so
 * the pattern below only ever runs against a short string, which keeps
 * PCRE backtracking low and avoids PREG_BACKTRACK_LIMIT_ERROR in
 * PHP 5.2+ on image-heavy input.
 *
 * An image tag whose text after src= contains one of the listed script
 * markers is removed entirely; anything else is returned unchanged.
 *
 * @access	private
 * @param	array	regex match; element 0 is the text to filter
 * @return	string
 */
function _js_img_removal($match)
{
	$unsafe_image = "#<img.+?src=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>#si";
	$image = $match[0];

	return preg_replace($unsafe_image, "", $image);
}
Derek Allarda72b60d2007-01-31 23:56:11 +0000892
Derek Jones01f72ca2007-05-04 18:19:17 +0000893 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000894
/**
 * Attribute Conversion
 *
 * Used as a callback for XSS Clean. Encodes every '>' found in the
 * matched text as the entity &gt;.
 *
 * The previous implementation emitted &lt; (the entity for '<'), so a
 * '>' would come back as '<' once the text was entity-decoded again.
 *
 * @access	public
 * @param	array	regex match; element 0 is the text to convert
 * @return	string
 */
function _attribute_conversion($match)
{
	return str_replace('>', '&gt;', $match[0]);
}
Derek Allard15dcf492008-05-12 21:37:04 +0000908
Derek Jones303c9cb2007-07-12 19:12:37 +0000909 // --------------------------------------------------------------------
910
/**
 * HTML Entity Decode Callback
 *
 * Used as a callback for XSS Clean. Passes the matched text to
 * _html_entity_decode() together with the upper-cased 'charset'
 * item read from the global config object.
 *
 * @access	public
 * @param	array	regex match; element 0 is the text to decode
 * @return	string
 */
function _html_entity_decode_callback($match)
{
	global $CFG;

	return $this->_html_entity_decode($match[0], strtoupper($CFG->item('charset')));
}
927
928 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000929
/**
 * HTML Entities Decode
 *
 * This function is a replacement for html_entity_decode()
 *
 * In some versions of PHP the native function does not work
 * when UTF-8 is the specified character set, and the fix was not
 * back-ported to versions other than PHP 5.x, so this gives us
 * a work-around. More info here:
 * http://bugs.php.net/bug.php?id=25670
 *
 * It also decodes numeric entities that are missing the trailing
 * semicolon, which html_entity_decode() leaves alone even though
 * most browsers still interpret them.
 *
 * @access	private
 * @param	string	text to decode
 * @param	string	character set passed to html_entity_decode()
 * @return	string
 */
function _html_entity_decode($str, $charset='UTF-8')
{
	// Nothing to decode without an ampersand
	if (stristr($str, '&') === FALSE) return $str;

	// Callbacks replace the former /e (eval) pattern modifier, which
	// executed replacement text as PHP code and is gone in PHP 7+.
	$hex_to_char = array($this, '_hex_entity_to_char');
	$dec_to_char = array($this, '_dec_entity_to_char');

	if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>=')))
	{
		$str = html_entity_decode($str, ENT_COMPAT, $charset);

		// Whatever numeric entities remain are the ones written
		// without a semicolon.
		$str = preg_replace_callback('~&#x([0-9a-f]{2,5})~i', $hex_to_char, $str);
		return preg_replace_callback('~&#([0-9]{2,4})~', $dec_to_char, $str);
	}

	// Numeric Entities
	$str = preg_replace_callback('~&#x([0-9a-f]{2,5});{0,1}~i', $hex_to_char, $str);
	$str = preg_replace_callback('~&#([0-9]{2,4});{0,1}~', $dec_to_char, $str);

	// Literal Entities - slightly slow, so only run the translation when
	// an ampersand is still present. (The test used to be inverted,
	// which made this step a no-op.)
	if (stristr($str, '&') !== FALSE)
	{
		$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
	}

	return $str;
}

/**
 * Hexadecimal entity callback for _html_entity_decode()
 *
 * @access	private
 * @param	array	regex match; element 1 holds the hex digits
 * @return	string	result of chr() for that code
 */
function _hex_entity_to_char($match)
{
	return chr((int) hexdec($match[1]));
}

/**
 * Decimal entity callback for _html_entity_decode()
 *
 * The digits are cast as a base-10 integer, so a leading zero is
 * not read as a PHP octal literal the way the eval'd code read it.
 *
 * @access	private
 * @param	array	regex match; element 1 holds the decimal digits
 * @return	string	result of chr() for that code
 */
function _dec_entity_to_char($match)
{
	return chr((int) $match[1]);
}
983
984}
985// END Input class
Derek Jones53437de2008-05-12 18:07:08 +0000986
987/* End of file Input.php */
Derek Jonesa3ffbbb2008-05-11 18:18:29 +0000988/* Location: ./system/libraries/Input.php */