blob: f9875794707bac5dbadca5284ef3ad946fbd90dc [file] [log] [blame]
Derek Allarda72b60d2007-01-31 23:56:11 +00001<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
2/**
Derek Allardd2df9bc2007-04-15 17:41:17 +00003 * CodeIgniter
Derek Allarda72b60d2007-01-31 23:56:11 +00004 *
5 * An open source application development framework for PHP 4.3.2 or newer
6 *
7 * @package CodeIgniter
Derek Allard3d879d52008-01-18 19:41:32 +00008 * @author ExpressionEngine Dev Team
Derek Allardd2df9bc2007-04-15 17:41:17 +00009 * @copyright Copyright (c) 2006, EllisLab, Inc.
Derek Jones7a9193a2008-01-21 18:39:20 +000010 * @license http://codeigniter.com/user_guide/license.html
11 * @link http://codeigniter.com
Derek Allarda72b60d2007-01-31 23:56:11 +000012 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * Input Class
20 *
21 * Pre-processes global input data for security
22 *
23 * @package CodeIgniter
24 * @subpackage Libraries
25 * @category Input
Derek Allard3d879d52008-01-18 19:41:32 +000026 * @author ExpressionEngine Dev Team
Derek Jones7a9193a2008-01-21 18:39:20 +000027 * @link http://codeigniter.com/user_guide/libraries/input.html
Derek Allarda72b60d2007-01-31 23:56:11 +000028 */
29class CI_Input {
30 var $use_xss_clean = FALSE;
Derek Jones53437de2008-05-12 18:07:08 +000031 var $xss_hash = '';
Derek Allarda72b60d2007-01-31 23:56:11 +000032 var $ip_address = FALSE;
33 var $user_agent = FALSE;
34 var $allow_get_array = FALSE;
Derek Allard15dcf492008-05-12 21:37:04 +000035
/**
 * Constructor
 *
 * Reads the two input-related settings from the Config class and
 * then sanitizes the request globals.
 *
 * - use_xss_clean is enabled only when the 'global_xss_filtering'
 *   config item is strictly boolean TRUE.
 * - allow_get_array is enabled only when the 'enable_query_strings'
 *   config item is strictly boolean TRUE.
 *
 * @access	public
 */
function CI_Input()
{
	log_message('debug', "Input Class Initialized");

	$config =& load_class('Config');

	// A strict comparison already yields a boolean, so no ternary is needed
	$this->use_xss_clean = ($config->item('global_xss_filtering') === TRUE);
	$this->allow_get_array = ($config->item('enable_query_strings') === TRUE);

	$this->_sanitize_globals();
}
Derek Allard15dcf492008-05-12 21:37:04 +000053
Derek Allarda72b60d2007-01-31 23:56:11 +000054 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +000055
/**
 * Sanitize Globals
 *
 * This function does the following:
 *
 * - Unsets every global variable whose name matches a key found in
 *   $_GET, $_POST, $_COOKIE, $_SERVER, $_FILES, $_ENV or $_SESSION
 *   (one nesting level deep), except for the names in $protected.
 *   This is effectively the same as register_globals = off.
 *
 * - Empties $_GET when query strings are not enabled, otherwise
 *   cleans its keys and values.
 *
 * - Cleans the keys and values of $_POST and $_COOKIE, which
 *   (through _clean_input_data) standardizes newlines to \n.
 *
 * @access	private
 * @return	void
 */
function _sanitize_globals()
{
	// Would kind of be "wrong" to unset any of these GLOBALS.
	// '_COOKIE' is listed because $_COOKIE is itself one of the arrays
	// scanned below and is read again afterwards; without it a request
	// key named "_COOKIE" would unset the cookie superglobal.
	$protected = array('_SERVER', '_GET', '_POST', '_COOKIE', '_FILES', '_REQUEST', '_SESSION', '_ENV', 'GLOBALS', 'HTTP_RAW_POST_DATA',
						'system_folder', 'application_folder', 'BM', 'EXT', 'CFG', 'URI', 'RTR', 'OUT', 'IN');

	// Unset globals for security.
	// This is effectively the same as register_globals = off
	foreach (array($_GET, $_POST, $_COOKIE, $_SERVER, $_FILES, $_ENV, (isset($_SESSION) && is_array($_SESSION)) ? $_SESSION : array()) as $global)
	{
		if (! is_array($global))
		{
			// A source that is not an array is treated as a single variable name
			if (! in_array($global, $protected))
			{
				unset($GLOBALS[$global]);
			}
		}
		else
		{
			foreach ($global as $key => $val)
			{
				if (! in_array($key, $protected))
				{
					unset($GLOBALS[$key]);
				}

				// Keys of nested arrays are purged as well (one level only)
				if (is_array($val))
				{
					foreach ($val as $k => $v)
					{
						if (! in_array($k, $protected))
						{
							unset($GLOBALS[$k]);
						}
					}
				}
			}
		}
	}

	// Is $_GET data allowed? If not we'll set the $_GET to an empty array
	if ($this->allow_get_array == FALSE)
	{
		$_GET = array();
	}
	else
	{
		if (is_array($_GET) AND count($_GET) > 0)
		{
			foreach ($_GET as $key => $val)
			{
				$_GET[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
			}
		}
	}

	// Clean $_POST Data
	if (is_array($_POST) AND count($_POST) > 0)
	{
		foreach ($_POST as $key => $val)
		{
			$_POST[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	// Clean $_COOKIE Data
	if (is_array($_COOKIE) AND count($_COOKIE) > 0)
	{
		foreach ($_COOKIE as $key => $val)
		{
			$_COOKIE[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	log_message('debug', "Global POST and COOKIE data sanitized");
}
146
Derek Allarda72b60d2007-01-31 23:56:11 +0000147 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000148
/**
 * Clean Input Data
 *
 * This is a helper function. It escapes data and
 * standardizes newline characters to \n
 *
 * Arrays are processed recursively: every key is validated with
 * _clean_input_keys() and every value is cleaned with this method.
 *
 * @access	private
 * @param	mixed	a string, or an array of strings / nested arrays
 * @return	mixed	the cleaned string, or an array of cleaned values
 */
function _clean_input_data($str)
{
	if (is_array($str))
	{
		$new_array = array();
		foreach ($str as $key => $val)
		{
			$new_array[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
		return $new_array;
	}

	// We strip slashes if magic quotes is on to keep things consistent.
	// Magic quotes were removed in PHP 5.4 and get_magic_quotes_gpc()
	// itself is gone in PHP 8, so the function is only consulted on
	// versions where the setting can actually be enabled.
	if (version_compare(PHP_VERSION, '5.4', '<') && get_magic_quotes_gpc())
	{
		$str = stripslashes($str);
	}

	// Should we filter the input data?
	if ($this->use_xss_clean === TRUE)
	{
		$str = $this->xss_clean($str);
	}

	// Standardize newlines (CRLF, lone CR and lone LF all become \n)
	return preg_replace("/\015\012|\015|\012/", "\n", $str);
}
Derek Allard15dcf492008-05-12 21:37:04 +0000186
Derek Allarda72b60d2007-01-31 23:56:11 +0000187 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000188
/**
 * Clean Keys
 *
 * This is a helper function. To prevent malicious users
 * from trying to exploit keys we make sure that keys are
 * only named with alpha-numeric text and a few other items
 * (colon, underscore, forward slash and dash).
 *
 * Execution is terminated with exit() when a key contains
 * any other character.
 *
 * @access	private
 * @param	string
 * @return	string	the key, unchanged
 */
function _clean_input_keys($str)
{
	// The D modifier makes "$" match only at the very end of the subject;
	// without it a key ending in a newline would slip through.
	if (! preg_match("/^[a-z0-9:_\/-]+$/iD", $str))
	{
		exit('Disallowed Key Characters.');
	}

	return $str;
}
Rick Ellis112569d2007-02-26 19:19:08 +0000209
210 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000211
/**
 * Fetch an item from the GET array
 *
 * When XSS cleaning is requested for an array value, each element is
 * cleaned into a new array, as cookie() does. $_GET itself is left
 * untouched, so a later call without the flag still sees the raw data.
 *
 * @access	public
 * @param	string	key to look up in $_GET
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE if the key is not set
 */
function get($index = '', $xss_clean = FALSE)
{
	if (! isset($_GET[$index]))
	{
		return FALSE;
	}

	if ($xss_clean !== TRUE)
	{
		return $_GET[$index];
	}

	if (! is_array($_GET[$index]))
	{
		return $this->xss_clean($_GET[$index]);
	}

	// Build a cleaned copy rather than rewriting the superglobal
	$cleaned = array();
	foreach ($_GET[$index] as $key => $val)
	{
		$cleaned[$key] = $this->xss_clean($val);
	}

	return $cleaned;
}
Derek Allard15dcf492008-05-12 21:37:04 +0000244
Derek Allarda72b60d2007-01-31 23:56:11 +0000245 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000246
/**
 * Fetch an item from the POST array
 *
 * When XSS cleaning is requested for an array value, each element is
 * cleaned into a new array, as cookie() does. $_POST itself is left
 * untouched, so a later call without the flag still sees the raw data.
 *
 * @access	public
 * @param	string	key to look up in $_POST
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE if the key is not set
 */
function post($index = '', $xss_clean = FALSE)
{
	if (! isset($_POST[$index]))
	{
		return FALSE;
	}

	if ($xss_clean !== TRUE)
	{
		return $_POST[$index];
	}

	if (! is_array($_POST[$index]))
	{
		return $this->xss_clean($_POST[$index]);
	}

	// Build a cleaned copy rather than rewriting the superglobal
	$cleaned = array();
	foreach ($_POST[$index] as $key => $val)
	{
		$cleaned[$key] = $this->xss_clean($val);
	}

	return $cleaned;
}
Derek Allard15dcf492008-05-12 21:37:04 +0000279
Derek Allarda72b60d2007-01-31 23:56:11 +0000280 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000281
/**
 * Fetch an item from the COOKIE array
 *
 * With XSS cleaning requested, a scalar cookie is passed through
 * xss_clean() and an array cookie is cleaned element by element into
 * a new array; $_COOKIE itself is not modified.
 *
 * @access	public
 * @param	string	key to look up in $_COOKIE
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE if the key is not set
 */
function cookie($index = '', $xss_clean = FALSE)
{
	if (! isset($_COOKIE[$index]))
	{
		return FALSE;
	}

	$value = $_COOKIE[$index];

	// No filtering requested: hand back the stored value as-is
	if ($xss_clean !== TRUE)
	{
		return $value;
	}

	if (! is_array($value))
	{
		return $this->xss_clean($value);
	}

	$cleaned = array();
	foreach ($value as $name => $item)
	{
		$cleaned[$name] = $this->xss_clean($item);
	}

	return $cleaned;
}
319
320 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000321
/**
 * Fetch an item from the SERVER array
 *
 * @access	public
 * @param	string	key to look up in $_SERVER
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value, or FALSE if the key is not set
 */
function server($index = '', $xss_clean = FALSE)
{
	if (isset($_SERVER[$index]))
	{
		return ($xss_clean === TRUE) ? $this->xss_clean($_SERVER[$index]) : $_SERVER[$index];
	}

	return FALSE;
}
Derek Allard15dcf492008-05-12 21:37:04 +0000344
Derek Allarda72b60d2007-01-31 23:56:11 +0000345 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000346
/**
 * Fetch the IP Address
 *
 * The result is cached in $this->ip_address after the first call.
 *
 * Source precedence: HTTP_CLIENT_IP, then REMOTE_ADDR, then
 * HTTP_X_FORWARDED_FOR. If the chosen value is a comma-separated
 * list, the last entry is used. Anything that fails valid_ip()
 * (or no source at all) yields '0.0.0.0'.
 *
 * NOTE(review): HTTP_CLIENT_IP and HTTP_X_FORWARDED_FOR are taken
 * from request headers and are preferred over REMOTE_ADDR here;
 * confirm that callers do not treat the result as trustworthy.
 *
 * @access	public
 * @return	string
 */
function ip_address()
{
	if ($this->ip_address !== FALSE)
	{
		return $this->ip_address;
	}

	// Equivalent to the historical four-way chain: a client-IP header
	// wins whenever present, with or without REMOTE_ADDR.
	if ($this->server('HTTP_CLIENT_IP'))
	{
		$this->ip_address = $_SERVER['HTTP_CLIENT_IP'];
	}
	elseif ($this->server('REMOTE_ADDR'))
	{
		$this->ip_address = $_SERVER['REMOTE_ADDR'];
	}
	elseif ($this->server('HTTP_X_FORWARDED_FOR'))
	{
		$this->ip_address = $_SERVER['HTTP_X_FORWARDED_FOR'];
	}

	if ($this->ip_address === FALSE)
	{
		$this->ip_address = '0.0.0.0';
		return $this->ip_address;
	}

	if (strstr($this->ip_address, ','))
	{
		$x = explode(',', $this->ip_address);

		// Lists are normally written "a, b": without trim() the leading
		// space makes the last entry fail validation below.
		$this->ip_address = trim(end($x));
	}

	if (! $this->valid_ip($this->ip_address))
	{
		$this->ip_address = '0.0.0.0';
	}

	return $this->ip_address;
}
Derek Allard15dcf492008-05-12 21:37:04 +0000396
Derek Allarda72b60d2007-01-31 23:56:11 +0000397 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000398
/**
 * Validate IP Address
 *
 * Updated version suggested by Geert De Deckere
 *
 * Accepts dotted-quad IPv4 notation only: exactly four segments,
 * each 1 to 3 decimal digits with a value no greater than 255,
 * and the first segment must not begin with 0.
 *
 * @access	public
 * @param	string
 * @return	bool	TRUE when the address is valid, FALSE otherwise
 */
function valid_ip($ip)
{
	$ip_segments = explode('.', $ip);

	// Always 4 segments needed
	if (count($ip_segments) != 4)
	{
		return FALSE;
	}

	// IP can not start with 0
	if (substr($ip_segments[0], 0, 1) == '0')
	{
		return FALSE;
	}

	// Check each segment
	foreach ($ip_segments as $segment)
	{
		// IP segments must be non-empty, digits only, and can not be
		// longer than 3 digits or greater than 255. The empty check
		// rejects inputs such as "1..2.3" or "1.2.3.".
		if ($segment === '' OR preg_match("/[^0-9]/", $segment) OR $segment > 255 OR strlen($segment) > 3)
		{
			return FALSE;
		}
	}

	return TRUE;
}
Derek Allard15dcf492008-05-12 21:37:04 +0000435
Derek Allarda72b60d2007-01-31 23:56:11 +0000436 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000437
/**
 * User Agent
 *
 * Returns the HTTP_USER_AGENT server value, caching it in
 * $this->user_agent once it has been found.
 *
 * @access	public
 * @return	mixed	the user agent string, or FALSE when the header is absent
 */
function user_agent()
{
	// Look the header up only while nothing has been cached yet
	if ($this->user_agent === FALSE && isset($_SERVER['HTTP_USER_AGENT']))
	{
		$this->user_agent = $_SERVER['HTTP_USER_AGENT'];
	}

	return $this->user_agent;
}
Derek Allard15dcf492008-05-12 21:37:04 +0000455
Derek Allarda72b60d2007-01-31 23:56:11 +0000456 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000457
/**
 * Filename Security
 *
 * Removes directory-traversal sequences, markup characters and a
 * set of URL-encoded characters from a file name, then strips
 * backslashes.
 *
 * The removal is repeated until the string stops changing, because
 * a single pass can itself assemble a forbidden sequence
 * (".....///" collapses to "../"). It is repeated once more after
 * stripslashes(), because "..\/" becomes "../" at that point.
 *
 * NOTE(review): the encoded entries are matched case-sensitively,
 * so upper-case forms such as "%3C" are not removed.
 *
 * @access	public
 * @param	string
 * @return	string
 */
function filename_security($str)
{
	$bad = array(
					"../",
					"./",
					"<!--",
					"-->",
					"<",
					">",
					"'",
					'"',
					'&',
					'$',
					'#',
					'{',
					'}',
					'[',
					']',
					'=',
					';',
					'?',
					"%20",
					"%22",
					"%3c",		// <
					"%253c", 	// < (double-encoded)
					"%3e", 		// >
					"%0e", 		// 0x0E control character
					"%28", 		// (
					"%29", 		// )
					"%2528", 	// ( (double-encoded)
					"%26", 		// &
					"%24", 		// $
					"%3f", 		// ?
					"%3b", 		// ;
					"%3d"		// =
				);

	// Strip until a fixed point is reached
	do
	{
		$previous = $str;
		$str = str_replace($bad, '', $str);
	}
	while ($previous !== $str);

	$str = stripslashes($str);

	// Removing backslashes may have exposed new sequences
	do
	{
		$previous = $str;
		$str = str_replace($bad, '', $str);
	}
	while ($previous !== $str);

	return $str;
}
Derek Allard15dcf492008-05-12 21:37:04 +0000504
paulburdick763064b2007-06-27 23:25:55 +0000505 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000506
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting Hacks can be
 * prevented.  This function does a fair amount of work but
 * it is extremely thorough, designed to prevent even the
 * most obscure XSS attempts.  Nothing is ever 100% foolproof,
 * of course, but I haven't been able to get anything past
 * the filter.
 *
 * Note: This function should only be used to deal with data
 * upon submission.  It's not something that should
 * be used for general runtime processing.
 *
 * This function was based in part on some code and ideas I
 * got from Bitflux: http://blog.bitflux.ch/wiki/XSS_Prevention
 *
 * To help develop this script I used this great list of
 * vulnerabilities along with a few other hacks I've
 * harvested from examining vulnerabilities in other programs:
 * http://ha.ckers.org/xss.html
 *
 * Arrays are cleaned recursively, element by element.
 *
 * @access	public
 * @param	mixed	string, or array of strings / nested arrays
 * @return	mixed	the cleaned string, or the array with cleaned values
 */
function xss_clean($str)
{
	/*
	 * Is the string an array?
	 *
	 * foreach over the keys replaces the former each() loop;
	 * each() no longer exists in PHP 8.
	 */
	if (is_array($str))
	{
		foreach (array_keys($str) as $key)
		{
			$str[$key] = $this->xss_clean($str[$key]);
		}

		return $str;
	}

	/*
	 * Remove Null Characters
	 *
	 * This prevents sandwiching null characters
	 * between ascii characters, like Java\0script.
	 *
	 */
	$str = preg_replace('/\0+/', '', $str);
	$str = preg_replace('/(\\\\0)+/', '', $str);

	/*
	 * Protect GET variables in URLs
	 *
	 * The "&" of "&name=value" pairs is swapped for a random
	 * marker so the entity validation below does not treat it
	 * as the start of a character entity.
	 */
	$str = preg_replace('|\&([a-z\_0-9]+)\=([a-z\_0-9]+)|i', $this->xss_hash()."\\1=\\2", $str);

	/*
	 * Validate standard character entities
	 *
	 * Add a semicolon if missing.  We do this to enable
	 * the conversion of entities to ASCII later.
	 *
	 */
	$str = preg_replace('#(&\#?[0-9a-z]+)[\x00-\x20]*;?#i', "\\1;", $str);

	/*
	 * Validate UTF16 two byte encoding (x00)
	 *
	 * Just as above, adds a semicolon if missing.
	 *
	 */
	$str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);

	/*
	 * Un-Protect GET variables in URLs
	 */
	$str = str_replace($this->xss_hash(), '&', $str);

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Use rawurldecode() so it does not remove plus signs
	 *
	 */
	$str = rawurldecode($str);

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 *
	 */
	$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_attribute_conversion'), $str);

	$str = preg_replace_callback("/<([\w]+)[^>]*>/si", array($this, '_html_entity_decode_callback'), $str);

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like this: ja	vascript
	 * NOTE: we deal with spaces between characters later.
	 * NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
	 * so we use str_replace.
	 *
	 */
	if (strpos($str, "\t") !== FALSE)
	{
		$str = str_replace("\t", " ", $str);
	}

	/*
	 * Not Allowed Under Any Conditions
	 *
	 * Both tables are defined once and applied here and again in
	 * the final clean up, so the two passes cannot drift apart.
	 */
	$never_allowed_str = array(
					'document.cookie'	=> '[removed]',
					'document.write'	=> '[removed]',
					'.parentNode'		=> '[removed]',
					'.innerHTML'		=> '[removed]',
					'window.location'	=> '[removed]',
					'-moz-binding'		=> '[removed]',
					'<!--'				=> '&lt;!--',
					'-->'				=> '--&gt;',
					'<![CDATA['			=> '&lt;![CDATA['
				);

	$never_allowed_regex = array(
					"javascript\s*:"	=> '[removed]',
					"expression\s*\("	=> '[removed]', // CSS and IE
					"Redirect\s+302"	=> '[removed]'
				);

	foreach ($never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	/*
	 * Makes PHP tags safe
	 *
	 * Note: XML tags are inadvertently replaced too:
	 *
	 *	<?xml
	 *
	 * But it doesn't seem to pose a problem.
	 *
	 */
	$str = str_replace(array('<?php', '<?PHP', '<?', '?'.'>'),  array('&lt;?php', '&lt;?PHP', '&lt;?', '?&gt;'), $str);

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like:  j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 *
	 */
	$words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
	foreach ($words as $word)
	{
		// Build "j\s*a\s*v\s*a..." : each letter followed by optional whitespace
		$temp = '';
		for ($i = 0, $wordlen = strlen($word); $i < $wordlen; $i++)
		{
			$temp .= substr($word, $i, 1)."\s*";
		}

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto".
		// A callback replaces the former /e modifier, which PHP 7 removed and
		// which put a backslash before a double quote following the word.
		$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 *
	 * Repeated until a pass leaves the string unchanged.
	 */
	// stripos() is PHP 5+, so it is only reached when this flag is TRUE
	$is_php5 = (version_compare(PHP_VERSION, '5.0', '>=') === TRUE);

	do
	{
		$original = $str;

		if (($is_php5 && stripos($str, '</a>') !== FALSE) OR
			 preg_match("/<\/a>/i", $str))
		{
			$str = preg_replace_callback("#<a.*?</a>#si", array($this, '_js_link_removal'), $str);
		}

		if (($is_php5 && stripos($str, '<img') !== FALSE) OR
			 preg_match("/img/i", $str))
		{
			$str = preg_replace_callback("#<img.*?".">#si", array($this, '_js_img_removal'), $str);
		}

		if (($is_php5 && (stripos($str, 'script') !== FALSE OR stripos($str, 'xss') !== FALSE)) OR
			 preg_match("/(script|xss)/i", $str))
		{
			$str = preg_replace("#</*(script|xss).*?\>#si", "", $str);
		}
	}
	while ($original != $str);

	unset($original);

	/*
	 * Remove JavaScript Event Handlers
	 *
	 * Note: This code is a little blunt.  A tag containing one of
	 * these attribute names has its angle brackets converted to
	 * entities, but it's unlikely to be a problem.
	 *
	 */
	$event_handlers = array('onblur','onchange','onclick','onfocus','onload','onmouseover','onmouseup','onmousedown','onselect','onsubmit','onunload','onkeypress','onkeydown','onkeyup','onresize', 'xmlns');
	$str = preg_replace("#<([^>]+)(".implode('|', $event_handlers).")([^>]*)>#iU", "&lt;\\1\\2\\3&gt;", $str);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 *
	 */
	$str = preg_replace('#<(/*\s*)(alert|applet|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|layer|link|meta|object|plaintext|style|script|textarea|title|xml|xss)([^>]*)>#is', "&lt;\\1\\2\\3&gt;", $str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed.  Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:		eval&#40;'some code'&#41;
	 *
	 */
	$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

	/*
	 * Final clean up
	 *
	 * This adds a bit of extra precaution in case
	 * something got through the above filters
	 *
	 */
	foreach ($never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	log_message('debug', "XSS Filtering completed");
	return $str;
}

// --------------------------------------------------------------------

/**
 * Compact Exploded Words
 *
 * Callback function for xss_clean() to remove whitespace from
 * things like j a v a s c r i p t
 *
 * @access	private
 * @param	array	regex match: [1] the spaced-out word, [2] the following non-word character
 * @return	string	the word with all whitespace removed, followed by the unchanged character
 */
function _compact_exploded_words($matches)
{
	return preg_replace('/\s+/s', '', $matches[1]).$matches[2];
}
827
828 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000829
Derek Jones01f72ca2007-05-04 18:19:17 +0000830 /**
Derek Jones53437de2008-05-12 18:07:08 +0000831 * Random Hash for protecting URLs
832 *
833 * @access public
834 * @return string
835 */
function xss_hash()
{
	// The hash is generated lazily on first use and cached on the instance,
	// so every later call returns the same value.
	if ($this->xss_hash == '')
	{
		// Use version_compare() rather than a loose `phpversion() >= 4.2`:
		// comparing a dotted version string against a float depends on
		// numeric-string juggling and is not a real version comparison.
		if (version_compare(phpversion(), '4.2.0', '>='))
		{
			// No explicit seed is passed on these versions.
			mt_srand();
		}
		else
		{
			// Older versions get an explicit seed derived from microtime().
			mt_srand(hexdec(substr(md5(microtime()), -8)) & 0x7fffffff);
		}

		// NOTE(review): time() + mt_rand() is not a cryptographically strong
		// source; confirm that is acceptable for how this hash is used.
		$this->xss_hash = md5(time() + mt_rand(0, 1999999999));
	}

	return $this->xss_hash;
}
850
851 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000852
Derek Jones53437de2008-05-12 18:07:08 +0000853 /**
Derek Jones01f72ca2007-05-04 18:19:17 +0000854 * JS Link Removal
855 *
856 * Callback function for xss_clean() to sanitize links
857 * This limits the PCRE backtracks, making it more performance friendly
858 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
859 * PHP 5.2+ on link-heavy strings
860 *
861 * @access private
862 * @param array
863 * @return string
864 */
function _js_link_removal($match)
{
	// Matches a whole anchor element whose href is followed by one of the
	// script-like markers listed in the alternation.
	$pattern = "#<a.+?href=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>.*?</a>#si";

	// $match[0] is the full text handed over by the calling preg callback.
	$anchor = $match[0];

	// Any anchor that matches is removed entirely; other text is returned as is.
	return preg_replace($pattern, "", $anchor);
}
Derek Allard15dcf492008-05-12 21:37:04 +0000869
Derek Jones01f72ca2007-05-04 18:19:17 +0000870 /**
871 * JS Image Removal
872 *
873 * Callback function for xss_clean() to sanitize image tags
874 * This limits the PCRE backtracks, making it more performance friendly
875 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
876 * PHP 5.2+ on image tag heavy strings
877 *
878 * @access private
879 * @param array
880 * @return string
881 */
function _js_img_removal($match)
{
	// Matches a whole <img> tag whose src is followed by one of the
	// script-like markers listed in the alternation.
	$pattern = "#<img.+?src=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>#si";

	// $match[0] is the full text handed over by the calling preg callback.
	$image = $match[0];

	// Any tag that matches is removed entirely; other text is returned as is.
	return preg_replace($pattern, "", $image);
}
Derek Allarda72b60d2007-01-31 23:56:11 +0000886
Derek Jones01f72ca2007-05-04 18:19:17 +0000887 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000888
Derek Jones303c9cb2007-07-12 19:12:37 +0000889 /**
890 * Attribute Conversion
891 *
892 * Used as a callback for XSS Clean
893 *
894 * @access public
895 * @param array
896 * @return string
897 */
function _attribute_conversion($match)
{
	// $match[0] is the full text matched by the calling pattern.
	// Angle brackets are replaced by their own entities: '>' becomes '&gt;'
	// and '<' becomes '&lt;'. The previous code rewrote '>' to '&lt;',
	// i.e. the entity for the opposite bracket.
	return str_replace(array('>', '<'), array('&gt;', '&lt;'), $match[0]);
}
Derek Allard15dcf492008-05-12 21:37:04 +0000902
Derek Jones303c9cb2007-07-12 19:12:37 +0000903 // --------------------------------------------------------------------
904
905 /**
906 * HTML Entity Decode Callback
907 *
908 * Used as a callback for XSS Clean
909 *
910 * @access public
911 * @param array
912 * @return string
913 */
function _html_entity_decode_callback($match)
{
	global $CFG;

	// Decode the full matched text ($match[0]) using the configured
	// 'charset' item, passed on in upper case.
	return $this->_html_entity_decode($match[0], strtoupper($CFG->item('charset')));
}
921
922 // --------------------------------------------------------------------
Derek Allard15dcf492008-05-12 21:37:04 +0000923
Derek Allarda72b60d2007-01-31 23:56:11 +0000924 /**
925 * HTML Entities Decode
926 *
927 * This function is a replacement for html_entity_decode()
928 *
929 * In some versions of PHP the native function does not work
930 * when UTF-8 is the specified character set, so this gives us
931 * a work-around. More info here:
932 * http://bugs.php.net/bug.php?id=25670
933 *
934 * @access private
935 * @param string
936 * @param string
937 * @return string
938 */
939 /* -------------------------------------------------
940 /* Replacement for html_entity_decode()
941 /* -------------------------------------------------*/
Derek Allard15dcf492008-05-12 21:37:04 +0000942
Derek Allarda72b60d2007-01-31 23:56:11 +0000943 /*
944 NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
945 character set, and the PHP developers said they were not back porting the
946 fix to versions other than PHP 5.x.
947 */
function _html_entity_decode($str, $charset='UTF-8')
{
	// Decodes HTML entities in $str, including numeric entities written
	// without the trailing semicolon. $charset is handed to the native
	// html_entity_decode() when that function is used. Returns the decoded string.

	// Nothing to decode when there is no ampersand at all.
	if (stristr($str, '&') === FALSE) return $str;

	// html_entity_decode() is not used by itself because it does not convert
	// entities that lack the closing semicolon, although most browsers still
	// interpret them. The native function is used only when it exists and
	// either the charset is not UTF-8 or PHP is 5.0.0 or newer.
	$use_native = (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>=')));

	if ($use_native)
	{
		$str = html_entity_decode($str, ENT_COMPAT, $charset);
	}

	// After the native pass the leftover numeric entities are matched without
	// a semicolon; on the manual path an optional semicolon is consumed too.
	$tail = $use_native ? '' : ';{0,1}';

	// Pattern => numeric base of the captured digits. Hex entities are
	// handled first, then decimal ones, each over the whole string.
	$numeric = array(
		'~&#x([0-9a-f]{2,5})'.$tail.'~i'	=> 16,
		'~&#([0-9]{2,4})'.$tail.'~'			=> 10
	);

	foreach ($numeric as $pattern => $base)
	{
		// preg_split() with PREG_SPLIT_DELIM_CAPTURE replaces the former
		// preg_replace() /e modifier, which is no longer supported by PHP.
		// Even offsets hold the text between matches, odd offsets hold the
		// captured digits of each entity.
		$pieces = preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);
		$str = '';

		foreach ($pieces as $i => $piece)
		{
			// intval() with an explicit base keeps decimal entities with a
			// leading zero (e.g. &#065) decimal; the eval'd chr(065) form
			// treated them as octal literals.
			$str .= ($i % 2) ? chr(intval($piece, $base)) : $piece;
		}
	}

	// Literal entities - slightly slow, so only done on the manual path and
	// only when an ampersand is still present (the old check was inverted
	// and ran the table lookup only when there was nothing left to decode).
	if ( ! $use_native && stristr($str, '&') !== FALSE)
	{
		$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
	}

	return $str;
}
977
978}
979// END Input class
Derek Jones53437de2008-05-12 18:07:08 +0000980
981/* End of file Input.php */
Derek Jonesa3ffbbb2008-05-11 18:18:29 +0000982/* Location: ./system/libraries/Input.php */