blob: 1c5682eb72e4119b78090b6e3405afa92113d151 [file] [log] [blame]
Derek Allarda72b60d2007-01-31 23:56:11 +00001<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
2/**
Derek Allardd2df9bc2007-04-15 17:41:17 +00003 * CodeIgniter
Derek Allarda72b60d2007-01-31 23:56:11 +00004 *
5 * An open source application development framework for PHP 4.3.2 or newer
6 *
7 * @package CodeIgniter
Derek Allard3d879d52008-01-18 19:41:32 +00008 * @author ExpressionEngine Dev Team
Derek Allardd2df9bc2007-04-15 17:41:17 +00009 * @copyright Copyright (c) 2006, EllisLab, Inc.
Derek Jones7a9193a2008-01-21 18:39:20 +000010 * @license http://codeigniter.com/user_guide/license.html
11 * @link http://codeigniter.com
Derek Allarda72b60d2007-01-31 23:56:11 +000012 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * Input Class
20 *
21 * Pre-processes global input data for security
22 *
23 * @package CodeIgniter
24 * @subpackage Libraries
25 * @category Input
Derek Allard3d879d52008-01-18 19:41:32 +000026 * @author ExpressionEngine Dev Team
Derek Jones7a9193a2008-01-21 18:39:20 +000027 * @link http://codeigniter.com/user_guide/libraries/input.html
Derek Allarda72b60d2007-01-31 23:56:11 +000028 */
29class CI_Input {
30 var $use_xss_clean = FALSE;
31 var $ip_address = FALSE;
32 var $user_agent = FALSE;
33 var $allow_get_array = FALSE;
34
/**
 * Constructor
 *
 * Reads the global XSS-filtering and query-string settings from the
 * Config class, then sanitizes the global input arrays.
 *
 * @access	public
 */
function CI_Input()
{
	log_message('debug', "Input Class Initialized");

	$config =& load_class('Config');

	// Only a strict boolean TRUE in the config switches either feature on
	$xss_setting	= $config->item('global_xss_filtering');
	$query_setting	= $config->item('enable_query_strings');

	$this->use_xss_clean	= ($xss_setting === TRUE);
	$this->allow_get_array	= ($query_setting === TRUE);

	$this->_sanitize_globals();
}
52
53 // --------------------------------------------------------------------
54
/**
 * Sanitize Globals
 *
 * This function does the following:
 *
 * Unsets every global variable named after a key found in the request
 * arrays (effectively the same as register_globals = off)
 *
 * Empties $_GET if query strings are not enabled, otherwise cleans
 * its keys and values
 *
 * Cleans the keys and values of $_POST and $_COOKIE, which includes
 * standardizing newline characters to \n
 *
 * @access	private
 * @return	void
 */
function _sanitize_globals()
{
	// Would kind of be "wrong" to unset any of these GLOBALS.
	// '_COOKIE' is listed so that a request key of that name can not
	// wipe the cookie array before it gets cleaned further down.
	$protected = array('_SERVER', '_GET', '_POST', '_COOKIE', '_FILES', '_REQUEST', '_SESSION', '_ENV', 'GLOBALS', 'HTTP_RAW_POST_DATA',
						'system_folder', 'application_folder', 'BM', 'EXT', 'CFG', 'URI', 'RTR', 'OUT', 'IN');

	// Unset globals for security.
	// This is effectively the same as register_globals = off
	foreach (array($_GET, $_POST, $_COOKIE, $_SERVER, $_FILES, $_ENV, (isset($_SESSION) && is_array($_SESSION)) ? $_SESSION : array()) as $global)
	{
		if ( ! is_array($global))
		{
			if ( ! in_array($global, $protected))
			{
				unset($GLOBALS[$global]);
			}
		}
		else
		{
			foreach ($global as $key => $val)
			{
				if ( ! in_array($key, $protected))
				{
					unset($GLOBALS[$key]);
				}

				// Keys one level down are treated the same way
				if (is_array($val))
				{
					foreach ($val as $k => $v)
					{
						if ( ! in_array($k, $protected))
						{
							unset($GLOBALS[$k]);
						}
					}
				}
			}
		}
	}

	// Is $_GET data allowed? If not we'll set the $_GET to an empty array
	if ($this->allow_get_array == FALSE)
	{
		$_GET = array();
	}
	else
	{
		if (is_array($_GET) AND count($_GET) > 0)
		{
			foreach ($_GET as $key => $val)
			{
				$_GET[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
			}
		}
	}

	// Clean $_POST Data
	if (is_array($_POST) AND count($_POST) > 0)
	{
		foreach ($_POST as $key => $val)
		{
			$_POST[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	// Clean $_COOKIE Data
	if (is_array($_COOKIE) AND count($_COOKIE) > 0)
	{
		foreach ($_COOKIE as $key => $val)
		{
			$_COOKIE[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	log_message('debug', "Global POST and COOKIE data sanitized");
}
145
146 // --------------------------------------------------------------------
147
/**
 * Clean Input Data
 *
 * This is a helper function. It strips magic-quote slashes,
 * optionally runs the XSS filter, and standardizes newline
 * characters to \n. Arrays are processed recursively, with every
 * key passed through _clean_input_keys().
 *
 * @access	private
 * @param	mixed	string, or array of strings
 * @return	mixed	cleaned string, or array of cleaned strings
 */
function _clean_input_data($str)
{
	if (is_array($str))
	{
		$new_array = array();
		foreach ($str as $key => $val)
		{
			$new_array[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
		return $new_array;
	}

	// We strip slashes if magic quotes is on to keep things consistent.
	// Magic quotes were dropped in PHP 5.4 and get_magic_quotes_gpc()
	// itself is gone in PHP 8, so it is only consulted on older versions.
	if (version_compare(PHP_VERSION, '5.4', '<') AND get_magic_quotes_gpc())
	{
		$str = stripslashes($str);
	}

	// Should we filter the input data?
	if ($this->use_xss_clean === TRUE)
	{
		$str = $this->xss_clean($str);
	}

	// Standardize newlines
	return preg_replace("/\015\012|\015|\012/", "\n", $str);
}
185
186 // --------------------------------------------------------------------
187
/**
 * Clean Keys
 *
 * This is a helper function. To prevent malicious users
 * from trying to exploit keys we make sure that keys are
 * only named with alpha-numeric text and a few other items
 * (colon, underscore, forward slash and dash).
 *
 * The script is terminated if the key contains anything else.
 *
 * @access	private
 * @param	string
 * @return	string	the key, unchanged
 */
function _clean_input_keys($str)
{
	// The D modifier makes "$" match only at the very end of the key;
	// without it a key ending in a newline would slip through.
	if ( ! preg_match('/^[a-z0-9:_\/-]+$/iD', $str))
	{
		exit('Disallowed Key Characters.');
	}

	return $str;
}
Rick Ellis112569d2007-02-26 19:19:08 +0000208
209 // --------------------------------------------------------------------
210
/**
 * Fetch an item from the GET array
 *
 * @access	public
 * @param	string	key to look up in $_GET
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE if the key is not set
 */
function get($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_GET[$index]))
	{
		return FALSE;
	}

	if ($xss_clean !== TRUE)
	{
		return $_GET[$index];
	}

	if ( ! is_array($_GET[$index]))
	{
		return $this->xss_clean($_GET[$index]);
	}

	// Clean into a local copy, the way cookie() does, so that the
	// superglobal keeps the values that were actually submitted.
	$cleaned = array();
	foreach ($_GET[$index] as $key => $val)
	{
		$cleaned[$key] = $this->xss_clean($val);
	}

	return $cleaned;
}
Derek Allarda72b60d2007-01-31 23:56:11 +0000243
244 // --------------------------------------------------------------------
245
/**
 * Fetch an item from the POST array
 *
 * @access	public
 * @param	string	key to look up in $_POST
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE if the key is not set
 */
function post($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_POST[$index]))
	{
		return FALSE;
	}

	if ($xss_clean !== TRUE)
	{
		return $_POST[$index];
	}

	if ( ! is_array($_POST[$index]))
	{
		return $this->xss_clean($_POST[$index]);
	}

	// Clean into a local copy, the way cookie() does, so that the
	// superglobal keeps the values that were actually submitted.
	$cleaned = array();
	foreach ($_POST[$index] as $key => $val)
	{
		$cleaned[$key] = $this->xss_clean($val);
	}

	return $cleaned;
}
278
279 // --------------------------------------------------------------------
280
/**
 * Fetch an item from the COOKIE array
 *
 * @access	public
 * @param	string	key to look up in $_COOKIE
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE if the key is not set
 */
function cookie($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_COOKIE[$index]))
	{
		return FALSE;
	}

	$value = $_COOKIE[$index];

	// No filtering requested: hand the stored value straight back
	if ($xss_clean !== TRUE)
	{
		return $value;
	}

	if ( ! is_array($value))
	{
		return $this->xss_clean($value);
	}

	// Array cookies are filtered element by element into a fresh array
	$filtered = array();
	foreach ($value as $name => $item)
	{
		$filtered[$name] = $this->xss_clean($item);
	}

	return $filtered;
}
318
319 // --------------------------------------------------------------------
320
/**
 * Fetch an item from the SERVER array
 *
 * @access	public
 * @param	string	key to look up in $_SERVER
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value, or FALSE if the key is not set
 */
function server($index = '', $xss_clean = FALSE)
{
	if (isset($_SERVER[$index]))
	{
		$value = $_SERVER[$index];

		return ($xss_clean === TRUE) ? $this->xss_clean($value) : $value;
	}

	return FALSE;
}
343
344 // --------------------------------------------------------------------
345
/**
 * Fetch the IP Address
 *
 * The result is cached in $this->ip_address after the first call.
 * If no address can be determined, or the candidate fails valid_ip(),
 * '0.0.0.0' is returned.
 *
 * @access	public
 * @return	string
 */
function ip_address()
{
	if ($this->ip_address !== FALSE)
	{
		return $this->ip_address;
	}

	// NOTE(review): HTTP_CLIENT_IP and HTTP_X_FORWARDED_FOR are presumably
	// populated from request headers the client controls, and the first
	// branch prefers HTTP_CLIENT_IP over REMOTE_ADDR. Confirm this is only
	// relied upon behind a trusted proxy before using the result for
	// anything security-sensitive.
	if ($this->server('REMOTE_ADDR') AND $this->server('HTTP_CLIENT_IP'))
	{
		$this->ip_address = $_SERVER['HTTP_CLIENT_IP'];
	}
	elseif ($this->server('REMOTE_ADDR'))
	{
		$this->ip_address = $_SERVER['REMOTE_ADDR'];
	}
	elseif ($this->server('HTTP_CLIENT_IP'))
	{
		$this->ip_address = $_SERVER['HTTP_CLIENT_IP'];
	}
	elseif ($this->server('HTTP_X_FORWARDED_FOR'))
	{
		$this->ip_address = $_SERVER['HTTP_X_FORWARDED_FOR'];
	}

	if ($this->ip_address === FALSE)
	{
		$this->ip_address = '0.0.0.0';
		return $this->ip_address;
	}

	// A comma separated list: take the last entry. Entries are usually
	// written as "a, b", so the surrounding whitespace has to go or the
	// address would fail validation below.
	if (strstr($this->ip_address, ','))
	{
		$x = explode(',', $this->ip_address);
		$this->ip_address = trim(end($x));
	}

	if ( ! $this->valid_ip($this->ip_address))
	{
		$this->ip_address = '0.0.0.0';
	}

	return $this->ip_address;
}
395
396 // --------------------------------------------------------------------
397
/**
 * Validate IP Address
 *
 * Checks that the string is a dotted-quad IPv4 address: exactly four
 * non-empty, all-digit segments of at most three digits, none greater
 * than 255, and the first one not starting with 0.
 *
 * Updated version suggested by Geert De Deckere
 *
 * @access	public
 * @param	string
 * @return	bool
 */
function valid_ip($ip)
{
	$ip_segments = explode('.', $ip);

	// Always 4 segments needed
	if (count($ip_segments) != 4)
	{
		return FALSE;
	}

	// IP can not start with 0
	if (strncmp($ip_segments[0], '0', 1) === 0)
	{
		return FALSE;
	}

	// Check each segment
	foreach ($ip_segments as $segment)
	{
		// IP segments must be present, must be digits and can not be
		// longer than 3 digits or greater than 255.
		// The empty check rejects input such as "1..2.3".
		if ($segment === '' OR preg_match('/[^0-9]/', $segment) OR strlen($segment) > 3 OR $segment > 255)
		{
			return FALSE;
		}
	}

	return TRUE;
}
434
435 // --------------------------------------------------------------------
436
/**
 * User Agent
 *
 * Returns the HTTP_USER_AGENT server value, remembering it in
 * $this->user_agent once it has been found.
 *
 * @access	public
 * @return	mixed	the user agent string, or FALSE if it is not set
 */
function user_agent()
{
	// Nothing stored yet: look the value up in the server array
	if ($this->user_agent === FALSE)
	{
		$this->user_agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : FALSE;
	}

	return $this->user_agent;
}
454
455 // --------------------------------------------------------------------
456
/**
 * Filename Security
 *
 * Removes directory-traversal sequences, markup characters and a set
 * of URL-encoded characters from a file name, and strips backslashes.
 * Matching is case sensitive.
 *
 * @access	public
 * @param	string
 * @return	string
 */
function filename_security($str)
{
	$bad = array(
					"../",
					"./",
					"<!--",
					"-->",
					"<",
					">",
					"'",
					'"',
					'&',
					'$',
					'#',
					'{',
					'}',
					'[',
					']',
					'=',
					';',
					'?',
					"%20",
					"%22",
					"%3c",		// <
					"%253c", 	// <
					"%3e", 		// >
					"%0e", 		// NOTE(review): was labelled ">" but %3e above is ">"; confirm what this entry is meant to catch
					"%28", 		// (
					"%29", 		// )
					"%2528", 	// (
					"%26", 		// &
					"%24", 		// $
					"%3f", 		// ?
					"%3b", 		// ;
					"%3d"		// =
				);

	// A single pass can leave a forbidden sequence behind when removing
	// one pattern joins its neighbours (e.g. "..%3d/" collapses into
	// "../"), so keep going until the string stops changing.
	do
	{
		$previous = $str;
		$str = stripslashes(str_replace($bad, '', $str));
	}
	while ($previous !== $str);

	return $str;
}
503
504 // --------------------------------------------------------------------
505
506 /**
Derek Allarda72b60d2007-01-31 23:56:11 +0000507 * XSS Clean
508 *
509 * Sanitizes data so that Cross Site Scripting Hacks can be
510 * prevented.  This function does a fair amount of work but
511 * it is extremely thorough, designed to prevent even the
512 * most obscure XSS attempts.  Nothing is ever 100% foolproof,
513 * of course, but I haven't been able to get anything passed
514 * the filter.
515 *
516 * Note: This function should only be used to deal with data
517 * upon submission.  It's not something that should
518 * be used for general runtime processing.
519 *
520 * This function was based in part on some code and ideas I
521 * got from Bitflux: http://blog.bitflux.ch/wiki/XSS_Prevention
522 *
523 * To help develop this script I used this great list of
524 * vulnerabilities along with a few other hacks I've
525 * harvested from examining vulnerabilities in other programs:
526 * http://ha.ckers.org/xss.html
527 *
528 * @access public
529 * @param string
530 * @return string
531 */
Derek Jones303c9cb2007-07-12 19:12:37 +0000532 function xss_clean($str)
Derek Allarda72b60d2007-01-31 23:56:11 +0000533 {
534 /*
535 * Remove Null Characters
536 *
537 * This prevents sandwiching null characters
538 * between ascii characters, like Java\0script.
539 *
540 */
541 $str = preg_replace('/\0+/', '', $str);
542 $str = preg_replace('/(\\\\0)+/', '', $str);
543
544 /*
545 * Validate standard character entities
546 *
547 * Add a semicolon if missing. We do this to enable
548 * the conversion of entities to ASCII later.
549 *
550 */
Derek Jones48bb32a2007-07-12 13:10:42 +0000551 $str = preg_replace('#(&\#?[0-9a-z]+)[\x00-\x20]*;?#i', "\\1;", $str);
Derek Allarda72b60d2007-01-31 23:56:11 +0000552
553 /*
Derek Jones48bb32a2007-07-12 13:10:42 +0000554 * Validate UTF16 two byte encoding (x00)
Derek Allarda72b60d2007-01-31 23:56:11 +0000555 *
556 * Just as above, adds a semicolon if missing.
557 *
558 */
Derek Jones48bb32a2007-07-12 13:10:42 +0000559 $str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);
Derek Allarda72b60d2007-01-31 23:56:11 +0000560
561 /*
562 * URL Decode
563 *
564 * Just in case stuff like this is submitted:
565 *
566 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
567 *
Derek Jonesab32a422008-02-04 22:02:11 +0000568 * Note: Use rawurldecode() so it does not remove plus signs
Derek Allarda72b60d2007-01-31 23:56:11 +0000569 *
570 */
Derek Jonesab32a422008-02-04 22:02:11 +0000571 $str = rawurldecode($str);
572
Derek Allarda72b60d2007-01-31 23:56:11 +0000573 /*
Derek Jones303c9cb2007-07-12 19:12:37 +0000574 * Convert character entities to ASCII
Derek Allarda72b60d2007-01-31 23:56:11 +0000575 *
576 * This permits our tests below to work reliably.
577 * We only convert entities that are within tags since
578 * these are the ones that will pose security problems.
579 *
580 */
Derek Jones303c9cb2007-07-12 19:12:37 +0000581
582 $str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_attribute_conversion'), $str);
583
584 $str = preg_replace_callback("/<([\w]+)[^>]*>/si", array($this, '_html_entity_decode_callback'), $str);
585
586 /*
587
588 Old Code that when modified to use preg_replace()'s above became more efficient memory-wise
589
590 if (preg_match_all("/[a-z]+=([\'\"]).*?\\1/si", $str, $matches))
591 {
592 for ($i = 0; $i < count($matches[0]); $i++)
Derek Allarda72b60d2007-01-31 23:56:11 +0000593 {
Derek Jones303c9cb2007-07-12 19:12:37 +0000594 if (stristr($matches[0][$i], '>'))
595 {
596 $str = str_replace( $matches['0'][$i],
597 str_replace('>', '&lt;', $matches[0][$i]),
598 $str);
599 }
600 }
601 }
602
603 if (preg_match_all("/<([\w]+)[^>]*>/si", $str, $matches))
604 {
605 for ($i = 0; $i < count($matches[0]); $i++)
606 {
607 $str = str_replace($matches[0][$i],
608 $this->_html_entity_decode($matches[0][$i], $charset),
Derek Allarda72b60d2007-01-31 23:56:11 +0000609 $str);
610 }
611 }
Derek Jones303c9cb2007-07-12 19:12:37 +0000612 */
613
Derek Jones48bb32a2007-07-12 13:10:42 +0000614 /*
615 * Convert all tabs to spaces
616 *
617 * This prevents strings like this: ja vascript
618 * NOTE: we deal with spaces between characters later.
619 * NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
620 * so we use str_replace.
621 *
622 */
623
624 $str = str_replace("\t", " ", $str);
625
Derek Allarda72b60d2007-01-31 23:56:11 +0000626 /*
627 * Not Allowed Under Any Conditions
628 */
629 $bad = array(
630 'document.cookie' => '[removed]',
Derek Jones48bb32a2007-07-12 13:10:42 +0000631 'document.write' => '[removed]',
paulburdick033ef022007-06-26 21:52:52 +0000632 '.parentNode' => '[removed]',
633 '.innerHTML' => '[removed]',
Derek Allarda72b60d2007-01-31 23:56:11 +0000634 'window.location' => '[removed]',
Derek Jones48bb32a2007-07-12 13:10:42 +0000635 '-moz-binding' => '[removed]',
636 '<!--' => '&lt;!--',
637 '-->' => '--&gt;',
638 '<!CDATA[' => '&lt;![CDATA['
639 );
640
641 foreach ($bad as $key => $val)
642 {
643 $str = str_replace($key, $val, $str);
644 }
645
646 $bad = array(
Derek Allarda72b60d2007-01-31 23:56:11 +0000647 "javascript\s*:" => '[removed]',
paulburdick033ef022007-06-26 21:52:52 +0000648 "expression\s*\(" => '[removed]', // CSS and IE
Derek Jones48bb32a2007-07-12 13:10:42 +0000649 "Redirect\s+302" => '[removed]'
Derek Allarda72b60d2007-01-31 23:56:11 +0000650 );
Derek Jones48bb32a2007-07-12 13:10:42 +0000651
Derek Allarda72b60d2007-01-31 23:56:11 +0000652 foreach ($bad as $key => $val)
653 {
654 $str = preg_replace("#".$key."#i", $val, $str);
655 }
656
657 /*
Derek Allarda72b60d2007-01-31 23:56:11 +0000658 * Makes PHP tags safe
659 *
660 * Note: XML tags are inadvertently replaced too:
661 *
662 * <?xml
663 *
664 * But it doesn't seem to pose a problem.
665 *
666 */
Derek Jones48bb32a2007-07-12 13:10:42 +0000667 $str = str_replace(array('<?php', '<?PHP', '<?', '?'.'>'), array('&lt;?php', '&lt;?PHP', '&lt;?', '?&gt;'), $str);
Derek Allarda72b60d2007-01-31 23:56:11 +0000668
669 /*
670 * Compact any exploded words
671 *
672 * This corrects words like: j a v a s c r i p t
673 * These words are compacted back to their correct state.
674 *
675 */
paulburdickb614d392007-06-26 21:58:56 +0000676 $words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
Derek Allarda72b60d2007-01-31 23:56:11 +0000677 foreach ($words as $word)
678 {
679 $temp = '';
680 for ($i = 0; $i < strlen($word); $i++)
681 {
682 $temp .= substr($word, $i, 1)."\s*";
683 }
684
Derek Jones01f72ca2007-05-04 18:19:17 +0000685 // We only want to do this when it is followed by a non-word character
686 // That way valid stuff like "dealer to" does not become "dealerto"
687 $str = preg_replace('#('.substr($temp, 0, -3).')(\W)#ise', "preg_replace('/\s+/s', '', '\\1').'\\2'", $str);
Derek Allarda72b60d2007-01-31 23:56:11 +0000688 }
689
690 /*
691 * Remove disallowed Javascript in links or img tags
paulburdick391eb032007-06-27 22:58:24 +0000692 */
693 do
694 {
695 $original = $str;
Derek Jones48bb32a2007-07-12 13:10:42 +0000696
697 if ((version_compare(PHP_VERSION, '5.0', '>=') === TRUE && stripos($str, '</a>') !== FALSE) OR
698 preg_match("/<\/a>/i", $str))
699 {
700 $str = preg_replace_callback("#<a.*?</a>#si", array($this, '_js_link_removal'), $str);
701 }
702
703 if ((version_compare(PHP_VERSION, '5.0', '>=') === TRUE && stripos($str, '<img') !== FALSE) OR
704 preg_match("/img/i", $str))
705 {
706 $str = preg_replace_callback("#<img.*?".">#si", array($this, '_js_img_removal'), $str);
707 }
708
709 if ((version_compare(PHP_VERSION, '5.0', '>=') === TRUE && (stripos($str, 'script') !== FALSE OR stripos($str, 'xss') !== FALSE)) OR
710 preg_match("/(script|xss)/i", $str))
711 {
712 $str = preg_replace("#</*(script|xss).*?\>#si", "", $str);
713 }
paulburdick391eb032007-06-27 22:58:24 +0000714 }
715 while($original != $str);
716
717 unset($original);
Derek Allarda72b60d2007-01-31 23:56:11 +0000718
719 /*
720 * Remove JavaScript Event Handlers
721 *
722 * Note: This code is a little blunt. It removes
723 * the event handler and anything up to the closing >,
724 * but it's unlikely to be a problem.
725 *
726 */
Derek Jones01f72ca2007-05-04 18:19:17 +0000727 $event_handlers = array('onblur','onchange','onclick','onfocus','onload','onmouseover','onmouseup','onmousedown','onselect','onsubmit','onunload','onkeypress','onkeydown','onkeyup','onresize', 'xmlns');
728 $str = preg_replace("#<([^>]+)(".implode('|', $event_handlers).")([^>]*)>#iU", "&lt;\\1\\2\\3&gt;", $str);
Derek Allarda72b60d2007-01-31 23:56:11 +0000729
730 /*
731 * Sanitize naughty HTML elements
732 *
733 * If a tag containing any of the words in the list
734 * below is found, the tag gets converted to entities.
735 *
736 * So this: <blink>
737 * Becomes: &lt;blink&gt;
738 *
739 */
740 $str = preg_replace('#<(/*\s*)(alert|applet|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|layer|link|meta|object|plaintext|style|script|textarea|title|xml|xss)([^>]*)>#is', "&lt;\\1\\2\\3&gt;", $str);
741
742 /*
743 * Sanitize naughty scripting elements
744 *
745 * Similar to above, only instead of looking for
746 * tags it looks for PHP and JavaScript commands
747 * that are disallowed. Rather than removing the
748 * code, it simply converts the parenthesis to entities
749 * rendering the code un-executable.
750 *
751 * For example: eval('some code')
752 * Becomes: eval&#40;'some code'&#41;
753 *
754 */
paulburdick033ef022007-06-26 21:52:52 +0000755 $str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);
Derek Allarda72b60d2007-01-31 23:56:11 +0000756
757 /*
758 * Final clean up
759 *
760 * This adds a bit of extra precaution in case
761 * something got through the above filters
762 *
763 */
764 $bad = array(
765 'document.cookie' => '[removed]',
Derek Jones48bb32a2007-07-12 13:10:42 +0000766 'document.write' => '[removed]',
paulburdick033ef022007-06-26 21:52:52 +0000767 '.parentNode' => '[removed]',
768 '.innerHTML' => '[removed]',
Derek Allarda72b60d2007-01-31 23:56:11 +0000769 'window.location' => '[removed]',
Derek Jones48bb32a2007-07-12 13:10:42 +0000770 '-moz-binding' => '[removed]',
Derek Allarda72b60d2007-01-31 23:56:11 +0000771 '<!--' => '&lt;!--',
Derek Jones48bb32a2007-07-12 13:10:42 +0000772 '-->' => '--&gt;',
773 '<!CDATA[' => '&lt;![CDATA['
Derek Allarda72b60d2007-01-31 23:56:11 +0000774 );
Derek Jones48bb32a2007-07-12 13:10:42 +0000775
Derek Allarda72b60d2007-01-31 23:56:11 +0000776 foreach ($bad as $key => $val)
777 {
Derek Jones48bb32a2007-07-12 13:10:42 +0000778 $str = str_replace($key, $val, $str);
779 }
780
781 $bad = array(
782 "javascript\s*:" => '[removed]',
783 "expression\s*\(" => '[removed]', // CSS and IE
784 "Redirect\s+302" => '[removed]'
785 );
786
787 foreach ($bad as $key => $val)
788 {
789 $str = preg_replace("#".$key."#i", $val, $str);
Derek Allarda72b60d2007-01-31 23:56:11 +0000790 }
791
792
793 log_message('debug', "XSS Filtering completed");
794 return $str;
795 }
796
797 // --------------------------------------------------------------------
Derek Jones01f72ca2007-05-04 18:19:17 +0000798
/**
 * JS Link Removal
 *
 * Callback function for xss_clean() to sanitize links.
 * Working on one matched link at a time limits the PCRE backtracks,
 * making it more performance friendly and preventing
 * PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on link-heavy strings.
 *
 * @access	private
 * @param	array	[0] holds the matched link
 * @return	string	the link, or an empty string if its href looked scripted
 */
function _js_link_removal($match)
{
	$link = $match[0];

	// A link whose href contains any of these markers is dropped entirely
	$scripted_href = "#<a.+?href=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>.*?</a>#si";

	return preg_replace($scripted_href, "", $link);
}
815
/**
 * JS Image Removal
 *
 * Callback function for xss_clean() to sanitize image tags.
 * Working on one matched tag at a time limits the PCRE backtracks,
 * making it more performance friendly and preventing
 * PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on image tag heavy strings.
 *
 * @access	private
 * @param	array	[0] holds the matched image tag
 * @return	string	the tag, or an empty string if its src looked scripted
 */
function _js_img_removal($match)
{
	$tag = $match[0];

	// An image whose src contains any of these markers is dropped entirely
	$scripted_src = "#<img.+?src=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>#si";

	return preg_replace($scripted_src, "", $tag);
}
Derek Allarda72b60d2007-01-31 23:56:11 +0000832
Derek Jones01f72ca2007-05-04 18:19:17 +0000833 // --------------------------------------------------------------------
Derek Jones303c9cb2007-07-12 19:12:37 +0000834
/**
 * Attribute Conversion
 *
 * Used as a callback for XSS Clean.
 *
 * Encodes any ">" found inside a matched attribute as "&gt;"
 * (previously it was turned into "&lt;", the entity for "<").
 *
 * @access	public
 * @param	array	[0] holds the matched attribute
 * @return	string
 */
function _attribute_conversion($match)
{
	return str_replace('>', '&gt;', $match[0]);
}
848
849 // --------------------------------------------------------------------
850
/**
 * HTML Entity Decode Callback
 *
 * Used as a callback for XSS Clean.  Decodes the entities in the
 * matched text using the upper-cased 'charset' config item.
 *
 * @access	public
 * @param	array	[0] holds the matched text
 * @return	string
 */
function _html_entity_decode_callback($match)
{
	global $CFG;

	return $this->_html_entity_decode($match[0], strtoupper($CFG->item('charset')));
}
867
868 // --------------------------------------------------------------------
869
/**
 * HTML Entities Decode
 *
 * This function is a replacement for html_entity_decode()
 *
 * In some versions of PHP the native function does not work
 * when UTF-8 is the specified character set, so this gives us
 * a work-around.  More info here:
 * http://bugs.php.net/bug.php?id=25670
 *
 * NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
 * character set, and the PHP developers said they were not back porting the
 * fix to versions other than PHP 5.x.
 *
 * @access	private
 * @param	string	text to decode
 * @param	string	character set handed to html_entity_decode()
 * @return	string
 */
function _html_entity_decode($str, $charset='UTF-8')
{
	if (stristr($str, '&') === FALSE) return $str;

	// The reason we are not using html_entity_decode() by itself is because
	// while it is not technically correct to leave out the semicolon
	// at the end of an entity most browsers will still interpret the entity
	// correctly.  html_entity_decode() does not convert entities without
	// semicolons, so we are left with our own little solution here. Bummer.

	$use_native = (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>=')));

	if ($use_native)
	{
		$str = html_entity_decode($str, ENT_COMPAT, $charset);
	}

	// Numeric Entities, hexadecimal first and then decimal, with or
	// without the trailing semicolon
	$numeric = array(
						16	=> '~&#x([0-9a-f]{2,5});{0,1}~i',
						10	=> '~&#([0-9]{2,4});{0,1}~'
					);

	foreach ($numeric as $base => $pattern)
	{
		// With DELIM_CAPTURE the captured digits sit at the odd indexes,
		// between the untouched pieces of text.  This is a single pass
		// over the string and needs neither the /e modifier (gone in
		// PHP 7) nor a callback.
		$pieces = preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);

		for ($i = 1, $total = count($pieces); $i < $total; $i += 2)
		{
			// intval() with an explicit base reads "0106" as 106 rather
			// than as an octal literal.  chr() only deals in single bytes,
			// so larger values are wrapped into the 0-255 range.
			$pieces[$i] = chr(intval($pieces[$i], $base) % 256);
		}

		$str = implode('', $pieces);
	}

	if ($use_native)
	{
		return $str;
	}

	// Literal Entities - Slightly slow so we do another check
	if (stristr($str, '&') !== FALSE)
	{
		$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
	}

	return $str;
}
923
924}
925// END Input class
adminb0dd10f2006-08-25 17:25:49 +0000926?>