blob: 1874b3790d928043b1926a5adb42f1edafb79af7 [file] [log] [blame]
Derek Allarda72b60d2007-01-31 23:56:11 +00001<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
2/**
Derek Allardd2df9bc2007-04-15 17:41:17 +00003 * CodeIgniter
Derek Allarda72b60d2007-01-31 23:56:11 +00004 *
5 * An open source application development framework for PHP 4.3.2 or newer
6 *
7 * @package CodeIgniter
Derek Allard3d879d52008-01-18 19:41:32 +00008 * @author ExpressionEngine Dev Team
Derek Allardd2df9bc2007-04-15 17:41:17 +00009 * @copyright Copyright (c) 2006, EllisLab, Inc.
Derek Jones7a9193a2008-01-21 18:39:20 +000010 * @license http://codeigniter.com/user_guide/license.html
11 * @link http://codeigniter.com
Derek Allarda72b60d2007-01-31 23:56:11 +000012 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * Input Class
20 *
21 * Pre-processes global input data for security
22 *
23 * @package CodeIgniter
24 * @subpackage Libraries
25 * @category Input
Derek Allard3d879d52008-01-18 19:41:32 +000026 * @author ExpressionEngine Dev Team
Derek Jones7a9193a2008-01-21 18:39:20 +000027 * @link http://codeigniter.com/user_guide/libraries/input.html
Derek Allarda72b60d2007-01-31 23:56:11 +000028 */
29class CI_Input {
30 var $use_xss_clean = FALSE;
31 var $ip_address = FALSE;
32 var $user_agent = FALSE;
33 var $allow_get_array = FALSE;
34
/**
 * Constructor
 *
 * Reads the global XSS filtering and query string settings from the
 * Config class, then sanitizes the request superglobals.
 *
 * @access	public
 */
function CI_Input()
{
	log_message('debug', "Input Class Initialized");

	$CFG =& load_class('Config');

	// A feature is switched on only by a strict boolean TRUE in the config
	$this->use_xss_clean = ($CFG->item('global_xss_filtering') === TRUE);
	$this->allow_get_array = ($CFG->item('enable_query_strings') === TRUE);

	$this->_sanitize_globals();
}
52
53 // --------------------------------------------------------------------
54
/**
 * Sanitize Globals
 *
 * This function does the following:
 *
 * Sets to NULL every global variable that is named after a GET, POST
 * or COOKIE key (the same effect as register_globals = off), except
 * for the protected names listed below
 *
 * Empties $_GET if query strings are not enabled
 *
 * Validates the keys and cleans the values of $_GET, $_POST and
 * $_COOKIE (cleaning includes standardizing newlines to \n)
 *
 * @access	private
 * @return	void
 */
function _sanitize_globals()
{
	// Would kind of be "wrong" to unset any of these GLOBALS
	$protected = array(
		'_SERVER', '_GET', '_POST', '_FILES', '_REQUEST', '_SESSION', '_ENV', 'GLOBALS', 'HTTP_RAW_POST_DATA',
		'system_folder', 'application_folder', 'BM', 'EXT', 'CFG', 'URI', 'RTR', 'OUT', 'IN'
	);

	// Unset globals for security.
	// This is effectively the same as register_globals = off
	foreach (array($_GET, $_POST, $_COOKIE) as $global)
	{
		// A superglobal that is not an array is treated as a single name
		$names = is_array($global) ? array_keys($global) : array($global);

		foreach ($names as $name)
		{
			$name = (string) $name;

			if ( ! in_array($name, $protected, TRUE))
			{
				// Write through $GLOBALS rather than "global $$name": the
				// variable-variable form binds the name in this function's
				// scope first, so a request key such as "protected" or "key"
				// would overwrite the local variables driving this loop.
				$GLOBALS[$name] = NULL;
			}
		}
	}

	// Is $_GET data allowed? If not we'll set the $_GET to an empty array
	if ($this->allow_get_array == FALSE)
	{
		$_GET = array();
	}
	elseif (is_array($_GET) AND count($_GET) > 0)
	{
		foreach ($_GET as $key => $val)
		{
			$_GET[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	// Clean $_POST Data
	if (is_array($_POST) AND count($_POST) > 0)
	{
		foreach ($_POST as $key => $val)
		{
			$_POST[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	// Clean $_COOKIE Data
	if (is_array($_COOKIE) AND count($_COOKIE) > 0)
	{
		foreach ($_COOKIE as $key => $val)
		{
			$_COOKIE[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	log_message('debug', "Global POST and COOKIE data sanitized");
}
136
137 // --------------------------------------------------------------------
138
/**
 * Clean Input Data
 *
 * This is a helper function. It undoes magic quotes escaping,
 * optionally runs the XSS filter, and standardizes newline
 * characters to \n. Arrays are processed recursively and their
 * keys are validated with _clean_input_keys().
 *
 * @access	private
 * @param	mixed	string, or array of strings
 * @return	mixed	cleaned string, or array of cleaned strings
 */
function _clean_input_data($str)
{
	if (is_array($str))
	{
		$new_array = array();
		foreach ($str as $key => $val)
		{
			$new_array[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
		return $new_array;
	}

	// We strip slashes if magic quotes is on to keep things consistent.
	// The magic quotes feature was removed in PHP 5.4 and the function
	// get_magic_quotes_gpc() itself was removed in PHP 8.0, so it is only
	// consulted on versions where it can still return TRUE.
	if (version_compare(PHP_VERSION, '5.4', '<') && get_magic_quotes_gpc())
	{
		$str = stripslashes($str);
	}

	// Should we filter the input data?
	if ($this->use_xss_clean === TRUE)
	{
		$str = $this->xss_clean($str);
	}

	// Standardize newlines: CRLF, lone CR and lone LF all become \n
	return preg_replace("/\015\012|\015|\012/", "\n", $str);
}
176
177 // --------------------------------------------------------------------
178
/**
 * Clean Keys
 *
 * This is a helper function. To prevent malicious users
 * from trying to exploit keys we make sure that keys are
 * only named with alpha-numeric text and a few other items
 * (colon, underscore, forward slash and dash). The script is
 * terminated when a key contains any other character.
 *
 * @access	private
 * @param	string
 * @return	string	the key, unchanged
 */
function _clean_input_keys($str)
{
	// The D modifier makes "$" match only at the very end of the key.
	// Without it "$" also matches just before a trailing newline, so a
	// key such as "name\n" would be accepted.
	if ( ! preg_match("/^[a-z0-9:_\/-]+$/iD", $str))
	{
		exit('Disallowed Key Characters.');
	}

	return $str;
}
Rick Ellis112569d2007-02-26 19:19:08 +0000199
200 // --------------------------------------------------------------------
201
/**
 * Fetch an item from the GET array
 *
 * When XSS filtering is requested the filtered value is returned as a
 * copy; $_GET itself is left untouched, so repeated calls do not
 * filter already-filtered data and direct readers of $_GET are not
 * affected by who called this method first.
 *
 * @access	public
 * @param	string	key to look up in $_GET
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE if the key is not set
 */
function get($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_GET[$index]))
	{
		return FALSE;
	}

	$value = $_GET[$index];

	if ($xss_clean !== TRUE)
	{
		return $value;
	}

	if (is_array($value))
	{
		// Arrays are copied on assignment, so this only changes the copy
		foreach ($value as $key => $val)
		{
			$value[$key] = $this->xss_clean($val);
		}

		return $value;
	}

	return $this->xss_clean($value);
}
Derek Allarda72b60d2007-01-31 23:56:11 +0000234
235 // --------------------------------------------------------------------
236
/**
 * Fetch an item from the POST array
 *
 * When XSS filtering is requested the filtered value is returned as a
 * copy; $_POST itself is left untouched, so repeated calls do not
 * filter already-filtered data and direct readers of $_POST are not
 * affected by who called this method first.
 *
 * @access	public
 * @param	string	key to look up in $_POST
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE if the key is not set
 */
function post($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_POST[$index]))
	{
		return FALSE;
	}

	$value = $_POST[$index];

	if ($xss_clean !== TRUE)
	{
		return $value;
	}

	if (is_array($value))
	{
		// Arrays are copied on assignment, so this only changes the copy
		foreach ($value as $key => $val)
		{
			$value[$key] = $this->xss_clean($val);
		}

		return $value;
	}

	return $this->xss_clean($value);
}
269
270 // --------------------------------------------------------------------
271
/**
 * Fetch an item from the COOKIE array
 *
 * @access	public
 * @param	string	key to look up in $_COOKIE
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE if the key is not set
 */
function cookie($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_COOKIE[$index]))
	{
		return FALSE;
	}

	// No filtering requested: hand back the stored value as-is
	if ($xss_clean !== TRUE)
	{
		return $_COOKIE[$index];
	}

	if ( ! is_array($_COOKIE[$index]))
	{
		return $this->xss_clean($_COOKIE[$index]);
	}

	// Filter each element into a fresh array; $_COOKIE is not modified
	$cookie = array();
	foreach ($_COOKIE[$index] as $key => $val)
	{
		$cookie[$key] = $this->xss_clean($val);
	}

	return $cookie;
}
309
310 // --------------------------------------------------------------------
311
/**
 * Fetch an item from the SERVER array
 *
 * @access	public
 * @param	string	key to look up in $_SERVER
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value, or FALSE if the key is not set
 */
function server($index = '', $xss_clean = FALSE)
{
	if (isset($_SERVER[$index]))
	{
		return ($xss_clean === TRUE) ? $this->xss_clean($_SERVER[$index]) : $_SERVER[$index];
	}

	return FALSE;
}
334
335 // --------------------------------------------------------------------
336
/**
 * Fetch the IP Address
 *
 * The address is taken from the first of HTTP_CLIENT_IP, REMOTE_ADDR
 * and HTTP_X_FORWARDED_FOR that has a value, validated with
 * valid_ip() and cached on the object. '0.0.0.0' is returned when no
 * source is available or the value is not a valid address.
 *
 * NOTE(review): HTTP_CLIENT_IP and HTTP_X_FORWARDED_FOR are request
 * headers supplied by the client and HTTP_CLIENT_IP is preferred over
 * REMOTE_ADDR, so the result can be spoofed. The precedence is kept
 * unchanged here; do not use the value for access control.
 *
 * @access	public
 * @return	string
 */
function ip_address()
{
	if ($this->ip_address !== FALSE)
	{
		return $this->ip_address;
	}

	if ($this->server('HTTP_CLIENT_IP'))
	{
		$this->ip_address = $_SERVER['HTTP_CLIENT_IP'];
	}
	elseif ($this->server('REMOTE_ADDR'))
	{
		$this->ip_address = $_SERVER['REMOTE_ADDR'];
	}
	elseif ($this->server('HTTP_X_FORWARDED_FOR'))
	{
		$this->ip_address = $_SERVER['HTTP_X_FORWARDED_FOR'];
	}

	if ($this->ip_address === FALSE)
	{
		$this->ip_address = '0.0.0.0';
		return $this->ip_address;
	}

	// A comma separated list: use the last entry
	if (strstr($this->ip_address, ','))
	{
		$x = explode(',', $this->ip_address);
		$this->ip_address = end($x);
	}

	// Lists are normally written "a, b, c"; without trimming, the space
	// after the comma makes the entry fail validation.
	$this->ip_address = trim($this->ip_address);

	if ( ! $this->valid_ip($this->ip_address))
	{
		$this->ip_address = '0.0.0.0';
	}

	return $this->ip_address;
}
386
387 // --------------------------------------------------------------------
388
/**
 * Validate IP Address
 *
 * Checks that the value is a dotted-quad IPv4 address: exactly four
 * segments, each made of one to three digits with a value of at most
 * 255, and a first segment that does not start with 0.
 *
 * Updated version suggested by Geert De Deckere
 *
 * @access	public
 * @param	string
 * @return	bool
 */
function valid_ip($ip)
{
	$ip_segments = explode('.', $ip);

	// Always 4 segments needed
	if (count($ip_segments) != 4)
	{
		return FALSE;
	}

	// IP can not start with 0
	if (substr($ip_segments[0], 0, 1) == '0')
	{
		return FALSE;
	}

	// Check each segment
	foreach ($ip_segments as $segment)
	{
		// A segment must consist of 1 to 3 digits and nothing else. Requiring
		// at least one digit rejects empty segments such as in "1..2.3" or
		// "1.2.3.", which a "contains no non-digit" test lets through.
		if ( ! preg_match('/^[0-9]{1,3}$/D', $segment) OR $segment > 255)
		{
			return FALSE;
		}
	}

	return TRUE;
}
425
426 // --------------------------------------------------------------------
427
/**
 * User Agent
 *
 * Returns the HTTP_USER_AGENT server value, remembering it on the
 * object once it has been read.
 *
 * @access	public
 * @return	mixed	the user agent string, or FALSE if the header is not set
 */
function user_agent()
{
	if ($this->user_agent === FALSE)
	{
		$this->user_agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : FALSE;
	}

	return $this->user_agent;
}
445
446 // --------------------------------------------------------------------
447
/**
 * Filename Security
 *
 * Removes directory traversal sequences, markup characters, a number
 * of URL-encoded characters and backslashes from a file name.
 *
 * The removal is repeated until the string no longer changes. A single
 * pass can be defeated by input that only forms a forbidden sequence
 * once another one has been removed, e.g. "..</" (becomes "../" when
 * the "<" is stripped) or "..\/" (becomes "../" when the backslash is
 * stripped).
 *
 * @access	public
 * @param	string
 * @return	string
 */
function filename_security($str)
{
	$bad = array(
					"../",
					"./",
					"<!--",
					"-->",
					"<",
					">",
					"'",
					'"',
					'&',
					'$',
					'#',
					'{',
					'}',
					'[',
					']',
					'=',
					';',
					'?',
					"%20",
					"%22",
					"%3c",		// <
					"%253c", 	// < (double encoded)
					"%3e", 		// >
					"%0e", 		// shift-out control character
					"%28", 		// (
					"%29", 		// )
					"%2528", 	// ( (double encoded)
					"%26", 		// &
					"%24", 		// $
					"%3f", 		// ?
					"%3b", 		// ;
					"%3d"		// =
				);

	// Every pass either leaves the string alone or makes it shorter,
	// so this loop always terminates.
	do
	{
		$previous = $str;
		$str = stripslashes(str_replace($bad, '', $str));
	}
	while ($previous !== $str);

	return $str;
}
494
495 // --------------------------------------------------------------------
496
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting Hacks can be
 * prevented.  This function does a fair amount of work but
 * it is extremely thorough, designed to prevent even the
 * most obscure XSS attempts.  Nothing is ever 100% foolproof,
 * of course, but I haven't been able to get anything past
 * the filter.
 *
 * Note: This function should only be used to deal with data
 * upon submission.  It's not something that should
 * be used for general runtime processing.
 *
 * This function was based in part on some code and ideas I
 * got from Bitflux: http://blog.bitflux.ch/wiki/XSS_Prevention
 *
 * To help develop this script I used this great list of
 * vulnerabilities along with a few other hacks I've
 * harvested from examining vulnerabilities in other programs:
 * http://ha.ckers.org/xss.html
 *
 * @access	public
 * @param	string
 * @return	string
 */
function xss_clean($str)
{
	/*
	 * Remove Null Characters
	 *
	 * This prevents sandwiching null characters
	 * between ascii characters, like Java\0script.
	 * Both real NUL bytes and the literal text "\0" are removed.
	 *
	 */
	$str = preg_replace('/\0+/', '', $str);
	$str = preg_replace('/(\\\\0)+/', '', $str);

	/*
	 * Validate standard character entities
	 *
	 * Add a semicolon if missing.  We do this to enable
	 * the conversion of entities to ASCII later.
	 *
	 */
	$str = preg_replace('#(&\#?[0-9a-z]+)[\x00-\x20]*;?#i', "\\1;", $str);

	/*
	 * Validate UTF16 two byte encoding (x00)
	 *
	 * Just as above, adds a semicolon if missing.
	 *
	 */
	$str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Normally urldecode() would be easier but it removes plus signs
	 *
	 * Runs of %20 are parked behind a placeholder so that they are not
	 * turned into entities, then restored as a single %20.
	 *
	 */
	$str = preg_replace("/(%20)+/", '9u3iovBnRThju941s89rKozm', $str);
	$str = preg_replace("/%u0([a-z0-9]{3})/i", "&#x\\1;", $str);
	$str = preg_replace("/%([a-z0-9]{2})/i", "&#x\\1;", $str);
	$str = str_replace('9u3iovBnRThju941s89rKozm', "%20", $str);

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 *
	 */
	$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_attribute_conversion'), $str);

	$str = preg_replace_callback("/<([\w]+)[^>]*>/si", array($this, '_html_entity_decode_callback'), $str);

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like this: ja[tab]vascript
	 * NOTE: we deal with spaces between characters later.
	 * NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
	 * so we use str_replace.
	 *
	 */
	$str = str_replace("\t", " ", $str);

	/*
	 * Not Allowed Under Any Conditions
	 */
	$str = $this->_remove_never_allowed($str);

	/*
	 * Makes PHP tags safe
	 *
	 *  Note: XML tags are inadvertently replaced too:
	 *
	 *	<?xml
	 *
	 * But it doesn't seem to pose a problem.
	 *
	 * The closing tag is written as '?'.'>' so that this source line
	 * does not itself contain a PHP closing tag.
	 *
	 */
	$str = str_replace(array('<?php', '<?PHP', '<?', '?'.'>'),  array('&lt;?php', '&lt;?PHP', '&lt;?', '?&gt;'), $str);

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like:  j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 *
	 */
	$words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
	foreach ($words as $word)
	{
		// Build "j\s*a\s*v\s*a..." : the letters with optional whitespace between them
		$temp = '';
		for ($i = 0; $i < strlen($word); $i++)
		{
			$temp .= substr($word, $i, 1)."\s*";
		}

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto"
		//
		// A callback is used instead of the "e" (eval) pattern modifier, which
		// was removed in PHP 7.0 and makes preg_replace() return NULL there.
		$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 *
	 * Repeated until nothing changes, because removing one piece can
	 * join the text around it into a new match. The preg_match() tests
	 * only avoid running the heavier replacements when they cannot match.
	 */
	do
	{
		$original = $str;

		if (preg_match("/<\/a>/i", $str))
		{
			$str = preg_replace_callback("#<a.*?</a>#si", array($this, '_js_link_removal'), $str);
		}

		if (preg_match("/<img/i", $str))
		{
			$str = preg_replace_callback("#<img.*?".">#si", array($this, '_js_img_removal'), $str);
		}

		if (preg_match("/(script|xss)/i", $str))
		{
			$str = preg_replace("#</*(script|xss).*?\>#si", "", $str);
		}
	}
	while ($original !== $str);

	unset($original);

	/*
	 * Remove JavaScript Event Handlers
	 *
	 * Note: This code is a little blunt.  A tag that contains one
	 * of these attribute names has its angle brackets converted to
	 * entities, so the whole tag is displayed instead of rendered,
	 * but it's unlikely to be a problem.
	 *
	 */
	$event_handlers = array('onblur','onchange','onclick','onfocus','onload','onmouseover','onmouseup','onmousedown','onselect','onsubmit','onunload','onkeypress','onkeydown','onkeyup','onresize', 'xmlns');
	$str = preg_replace("#<([^>]+)(".implode('|', $event_handlers).")([^>]*)>#iU", "&lt;\\1\\2\\3&gt;", $str);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 *
	 */
	$str = preg_replace('#<(/*\s*)(alert|applet|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|layer|link|meta|object|plaintext|style|script|textarea|title|xml|xss)([^>]*)>#is', "&lt;\\1\\2\\3&gt;", $str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed.  Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:		eval&#40;'some code'&#41;
	 *
	 */
	$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

	/*
	 * Final clean up
	 *
	 * This adds a bit of extra precaution in case
	 * something got through the above filters
	 *
	 */
	$str = $this->_remove_never_allowed($str);

	log_message('debug', "XSS Filtering completed");
	return $str;
}

// --------------------------------------------------------------------

/**
 * Remove Never Allowed Strings
 *
 * Helper for xss_clean(). Replaces a fixed list of strings and
 * patterns that are not allowed under any conditions. Comment and
 * CDATA delimiters are converted to entities; everything else is
 * replaced with the text [removed].
 *
 * @access	private
 * @param	string
 * @return	string
 */
function _remove_never_allowed($str)
{
	// Plain strings, matched case-sensitively
	$never_allowed_str = array(
								'document.cookie'	=> '[removed]',
								'document.write'	=> '[removed]',
								'.parentNode'		=> '[removed]',
								'.innerHTML'		=> '[removed]',
								'window.location'	=> '[removed]',
								'-moz-binding'		=> '[removed]',
								'<!--'				=> '&lt;!--',
								'-->'				=> '--&gt;',
								'<![CDATA['			=> '&lt;![CDATA['
							);

	foreach ($never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	// Regular expressions, matched case-insensitively
	$never_allowed_regex = array(
									"javascript\s*:"	=> '[removed]',
									"expression\s*\("	=> '[removed]', // CSS and IE
									"Redirect\s+302"	=> '[removed]'
								);

	foreach ($never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	return $str;
}

// --------------------------------------------------------------------

/**
 * Compact Exploded Words
 *
 * Callback function for xss_clean() to remove whitespace from
 * things like j a v a s c r i p t
 *
 * @access	private
 * @param	array	[1] the exploded word, [2] the non-word character that follows it
 * @return	string
 */
function _compact_exploded_words($matches)
{
	return preg_replace('/\s+/s', '', $matches[1]).$matches[2];
}
790
791 // --------------------------------------------------------------------
Derek Jones01f72ca2007-05-04 18:19:17 +0000792
/**
 * JS Link Removal
 *
 * Callback function for xss_clean() to sanitize links.
 * It receives one <a ...>...</a> match at a time, which limits
 * the PCRE backtracks, making it more performance friendly
 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on link-heavy strings
 *
 * @access	private
 * @param	array	regex match; element 0 is the whole link
 * @return	string	the link, or an empty string if its href contained a disallowed token
 */
function _js_link_removal($match)
{
	$link = $match[0];
	$disallowed = "#<a.+?href=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>.*?</a>#si";

	return preg_replace($disallowed, "", $link);
}
809
/**
 * JS Image Removal
 *
 * Callback function for xss_clean() to sanitize image tags.
 * It receives one <img ...> match at a time, which limits
 * the PCRE backtracks, making it more performance friendly
 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
 * PHP 5.2+ on image tag heavy strings
 *
 * @access	private
 * @param	array	regex match; element 0 is the whole image tag
 * @return	string	the tag, or an empty string if its src contained a disallowed token
 */
function _js_img_removal($match)
{
	$image = $match[0];
	$disallowed = "#<img.+?src=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>#si";

	return preg_replace($disallowed, "", $image);
}
Derek Allarda72b60d2007-01-31 23:56:11 +0000826
Derek Jones01f72ca2007-05-04 18:19:17 +0000827 // --------------------------------------------------------------------
Derek Jones303c9cb2007-07-12 19:12:37 +0000828
/**
 * Attribute Conversion
 *
 * Used as a callback for XSS Clean. Receives one quoted attribute
 * (name="value") and replaces every ">" inside it with an entity.
 *
 * NOTE(review): ">" is replaced with "&lt;" rather than "&gt;". That
 * looks like a slip, but xss_clean() decodes entities inside tags right
 * after this step, so changing the entity changes which character
 * reappears in the tag - verify the later filters before altering it.
 *
 * @access	public
 * @param	array	regex match; element 0 is the whole attribute
 * @return	string
 */
function _attribute_conversion($match)
{
	$attribute = $match[0];

	return str_replace('>', '&lt;', $attribute);
}
842
843 // --------------------------------------------------------------------
844
/**
 * HTML Entity Decode Callback
 *
 * Used as a callback for XSS Clean. Decodes the entities in one
 * matched tag using the character set configured in the global
 * Config object.
 *
 * @access	public
 * @param	array	regex match; element 0 is the whole tag
 * @return	string
 */
function _html_entity_decode_callback($match)
{
	global $CFG;

	return $this->_html_entity_decode($match[0], strtoupper($CFG->item('charset')));
}
861
862 // --------------------------------------------------------------------
863
/**
 * HTML Entities Decode
 *
 * This function is a replacement for html_entity_decode()
 *
 * In some versions of PHP the native function does not work
 * when UTF-8 is the specified character set, so this gives us
 * a work-around.  More info here:
 * http://bugs.php.net/bug.php?id=25670
 *
 * NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
 * character set, and the PHP developers said they were not back porting the
 * fix to versions other than PHP 5.x.
 *
 * @access	private
 * @param	string	text to decode
 * @param	string	character set handed to html_entity_decode()
 * @return	string
 */
function _html_entity_decode($str, $charset='UTF-8')
{
	if (stristr($str, '&') === FALSE) return $str;

	// The reason we are not using html_entity_decode() by itself is because
	// while it is not technically correct to leave out the semicolon
	// at the end of an entity most browsers will still interpret the entity
	// correctly.  html_entity_decode() does not convert entities without
	// semicolons, so we are left with our own little solution here. Bummer.

	$use_native = (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>=')));

	if ($use_native)
	{
		$str = html_entity_decode($str, ENT_COMPAT, $charset);

		// Only entities without a semicolon are left for the patterns below
		$terminator = '';
	}
	else
	{
		// No native decoding happened: also consume an optional semicolon
		$terminator = ';{0,1}';
	}

	// Numeric Entities: pattern => number base of the captured digits
	$numeric_entities = array(
								'~&#x([0-9a-f]{2,5})'.$terminator.'~i'	=> 16,
								'~&#([0-9]{2,4})'.$terminator.'~'		=> 10
							);

	foreach ($numeric_entities as $pattern => $base)
	{
		// The "e" (eval) pattern modifier was removed in PHP 7.0, so the
		// string is split on the entities instead: with one capture group
		// every odd element of $parts holds the digits of one entity.
		$parts = preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);

		if ( ! is_array($parts))
		{
			continue;
		}

		for ($i = 1, $total = count($parts); $i < $total; $i += 2)
		{
			// intval() with an explicit base reads "065" as decimal 65;
			// evaluating the digits as PHP code treated them as octal.
			$parts[$i] = chr(intval($parts[$i], $base));
		}

		$str = implode('', $parts);
	}

	if ($use_native)
	{
		return $str;
	}

	// Literal Entities - Slightly slow so we do another check
	// and only translate when an ampersand is still present
	if (stristr($str, '&') !== FALSE)
	{
		$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
	}

	return $str;
}
917
918}
919// END Input class
adminb0dd10f2006-08-25 17:25:49 +0000920?>