blob: 4044024917b3abcbbd7cb0e613d4bbbbce66eb3d [file] [log] [blame]
Derek Allarda72b60d2007-01-31 23:56:11 +00001<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
2/**
Derek Allardd2df9bc2007-04-15 17:41:17 +00003 * CodeIgniter
Derek Allarda72b60d2007-01-31 23:56:11 +00004 *
5 * An open source application development framework for PHP 4.3.2 or newer
6 *
7 * @package CodeIgniter
Derek Allard3d879d52008-01-18 19:41:32 +00008 * @author ExpressionEngine Dev Team
Derek Allardd2df9bc2007-04-15 17:41:17 +00009 * @copyright Copyright (c) 2006, EllisLab, Inc.
Derek Allard6838f002007-10-04 19:29:59 +000010 * @license http://www.codeigniter.com/user_guide/license.html
Derek Allarda72b60d2007-01-31 23:56:11 +000011 * @link http://www.codeigniter.com
12 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * Input Class
20 *
21 * Pre-processes global input data for security
22 *
23 * @package CodeIgniter
24 * @subpackage Libraries
25 * @category Input
Derek Allard3d879d52008-01-18 19:41:32 +000026 * @author ExpressionEngine Dev Team
Derek Allarda72b60d2007-01-31 23:56:11 +000027 * @link http://www.codeigniter.com/user_guide/libraries/input.html
28 */
29class CI_Input {
30 var $use_xss_clean = FALSE;
31 var $ip_address = FALSE;
32 var $user_agent = FALSE;
33 var $allow_get_array = FALSE;
34
/**
 * Constructor
 *
 * Reads the 'global_xss_filtering' and 'enable_query_strings' config
 * items (each one only takes effect when it is boolean TRUE) and then
 * runs the global input sanitizer.
 *
 * @access	public
 */
function CI_Input()
{
	log_message('debug', "Input Class Initialized");

	$config =& load_class('Config');

	// A strict comparison already produces the boolean we want to store
	$xss_enabled = ($config->item('global_xss_filtering') === TRUE);
	$get_enabled = ($config->item('enable_query_strings') === TRUE);

	$this->use_xss_clean   = $xss_enabled;
	$this->allow_get_array = $get_enabled;

	$this->_sanitize_globals();
}
52
53 // --------------------------------------------------------------------
54
/**
 * Sanitize Globals
 *
 * This function does the following:
 *
 * Sets to NULL every global variable that is named after a GET, POST
 * or COOKIE key, unless the name is on the protected list
 *
 * Empties $_GET (if query strings are not enabled)
 *
 * Validates the keys and cleans the values of $_GET (when allowed),
 * $_POST and $_COOKIE; cleaning includes standardizing newline
 * characters to \n
 *
 * @access	private
 * @return	void
 */
function _sanitize_globals()
{
	// Would kind of be "wrong" to unset any of these GLOBALS.
	// 'CFG' is protected because _html_entity_decode_callback() reads it
	// through "global $CFG"; without it a request key named CFG would
	// wipe the config object.
	// NOTE(review): other framework globals may need listing here too.
	$protected = array('_SERVER', '_GET', '_POST', '_COOKIE', '_FILES', '_REQUEST', '_SESSION', '_ENV', 'GLOBALS', 'HTTP_RAW_POST_DATA', 'CFG');

	// Unset globals for security.
	// This is effectively the same as register_globals = off
	//
	// $GLOBALS is written directly instead of using "global $$key": the
	// variable-variable form binds the name into this method's scope, so
	// a request key such as "protected" or "key" would clobber our locals.
	foreach (array($_GET, $_POST, $_COOKIE) as $global)
	{
		if ( ! is_array($global))
		{
			if ( ! in_array((string) $global, $protected, TRUE))
			{
				$GLOBALS[(string) $global] = NULL;
			}
		}
		else
		{
			foreach ($global as $key => $val)
			{
				if ( ! in_array((string) $key, $protected, TRUE))
				{
					$GLOBALS[$key] = NULL;
				}
			}
		}
	}

	// Is $_GET data allowed? If not we'll set the $_GET to an empty array
	if ($this->allow_get_array == FALSE)
	{
		$_GET = array();
	}
	elseif (is_array($_GET) AND count($_GET) > 0)
	{
		foreach ($_GET as $key => $val)
		{
			$_GET[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	// Clean $_POST Data
	if (is_array($_POST) AND count($_POST) > 0)
	{
		foreach ($_POST as $key => $val)
		{
			$_POST[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	// Clean $_COOKIE Data
	if (is_array($_COOKIE) AND count($_COOKIE) > 0)
	{
		foreach ($_COOKIE as $key => $val)
		{
			$_COOKIE[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
	}

	log_message('debug', "Global POST and COOKIE data sanitized");
}
135
136 // --------------------------------------------------------------------
137
/**
 * Clean Input Data
 *
 * This is a helper function. It strips magic-quotes slashes where
 * that feature is active, optionally runs the XSS filter, and
 * standardizes newline characters to \n. Arrays are cleaned
 * recursively and their keys are validated.
 *
 * @access	private
 * @param	mixed	string, or array of strings, to clean
 * @return	mixed	the cleaned string or array
 */
function _clean_input_data($str)
{
	if (is_array($str))
	{
		$new_array = array();
		foreach ($str as $key => $val)
		{
			$new_array[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
		}
		return $new_array;
	}

	// We strip slashes if magic quotes is on to keep things consistent.
	// Magic quotes were dropped from PHP in 5.4 and get_magic_quotes_gpc()
	// itself was removed in PHP 8, so it is only consulted on versions
	// where it can actually report TRUE.
	if (version_compare(PHP_VERSION, '5.4', '<') AND get_magic_quotes_gpc())
	{
		$str = stripslashes($str);
	}

	// Should we filter the input data?
	if ($this->use_xss_clean === TRUE)
	{
		$str = $this->xss_clean($str);
	}

	// Standardize newlines
	return preg_replace("/\015\012|\015|\012/", "\n", $str);
}
175
176 // --------------------------------------------------------------------
177
/**
 * Clean Keys
 *
 * This is a helper function. To prevent malicious users
 * from trying to exploit keys we make sure that keys are
 * only named with alpha-numeric text and a few other items
 * (colon, underscore, forward slash and dash).
 *
 * Execution is halted with exit() when the key contains
 * anything else.
 *
 * @access	private
 * @param	string
 * @return	string	the key, unchanged
 */
function _clean_input_keys($str)
{
	// The D modifier makes "$" match only at the very end of the subject.
	// Without it a key with a trailing newline ("name\n") is accepted.
	if ( ! preg_match("/^[a-z0-9:_\/-]+$/iD", $str))
	{
		exit('Disallowed Key Characters.');
	}

	return $str;
}
Rick Ellis112569d2007-02-26 19:19:08 +0000198
199 // --------------------------------------------------------------------
200
/**
 * Fetch an item from the GET array
 *
 * When XSS cleaning is requested the cleaned value is returned as a
 * copy; $_GET itself is left untouched, matching what cookie() does.
 *
 * @access	public
 * @param	string	key to look up in $_GET
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE if the key is not set
 */
function get($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_GET[$index]))
	{
		return FALSE;
	}

	$value = $_GET[$index];

	if ($xss_clean !== TRUE)
	{
		return $value;
	}

	if ( ! is_array($value))
	{
		return $this->xss_clean($value);
	}

	// Build a cleaned copy rather than writing back into the superglobal
	$cleaned = array();
	foreach ($value as $key => $val)
	{
		$cleaned[$key] = $this->xss_clean($val);
	}

	return $cleaned;
}
Derek Allarda72b60d2007-01-31 23:56:11 +0000233
234 // --------------------------------------------------------------------
235
/**
 * Fetch an item from the POST array
 *
 * When XSS cleaning is requested the cleaned value is returned as a
 * copy; $_POST itself is left untouched, matching what cookie() does.
 *
 * @access	public
 * @param	string	key to look up in $_POST
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value (string or array), or FALSE if the key is not set
 */
function post($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_POST[$index]))
	{
		return FALSE;
	}

	$value = $_POST[$index];

	if ($xss_clean !== TRUE)
	{
		return $value;
	}

	if ( ! is_array($value))
	{
		return $this->xss_clean($value);
	}

	// Build a cleaned copy rather than writing back into the superglobal
	$cleaned = array();
	foreach ($value as $key => $val)
	{
		$cleaned[$key] = $this->xss_clean($val);
	}

	return $cleaned;
}
268
269 // --------------------------------------------------------------------
270
/**
 * Fetch an item from the COOKIE array
 *
 * With XSS cleaning requested, an array value is cleaned element by
 * element into a new array and a scalar value is cleaned directly.
 *
 * @access	public
 * @param	string	key to look up in $_COOKIE
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value, or FALSE if the key is not set
 */
function cookie($index = '', $xss_clean = FALSE)
{
	if ( ! isset($_COOKIE[$index]))
	{
		return FALSE;
	}

	// Nothing to filter: hand the stored value straight back
	if ($xss_clean !== TRUE)
	{
		return $_COOKIE[$index];
	}

	if ( ! is_array($_COOKIE[$index]))
	{
		return $this->xss_clean($_COOKIE[$index]);
	}

	$cleaned = array();
	foreach ($_COOKIE[$index] as $name => $item)
	{
		$cleaned[$name] = $this->xss_clean($item);
	}

	return $cleaned;
}
308
309 // --------------------------------------------------------------------
310
/**
 * Fetch an item from the SERVER array
 *
 * @access	public
 * @param	string	key to look up in $_SERVER
 * @param	bool	TRUE to run the value through xss_clean()
 * @return	mixed	the value, or FALSE if the key is not set
 */
function server($index = '', $xss_clean = FALSE)
{
	if (isset($_SERVER[$index]))
	{
		$value = $_SERVER[$index];

		return ($xss_clean === TRUE) ? $this->xss_clean($value) : $value;
	}

	return FALSE;
}
333
334 // --------------------------------------------------------------------
335
/**
 * Fetch the IP Address
 *
 * The result is cached in $this->ip_address after the first call.
 * The candidate sources are tried in this order: HTTP_CLIENT_IP,
 * REMOTE_ADDR, HTTP_X_FORWARDED_FOR. If the chosen value is a comma
 * separated list its last entry is used. '0.0.0.0' is returned when
 * no source is available or the value fails valid_ip().
 *
 * NOTE(review): HTTP_CLIENT_IP and HTTP_X_FORWARDED_FOR are request
 * headers supplied by the client, so the returned address can be
 * spoofed; do not rely on it for access control.
 *
 * @access	public
 * @return	string
 */
function ip_address()
{
	if ($this->ip_address !== FALSE)
	{
		return $this->ip_address;
	}

	if ($this->server('HTTP_CLIENT_IP'))
	{
		$this->ip_address = $_SERVER['HTTP_CLIENT_IP'];
	}
	elseif ($this->server('REMOTE_ADDR'))
	{
		$this->ip_address = $_SERVER['REMOTE_ADDR'];
	}
	elseif ($this->server('HTTP_X_FORWARDED_FOR'))
	{
		$this->ip_address = $_SERVER['HTTP_X_FORWARDED_FOR'];
	}

	if ($this->ip_address === FALSE)
	{
		$this->ip_address = '0.0.0.0';
		return $this->ip_address;
	}

	if (strpos($this->ip_address, ',') !== FALSE)
	{
		$x = explode(',', $this->ip_address);

		// Lists are commonly written "a, b": trim the entry, otherwise the
		// leading space makes valid_ip() reject an otherwise good address
		$this->ip_address = trim(end($x));
	}

	if ( ! $this->valid_ip($this->ip_address))
	{
		$this->ip_address = '0.0.0.0';
	}

	return $this->ip_address;
}
385
386 // --------------------------------------------------------------------
387
/**
 * Validate IP Address
 *
 * Checks for a dotted-quad IPv4 address: exactly four segments, each
 * one to three digits and no greater than 255, with the first segment
 * not starting with 0.
 *
 * Updated version suggested by Geert De Deckere
 *
 * @access	public
 * @param	string
 * @return	bool
 */
function valid_ip($ip)
{
	$ip_segments = explode('.', $ip);

	// Always 4 segments needed
	if (count($ip_segments) != 4)
	{
		return FALSE;
	}

	// IP can not start with 0
	if (substr($ip_segments[0], 0, 1) === '0')
	{
		return FALSE;
	}

	// Check each segment
	foreach ($ip_segments as $segment)
	{
		// IP segments must be non-empty, digits only, and can not be
		// longer than 3 digits or greater than 255. The explicit empty
		// check is needed because '' passes all of the other tests,
		// which used to let addresses such as "1..2.3" through.
		if ($segment === '' OR preg_match("/[^0-9]/", $segment) OR strlen($segment) > 3 OR (int) $segment > 255)
		{
			return FALSE;
		}
	}

	return TRUE;
}
424
425 // --------------------------------------------------------------------
426
/**
 * User Agent
 *
 * Returns the HTTP_USER_AGENT server value, or FALSE when the
 * request did not carry one. A value that has been found is kept
 * in $this->user_agent for later calls.
 *
 * @access	public
 * @return	mixed	user agent string, or FALSE
 */
function user_agent()
{
	if ($this->user_agent === FALSE)
	{
		$this->user_agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : FALSE;
	}

	return $this->user_agent;
}
444
445 // --------------------------------------------------------------------
446
/**
 * Filename Security
 *
 * Strips backslash escaping and then removes every sequence on the
 * disallowed list (directory traversal, markup and shell
 * metacharacters, and a set of percent-encoded characters).
 *
 * Three weaknesses of a single str_replace() pass are avoided:
 *  - removal is repeated until nothing changes, so deleting one
 *    sequence can not leave another one behind ("%3%3cc" -> "%3c")
 *  - matching is case-insensitive, so "%3C" is treated like "%3c"
 *  - slashes are stripped before filtering rather than after, so
 *    "..\/" can not turn into "../" once the filter has run
 *
 * @access	public
 * @param	string
 * @return	string
 */
function filename_security($str)
{
	$bad = array(
					"../",
					"./",
					"<!--",
					"-->",
					"<",
					">",
					"'",
					'"',
					'&',
					'$',
					'#',
					'{',
					'}',
					'[',
					']',
					'=',
					';',
					'?',
					"%20",
					"%22",
					"%3c",		// <
					"%253c",	// < (double encoded)
					"%3e",		// >
					"%0e",		// control character 0x0E
					"%28",		// (
					"%29",		// )
					"%2528",	// ( (double encoded)
					"%26",		// &
					"%24",		// $
					"%3f",		// ?
					"%3b",		// ;
					"%3d"		// =
				);

	// Build one case-insensitive alternation out of the literal sequences
	$quoted = array();
	foreach ($bad as $sequence)
	{
		$quoted[] = preg_quote($sequence, '#');
	}
	$pattern = '#'.implode('|', $quoted).'#i';

	$str = stripslashes($str);

	do
	{
		$before = $str;
		$str = preg_replace($pattern, '', $str);

		// preg_replace() yields NULL on an engine error; fail closed
		if ($str === NULL)
		{
			return '';
		}
	}
	while ($before !== $str);

	return $str;
}
493
494 // --------------------------------------------------------------------
495
/**
 * XSS Clean
 *
 * Sanitizes data so that Cross Site Scripting Hacks can be
 * prevented. This function does a fair amount of work but
 * it is extremely thorough, designed to prevent even the
 * most obscure XSS attempts. Nothing is ever 100% foolproof,
 * of course, but I haven't been able to get anything past
 * the filter.
 *
 * Note: This function should only be used to deal with data
 * upon submission. It's not something that should
 * be used for general runtime processing.
 *
 * This function was based in part on some code and ideas I
 * got from Bitflux: http://blog.bitflux.ch/wiki/XSS_Prevention
 *
 * To help develop this script I used this great list of
 * vulnerabilities along with a few other hacks I've
 * harvested from examining vulnerabilities in other programs:
 * http://ha.ckers.org/xss.html
 *
 * @access	public
 * @param	mixed	string to clean, or an array whose elements are cleaned recursively
 * @return	mixed	the cleaned string (or array)
 */
function xss_clean($str)
{
	// Arrays are cleaned element by element
	if (is_array($str))
	{
		foreach ($str as $key => $val)
		{
			$str[$key] = $this->xss_clean($val);
		}

		return $str;
	}

	// Literal strings that are not allowed under any conditions.
	// Applied once early on and once more as the final clean up.
	$never_allowed_str = array(
								'document.cookie'	=> '[removed]',
								'document.write'	=> '[removed]',
								'.parentNode'		=> '[removed]',
								'.innerHTML'		=> '[removed]',
								'window.location'	=> '[removed]',
								'-moz-binding'		=> '[removed]',
								'<!--'				=> '&lt;!--',
								'-->'				=> '--&gt;',
								'<![CDATA['			=> '&lt;![CDATA['
							);

	// Same idea, but expressed as regular expressions
	$never_allowed_regex = array(
								"javascript\s*:"	=> '[removed]',
								"expression\s*\("	=> '[removed]', // CSS and IE
								"Redirect\s+302"	=> '[removed]'
							);

	/*
	 * Remove Null Characters
	 *
	 * This prevents sandwiching null characters
	 * between ascii characters, like Java\0script.
	 *
	 */
	$str = preg_replace('/\0+/', '', $str);
	$str = preg_replace('/(\\\\0)+/', '', $str);

	/*
	 * Validate standard character entities
	 *
	 * Add a semicolon if missing.  We do this to enable
	 * the conversion of entities to ASCII later.
	 *
	 */
	$str = preg_replace('#(&\#?[0-9a-z]+)[\x00-\x20]*;?#i', "\\1;", $str);

	/*
	 * Validate UTF16 two byte encoding (x00)
	 *
	 * Just as above, adds a semicolon if missing.
	 *
	 */
	$str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);

	/*
	 * URL Decode
	 *
	 * Just in case stuff like this is submitted:
	 *
	 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
	 *
	 * Note: Normally urldecode() would be easier but it removes plus signs
	 *
	 * Runs of %20 are parked behind a placeholder token so that they
	 * are not turned into entities by the two replacements in between.
	 *
	 */
	$str = preg_replace("/(%20)+/", '9u3iovBnRThju941s89rKozm', $str);
	$str = preg_replace("/%u0([a-z0-9]{3})/i", "&#x\\1;", $str);
	$str = preg_replace("/%([a-z0-9]{2})/i", "&#x\\1;", $str);
	$str = str_replace('9u3iovBnRThju941s89rKozm', "%20", $str);

	/*
	 * Convert character entities to ASCII
	 *
	 * This permits our tests below to work reliably.
	 * We only convert entities that are within tags since
	 * these are the ones that will pose security problems.
	 *
	 */
	$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_attribute_conversion'), $str);

	$str = preg_replace_callback("/<([\w]+)[^>]*>/si", array($this, '_html_entity_decode_callback'), $str);

	/*
	 * Convert all tabs to spaces
	 *
	 * This prevents strings like this: ja	vascript
	 * NOTE: we deal with spaces between characters later.
	 * NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
	 * so we use str_replace.
	 *
	 */
	$str = str_replace("\t", " ", $str);

	/*
	 * Not Allowed Under Any Conditions
	 */
	foreach ($never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	/*
	 * Makes PHP tags safe
	 *
	 *  Note: XML tags are inadvertently replaced too:
	 *
	 *	<?xml
	 *
	 * But it doesn't seem to pose a problem.
	 *
	 */
	$str = str_replace(array('<?php', '<?PHP', '<?', '?'.'>'),  array('&lt;?php', '&lt;?PHP', '&lt;?', '?&gt;'), $str);

	/*
	 * Compact any exploded words
	 *
	 * This corrects words like:  j a v a s c r i p t
	 * These words are compacted back to their correct state.
	 *
	 */
	$words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
	foreach ($words as $word)
	{
		$temp = '';
		for ($i = 0; $i < strlen($word); $i++)
		{
			$temp .= substr($word, $i, 1)."\s*";
		}

		// We only want to do this when it is followed by a non-word character
		// That way valid stuff like "dealer to" does not become "dealerto".
		// A callback is used instead of the /e modifier, which was removed
		// in PHP 7 (there preg_replace() fails and returns NULL).
		$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
	}

	/*
	 * Remove disallowed Javascript in links or img tags
	 *
	 * Repeated until a pass changes nothing, since removing one
	 * construct can expose another.
	 */
	do
	{
		$original = $str;

		if (preg_match("/<\/a>/i", $str))
		{
			$str = preg_replace_callback("#<a.*?</a>#si", array($this, '_js_link_removal'), $str);
		}

		if (preg_match("/img/i", $str))
		{
			$str = preg_replace_callback("#<img.*?".">#si", array($this, '_js_img_removal'), $str);
		}

		if (preg_match("/(script|xss)/i", $str))
		{
			$str = preg_replace("#</*(script|xss).*?\>#si", "", $str);
		}
	}
	while ($original !== $str);

	unset($original);

	/*
	 * Remove JavaScript Event Handlers
	 *
	 * Note: This code is a little blunt.  It converts the
	 * angle brackets of any tag that contains an event
	 * handler name to entities, but it's unlikely to be a problem.
	 *
	 */
	$event_handlers = array('onblur','onchange','onclick','onfocus','onload','onmouseover','onmouseup','onmousedown','onselect','onsubmit','onunload','onkeypress','onkeydown','onkeyup','onresize', 'xmlns');
	$str = preg_replace("#<([^>]+)(".implode('|', $event_handlers).")([^>]*)>#iU", "&lt;\\1\\2\\3&gt;", $str);

	/*
	 * Sanitize naughty HTML elements
	 *
	 * If a tag containing any of the words in the list
	 * below is found, the tag gets converted to entities.
	 *
	 * So this: <blink>
	 * Becomes: &lt;blink&gt;
	 *
	 */
	$str = preg_replace('#<(/*\s*)(alert|applet|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|layer|link|meta|object|plaintext|style|script|textarea|title|xml|xss)([^>]*)>#is', "&lt;\\1\\2\\3&gt;", $str);

	/*
	 * Sanitize naughty scripting elements
	 *
	 * Similar to above, only instead of looking for
	 * tags it looks for PHP and JavaScript commands
	 * that are disallowed.  Rather than removing the
	 * code, it simply converts the parenthesis to entities
	 * rendering the code un-executable.
	 *
	 * For example:	eval('some code')
	 * Becomes:		eval&#40;'some code'&#41;
	 *
	 */
	$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

	/*
	 * Final clean up
	 *
	 * This adds a bit of extra precaution in case
	 * something got through the above filters
	 *
	 */
	foreach ($never_allowed_str as $key => $val)
	{
		$str = str_replace($key, $val, $str);
	}

	foreach ($never_allowed_regex as $key => $val)
	{
		$str = preg_replace("#".$key."#i", $val, $str);
	}

	log_message('debug', "XSS Filtering completed");
	return $str;
}

// --------------------------------------------------------------------

/**
 * Compact Exploded Words
 *
 * Callback function for xss_clean() to remove whitespace from
 * things like j a v a s c r i p t
 *
 * @access	private
 * @param	array	regex match: [1] the exploded word, [2] the non-word character after it
 * @return	string	the word without whitespace, followed by the trailing character
 */
function _compact_exploded_words($matches)
{
	return preg_replace('/\s+/s', '', $matches[1]).$matches[2];
}
789
790 // --------------------------------------------------------------------
Derek Jones01f72ca2007-05-04 18:19:17 +0000791
/**
 * JS Link Removal
 *
 * Callback function for xss_clean() to sanitize links.
 * It receives one <a>...</a> element at a time and blanks it
 * when its href contains a disallowed scripting construct.
 * Working per element limits the PCRE backtracks, making it more
 * performance friendly and prevents PREG_BACKTRACK_LIMIT_ERROR
 * from being triggered in PHP 5.2+ on link-heavy strings
 *
 * @access	private
 * @param	array	regex match; element 0 is the full link markup
 * @return	string	the link markup, or an empty string if it was removed
 */
function _js_link_removal($match)
{
	$link = $match[0];
	$disallowed = "#<a.+?href=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>.*?</a>#si";

	return preg_replace($disallowed, "", $link);
}
808
/**
 * JS Image Removal
 *
 * Callback function for xss_clean() to sanitize image tags.
 * It receives one <img ...> tag at a time and blanks it when
 * its src contains a disallowed scripting construct.
 * Working per tag limits the PCRE backtracks, making it more
 * performance friendly and prevents PREG_BACKTRACK_LIMIT_ERROR
 * from being triggered in PHP 5.2+ on image tag heavy strings
 *
 * @access	private
 * @param	array	regex match; element 0 is the full image tag
 * @return	string	the image tag, or an empty string if it was removed
 */
function _js_img_removal($match)
{
	$image = $match[0];
	$disallowed = "#<img.+?src=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>#si";

	return preg_replace($disallowed, "", $image);
}
Derek Allarda72b60d2007-01-31 23:56:11 +0000825
Derek Jones01f72ca2007-05-04 18:19:17 +0000826 // --------------------------------------------------------------------
Derek Jones303c9cb2007-07-12 19:12:37 +0000827
/**
 * Attribute Conversion
 *
 * Used as a callback for XSS Clean. Receives one quoted attribute
 * (name="value") and converts any ">" inside it to its entity, so
 * that a ">" in an attribute value is not mistaken for the end of
 * the tag by the tag-matching expression that runs next.
 *
 * @access	private
 * @param	array	regex match; element 0 is the full attribute text
 * @return	string
 */
function _attribute_conversion($match)
{
	// ">" must become "&gt;". It used to be written as "&lt;", which
	// turned into "<" when the entities were decoded again.
	return str_replace('>', '&gt;', $match[0]);
}
841
842 // --------------------------------------------------------------------
843
/**
 * HTML Entity Decode Callback
 *
 * Used as a callback for XSS Clean. Decodes the entities in one
 * matched tag, using the upper-cased 'charset' config item read
 * from the global $CFG object.
 *
 * @access	private
 * @param	array	regex match; element 0 is the full tag
 * @return	string
 */
function _html_entity_decode_callback($match)
{
	global $CFG;

	return $this->_html_entity_decode($match[0], strtoupper($CFG->item('charset')));
}
860
861 // --------------------------------------------------------------------
862
/**
 * HTML Entities Decode
 *
 * This function is a replacement for html_entity_decode()
 *
 * In some versions of PHP the native function does not work
 * when UTF-8 is the specified character set, so this gives us
 * a work-around.  More info here:
 * http://bugs.php.net/bug.php?id=25670
 *
 * NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
 * character set, and the PHP developers said they were not back porting the
 * fix to versions other than PHP 5.x.
 *
 * Numeric entities are reduced to a single byte (code point modulo
 * 256); that is enough for the ASCII checks xss_clean() performs.
 *
 * @access	private
 * @param	string	text that may contain entities
 * @param	string	character set handed to html_entity_decode()
 * @return	string
 */
function _html_entity_decode($str, $charset='UTF-8')
{
	if (stristr($str, '&') === FALSE) return $str;

	// The reason we are not using html_entity_decode() by itself is because
	// while it is not technically correct to leave out the semicolon
	// at the end of an entity most browsers will still interpret the entity
	// correctly.  html_entity_decode() does not convert entities without
	// semicolons, so we are left with our own little solution here. Bummer.

	$use_native = (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>=')));

	if ($use_native)
	{
		$str = html_entity_decode($str, ENT_COMPAT, $charset);

		// What is left are numeric entities without a semicolon
		$numeric = array(
							array('~&#x([0-9a-f]{2,5})~i', 16),
							array('~&#([0-9]{2,4})~', 10)
						);
	}
	else
	{
		// Numeric Entities, with or without the semicolon
		$numeric = array(
							array('~&#x([0-9a-f]{2,5});{0,1}~i', 16),
							array('~&#([0-9]{2,4});{0,1}~', 10)
						);
	}

	// Each pattern has exactly one capture group, so splitting with
	// PREG_SPLIT_DELIM_CAPTURE yields: text, digits, text, digits, ...
	// This replaces the /e modifier (removed in PHP 7) and parses the
	// digits with an explicit base; evaluating "chr(0065)" as code read
	// zero-padded decimals as octal.
	foreach ($numeric as $rule)
	{
		$pieces = preg_split($rule[0], $str, -1, PREG_SPLIT_DELIM_CAPTURE);

		if ( ! is_array($pieces))
		{
			continue;
		}

		$str = '';
		foreach ($pieces as $i => $piece)
		{
			$str .= ($i % 2 == 1) ? chr(intval($piece, $rule[1]) & 255) : $piece;
		}
	}

	// Literal Entities - Slightly slow so we do another check.
	// Only worth doing when an ampersand is still present; the test used
	// to be inverted, which meant the translation could never apply.
	if ( ! $use_native AND stristr($str, '&') !== FALSE)
	{
		$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
	}

	return $str;
}
916
917}
918// END Input class
adminb0dd10f2006-08-25 17:25:49 +0000919?>