blob: a73b8e1f70c7f6587bd67d9ab7558e309b84cfbd [file] [log] [blame]
<?php
// Abort unless BASEPATH has already been defined by whatever included this
// file; requesting the file directly must not execute anything.
if ( ! defined('BASEPATH'))
{
	exit('No direct script access allowed');
}
/**
 * CodeIgniter
 *
 * An open source application development framework for PHP 5.1.6 or newer
 *
 * @package		CodeIgniter
 * @author		ExpressionEngine Dev Team
 * @copyright	Copyright (c) 2008 - 2011, EllisLab, Inc.
 * @license		http://codeigniter.com/user_guide/license.html
 * @link		http://codeigniter.com
 * @since		Version 1.0
 * @filesource
 */

// ------------------------------------------------------------------------

/**
 * Security Class
 *
 * Bundles three unrelated safety helpers: CSRF token handling
 * (cookie + POST field comparison), the xss_clean() input filter,
 * and sanitize_filename().
 *
 * @package		CodeIgniter
 * @subpackage	Libraries
 * @category	Security
 * @author		ExpressionEngine Dev Team
 * @link		http://codeigniter.com/user_guide/libraries/security.html
 */
class CI_Security {

	/* Random marker used by xss_clean() to protect "&" in query strings */
	public $xss_hash			= '';
	/* Current CSRF token value */
	public $csrf_hash			= '';
	/* CSRF cookie lifetime: two hours (in seconds) */
	public $csrf_expire			= 7200;
	/* Name of the POST field carrying the token */
	public $csrf_token_name		= 'ci_csrf_token';
	/* Name of the cookie carrying the token (prefixed in the constructor) */
	public $csrf_cookie_name	= 'ci_csrf_token';

	/* never allowed, string replacement */
	public $never_allowed_str = array(
		'document.cookie'	=> '[removed]',
		'document.write'	=> '[removed]',
		'.parentNode'		=> '[removed]',
		'.innerHTML'		=> '[removed]',
		'window.location'	=> '[removed]',
		'-moz-binding'		=> '[removed]',
		'<!--'				=> '&lt;!--',
		'-->'				=> '--&gt;',
		'<![CDATA['			=> '&lt;![CDATA['
	);

	/* never allowed, regex replacement */
	public $never_allowed_regex = array(
		"javascript\s*:"			=> '[removed]',
		"expression\s*(\(|&\#40;)"	=> '[removed]', // CSS and IE
		"vbscript\s*:"				=> '[removed]', // IE, surprise!
		"Redirect\s+302"			=> '[removed]'
	);

	/**
	 * Constructor
	 *
	 * Derives the CSRF cookie name from the configured cookie prefix
	 * and makes sure a CSRF token value is available.
	 */
	public function __construct()
	{
		// Append application specific cookie prefix to token name
		$prefix = config_item('cookie_prefix');
		$this->csrf_cookie_name = ($prefix) ? $prefix.$this->csrf_token_name : $this->csrf_token_name;

		// Set the CSRF hash
		$this->_csrf_set_hash();

		log_message('debug', "Security Class Initialized");
	}

	// --------------------------------------------------------------------

	/**
	 * Verify Cross Site Request Forgery Protection
	 *
	 * Without POST data this only (re)issues the CSRF cookie. With POST
	 * data the token field must be present and identical to the cookie,
	 * otherwise csrf_show_error() is invoked.
	 *
	 * @access	public
	 * @return	null
	 */
	public function csrf_verify()
	{
		// If no POST data exists we will set the CSRF cookie
		if (count($_POST) === 0)
		{
			return $this->csrf_set_cookie();
		}

		// Do the tokens exist in both the _POST and _COOKIE arrays?
		$posted = isset($_POST[$this->csrf_token_name]) ? $_POST[$this->csrf_token_name] : NULL;
		$cookie = isset($_COOKIE[$this->csrf_cookie_name]) ? $_COOKIE[$this->csrf_cookie_name] : NULL;

		// Do the tokens match? Both must be strings (an array submitted as
		// "token[]=x" never matches) and they are compared strictly so that
		// numeric-looking strings such as "0e1" and "0e2" are not treated as
		// equal. hash_equals() is used where the runtime offers it so the
		// comparison time does not depend on the position of the first
		// differing byte.
		$valid = FALSE;

		if (is_string($posted) && is_string($cookie))
		{
			$valid = function_exists('hash_equals')
				? hash_equals($cookie, $posted)
				: ($posted === $cookie);
		}

		if ( ! $valid)
		{
			$this->csrf_show_error();
		}

		// We kill this since we're done and we don't want to pollute the _POST array
		unset($_POST[$this->csrf_token_name]);

		// Re-issue the cookie. Note that _csrf_set_hash() only produces a value
		// while csrf_hash is empty, so the token set up by the constructor is
		// kept here and only the cookie expiry is refreshed.
		unset($_COOKIE[$this->csrf_cookie_name]);
		$this->_csrf_set_hash();
		$this->csrf_set_cookie();

		log_message('debug', "CSRF token verified ");
	}

	// --------------------------------------------------------------------

	/**
	 * Set Cross Site Request Forgery Protection Cookie
	 *
	 * Sends the current token as a cookie using the configured cookie
	 * path, domain and secure flag; it expires csrf_expire seconds from now.
	 *
	 * @access	public
	 * @return	null
	 */
	public function csrf_set_cookie()
	{
		$expire = time() + $this->csrf_expire;

		// Only an explicit boolean TRUE in the config enables the secure flag
		$secure_cookie = (config_item('cookie_secure') === TRUE);

		setcookie($this->csrf_cookie_name, $this->csrf_hash, $expire, config_item('cookie_path'), config_item('cookie_domain'), $secure_cookie);

		log_message('debug', "CRSF cookie Set");
	}

	// --------------------------------------------------------------------

	/**
	 * Set Cross Site Request Forgery Protection Hash
	 *
	 * Populates csrf_hash if it is still empty: a well-formed token found
	 * in the CSRF cookie is reused, otherwise a new one is generated.
	 *
	 * @access	public
	 * @return	string	the current token
	 */
	public function _csrf_set_hash()
	{
		if ($this->csrf_hash == '')
		{
			// If the cookie exists we will use its value. We don't necessarily want to regenerate it with
			// each page load since a page could contain embedded sub-pages causing this feature to fail.
			// The cookie is client supplied, so it is only accepted when it has the shape of a token
			// this class generates (32 hexadecimal characters).
			if (isset($_COOKIE[$this->csrf_cookie_name])
				&& is_string($_COOKIE[$this->csrf_cookie_name])
				&& preg_match('#^[0-9a-f]{32}$#i', $_COOKIE[$this->csrf_cookie_name]) === 1)
			{
				$this->csrf_hash = $_COOKIE[$this->csrf_cookie_name];
			}
			else
			{
				$token = '';

				// Prefer the operating system CSPRNG when this PHP build has it
				if (function_exists('random_bytes'))
				{
					try
					{
						$token = bin2hex(random_bytes(16));
					}
					catch (Exception $e)
					{
						$token = '';
					}
				}

				// Fallback for runtimes without random_bytes()
				if ($token === '')
				{
					$token = md5(uniqid(mt_rand(), TRUE));
				}

				$this->csrf_hash = $token;
			}
		}

		return $this->csrf_hash;
	}

	// --------------------------------------------------------------------

	/**
	 * Show CSRF Error
	 *
	 * @access	public
	 * @return	null
	 */
	public function csrf_show_error()
	{
		show_error('The action you have requested is not allowed.');
	}

	// --------------------------------------------------------------------

	/**
	 * XSS Clean
	 *
	 * Sanitizes data so that Cross Site Scripting Hacks can be
	 * prevented. The input is normalised (entities, URL encoding,
	 * tabs), then a series of string and regex replacements remove or
	 * neutralise script vectors. Nothing is ever 100% foolproof.
	 *
	 * Note: This function should only be used to deal with data
	 * upon submission. It's not something that should
	 * be used for general runtime processing.
	 *
	 * This function was based in part on some code and ideas
	 * from Bitflux: http://channel.bitflux.ch/wiki/XSS_Prevention
	 * and on the list of vectors at http://ha.ckers.org/xss.html
	 *
	 * @access	public
	 * @param	mixed	string or array (arrays are cleaned element by element)
	 * @param	bool	TRUE when the data is the content of an image file
	 * @return	mixed	cleaned string/array, or for images TRUE when nothing had to be changed and FALSE otherwise
	 */
	public function xss_clean($str, $is_image = FALSE)
	{
		/*
		 * Is the string an array?
		 *
		 * Each element is cleaned recursively (as regular, non-image data).
		 */
		if (is_array($str))
		{
			foreach (array_keys($str) as $key)
			{
				$str[$key] = $this->xss_clean($str[$key]);
			}

			return $str;
		}

		/*
		 * Remove Invisible Characters
		 */
		$str = remove_invisible_characters($str);

		/*
		 * Protect GET variables in URLs
		 *
		 * "&name=value" pairs get their ampersand swapped for a random
		 * marker so the entity handling below leaves them alone.
		 */
		$str = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-]+)|i', $this->xss_hash()."\\1=\\2", $str);

		/*
		 * Validate standard character entities
		 *
		 * Add a semicolon if missing.  We do this to enable
		 * the conversion of entities to ASCII later.
		 */
		$str = preg_replace('#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i', "\\1;\\2", $str);

		/*
		 * Validate UTF16 two byte encoding (x00)
		 *
		 * Just as above, adds a semicolon if missing.
		 */
		$str = preg_replace('#(&\#x?)([0-9A-F]+);?#i', "\\1\\2;", $str);

		/*
		 * Un-Protect GET variables in URLs
		 */
		$str = str_replace($this->xss_hash(), '&', $str);

		/*
		 * URL Decode
		 *
		 * Just in case stuff like this is submitted:
		 *
		 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
		 *
		 * Note: Use rawurldecode() so it does not remove plus signs
		 */
		$str = rawurldecode($str);

		/*
		 * Convert character entities to ASCII
		 *
		 * This permits our tests below to work reliably.
		 * We only convert entities that are within tags since
		 * these are the ones that will pose security problems.
		 */
		$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);

		$str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", array($this, '_decode_entity'), $str);

		/*
		 * Remove Invisible Characters Again!
		 */
		$str = remove_invisible_characters($str);

		/*
		 * Convert all tabs to spaces
		 *
		 * This prevents strings like this: ja	vascript
		 * NOTE: we deal with spaces between characters later.
		 * NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
		 * so we use str_replace.
		 */
		if (strpos($str, "\t") !== FALSE)
		{
			$str = str_replace("\t", ' ', $str);
		}

		/*
		 * Capture converted string for later comparison
		 */
		$converted_string = $str;

		/*
		 * Not Allowed Under Any Conditions
		 */
		$str = $this->_do_never_allowed($str);

		/*
		 * Makes PHP tags safe
		 *
		 * Note: XML tags are inadvertently replaced too:
		 *
		 *	<?xml
		 *
		 * But it doesn't seem to pose a problem.
		 */
		if ($is_image === TRUE)
		{
			// Images have a tendency to have the PHP short opening and closing tags every so often
			// so we skip those and only do the long opening tags.
			$str = preg_replace('/<\?(php)/i', "&lt;?\\1", $str);
		}
		else
		{
			$str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
		}

		/*
		 * Compact any exploded words
		 *
		 * This corrects words like:  j a v a s c r i p t
		 * These words are compacted back to their correct state.
		 */
		$words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');

		foreach ($words as $word)
		{
			// Allow optional whitespace between every pair of letters
			$spaced = implode('\s*', str_split($word));

			// We only want to do this when it is followed by a non-word character
			// That way valid stuff like "dealer to" does not become "dealerto"
			$str = preg_replace_callback('#('.$spaced.')(\W)#is', array($this, '_compact_exploded_words'), $str);
		}

		/*
		 * Remove disallowed Javascript in links or img tags
		 *
		 * Repeated until a pass changes nothing, because removing one
		 * fragment can expose another.
		 */
		do
		{
			$original = $str;

			if (preg_match("/<a/i", $str))
			{
				$str = preg_replace_callback("#<a\s+([^>]*?)(>|$)#si", array($this, '_js_link_removal'), $str);
			}

			if (preg_match("/<img/i", $str))
			{
				$str = preg_replace_callback("#<img\s+([^>]*?)(\s?/?>|$)#si", array($this, '_js_img_removal'), $str);
			}

			if (preg_match("/script/i", $str) OR preg_match("/xss/i", $str))
			{
				$str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
			}
		}
		while ($original !== $str);

		unset($original);

		/*
		 * Remove JavaScript Event Handlers
		 *
		 * Note: This code is a little blunt.  It removes
		 * the event handler and anything up to the closing >,
		 * but it's unlikely to be a problem.
		 */
		$event_handlers = array('[^a-z_\-]on\w*', 'xmlns');

		if ($is_image === TRUE)
		{
			/*
			 * Adobe Photoshop puts XML metadata into JFIF images, including namespacing,
			 * so we have to allow this for images. -Paul
			 */
			unset($event_handlers[array_search('xmlns', $event_handlers)]);
		}

		$str = preg_replace("#<([^><]+?)(".implode('|', $event_handlers).")(\s*=\s*[^><]*)([><]*)#i", "<\\1\\4", $str);

		/*
		 * Sanitize naughty HTML elements
		 *
		 * If a tag containing any of the words in the list
		 * below is found, the tag gets converted to entities.
		 *
		 * So this: <blink>
		 * Becomes: &lt;blink&gt;
		 */
		$naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';
		$str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);

		/*
		 * Sanitize naughty scripting elements
		 *
		 * Similar to above, only instead of looking for
		 * tags it looks for PHP and JavaScript commands
		 * that are disallowed.  Rather than removing the
		 * code, it simply converts the parenthesis to entities
		 * rendering the code un-executable.
		 *
		 * For example:	eval('some code')
		 * Becomes:		eval&#40;'some code'&#41;
		 */
		$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

		/*
		 * Final clean up
		 *
		 * This adds a bit of extra precaution in case
		 * something got through the above filters
		 */
		$str = $this->_do_never_allowed($str);

		/*
		 * Images are Handled in a Special Way
		 * - Essentially, we want to know that after all of the character conversion is done whether
		 * any unwanted, likely XSS, code was found.  If not, we return TRUE, as the image is clean.
		 * However, if the string post-conversion does not match the string post-removal of XSS,
		 * then it fails, as there was unwanted XSS code found and removed/changed during processing.
		 */
		if ($is_image === TRUE)
		{
			return ($str === $converted_string);
		}

		log_message('debug', "XSS Filtering completed");
		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Do Never Allowed
	 *
	 * Applies the never_allowed_str and never_allowed_regex
	 * replacement tables to a string. Helper for xss_clean().
	 *
	 * @access	protected
	 * @param	string
	 * @return	string
	 */
	protected function _do_never_allowed($str)
	{
		foreach ($this->never_allowed_str as $key => $val)
		{
			$str = str_replace($key, $val, $str);
		}

		foreach ($this->never_allowed_regex as $key => $val)
		{
			$str = preg_replace("#".$key."#i", $val, $str);
		}

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Random Hash for protecting URLs
	 *
	 * Generated once per instance and then reused.
	 *
	 * @access	public
	 * @return	string
	 */
	public function xss_hash()
	{
		if ($this->xss_hash == '')
		{
			mt_srand();
			$this->xss_hash = md5(time() + mt_rand(0, 1999999999));
		}

		return $this->xss_hash;
	}

	// --------------------------------------------------------------------

	/**
	 * Compact Exploded Words
	 *
	 * Callback function for xss_clean() to remove whitespace from
	 * things like j a v a s c r i p t
	 *
	 * @access	public
	 * @param	array	regex match: [1] is the spaced-out word, [2] the following non-word character
	 * @return	string	the word without whitespace, followed by the untouched trailing character
	 */
	public function _compact_exploded_words($matches)
	{
		return preg_replace('/\s+/s', '', $matches[1]).$matches[2];
	}

	// --------------------------------------------------------------------

	/**
	 * Sanitize Naughty HTML
	 *
	 * Callback function for xss_clean() to remove naughty HTML elements
	 *
	 * @access	private
	 * @param	array
	 * @return	string
	 */
	public function _sanitize_naughty_html($matches)
	{
		// encode opening brace
		$str = '&lt;'.$matches[1].$matches[2].$matches[3];

		// encode captured opening or closing brace to prevent recursive vectors
		$str .= str_replace(array('>', '<'), array('&gt;', '&lt;'), $matches[4]);

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * JS Link Removal
	 *
	 * Callback function for xss_clean() to sanitize links
	 * This limits the PCRE backtracks, making it more performance friendly
	 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
	 * PHP 5.2+ on link-heavy strings
	 *
	 * @access	private
	 * @param	array
	 * @return	string
	 */
	public function _js_link_removal($match)
	{
		$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]));
		return str_replace($match[1], preg_replace("#href=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes), $match[0]);
	}

	/**
	 * JS Image Removal
	 *
	 * Callback function for xss_clean() to sanitize image tags
	 * This limits the PCRE backtracks, making it more performance friendly
	 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
	 * PHP 5.2+ on image tag heavy strings
	 *
	 * @access	private
	 * @param	array
	 * @return	string
	 */
	public function _js_img_removal($match)
	{
		$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]));
		return str_replace($match[1], preg_replace("#src=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes), $match[0]);
	}

	// --------------------------------------------------------------------

	/**
	 * Attribute Conversion
	 *
	 * Used as a callback for XSS Clean. Encodes angle brackets and
	 * doubles backslashes inside a matched attribute.
	 *
	 * @access	public
	 * @param	array
	 * @return	string
	 */
	public function _convert_attribute($match)
	{
		return str_replace(array('>', '<', '\\'), array('&gt;', '&lt;', '\\\\'), $match[0]);
	}

	// --------------------------------------------------------------------

	/**
	 * Filter Attributes
	 *
	 * Keeps only quoted name="value" pairs and strips C-style
	 * comments from them.
	 *
	 * @access	public
	 * @param	string
	 * @return	string
	 */
	public function _filter_attributes($str)
	{
		$out = '';

		if (preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches))
		{
			foreach ($matches[0] as $match)
			{
				$out .= preg_replace("#/\*.*?\*/#s", '', $match);
			}
		}

		return $out;
	}

	// --------------------------------------------------------------------

	/**
	 * HTML Entity Decode Callback
	 *
	 * Used as a callback for XSS Clean
	 *
	 * @access	public
	 * @param	array
	 * @return	string
	 */
	public function _decode_entity($match)
	{
		return $this->entity_decode($match[0], strtoupper(config_item('charset')));
	}

	// --------------------------------------------------------------------

	/**
	 * HTML Entities Decode
	 *
	 * Wraps html_entity_decode() and additionally converts numeric
	 * entities that lack the closing semicolon. That form is not
	 * technically correct, but most browsers still interpret it, and
	 * html_entity_decode() leaves it untouched.
	 *
	 * @access	public
	 * @param	string
	 * @param	string	character set passed to html_entity_decode()
	 * @return	string
	 */
	public function entity_decode($str, $charset='UTF-8')
	{
		// Nothing to decode without an ampersand
		if (strpos($str, '&') === FALSE)
		{
			return $str;
		}

		$str = html_entity_decode($str, ENT_COMPAT, $charset);

		// Callbacks replace the former /e (eval) pattern modifier, which
		// current PHP versions no longer support.
		$str = preg_replace_callback('~&#x(0*[0-9a-f]{2,5})~i', array($this, '_decode_hex_entity'), $str);
		return preg_replace_callback('~&#([0-9]{2,4})~', array($this, '_decode_dec_entity'), $str);
	}

	// --------------------------------------------------------------------

	/**
	 * Hexadecimal Entity Callback
	 *
	 * Used by entity_decode(); turns the captured hex digits into a byte.
	 *
	 * @access	public
	 * @param	array
	 * @return	string
	 */
	public function _decode_hex_entity($match)
	{
		return chr(hexdec($match[1]));
	}

	// --------------------------------------------------------------------

	/**
	 * Decimal Entity Callback
	 *
	 * Used by entity_decode(); the captured digits are always read as a
	 * decimal number, including zero-padded ones such as "0065".
	 *
	 * @access	public
	 * @param	array
	 * @return	string
	 */
	public function _decode_dec_entity($match)
	{
		return chr((int) $match[1]);
	}

	// --------------------------------------------------------------------

	/**
	 * Filename Security
	 *
	 * Strips path traversal sequences, markup characters and a set of
	 * URL-encoded characters from a file name.
	 *
	 * @access	public
	 * @param	string
	 * @param	bool	TRUE to keep "./" and "/" so relative paths survive
	 * @return	string
	 */
	public function sanitize_filename($str, $relative_path = FALSE)
	{
		$bad = array(
			"../",
			"<!--",
			"-->",
			"<",
			">",
			"'",
			'"',
			'&',
			'$',
			'#',
			'{',
			'}',
			'[',
			']',
			'=',
			';',
			'?',
			"%20",
			"%22",
			"%3c",		// <
			"%253c",	// <
			"%3e",		// >
			"%0e",		// >
			"%28",		// (
			"%29",		// )
			"%2528",	// (
			"%26",		// &
			"%24",		// $
			"%3f",		// ?
			"%3b",		// ;
			"%3d"		// =
		);

		if ( ! $relative_path)
		{
			$bad[] = './';
			$bad[] = '/';
		}

		// Repeat until nothing changes: a single pass can itself assemble a
		// forbidden sequence (removing "../" from "..././" leaves "../").
		do
		{
			$previous = $str;
			$str = str_replace($bad, '', $str);
		}
		while ($previous !== $str);

		return stripslashes($str);
	}

}
// END Security Class

/* End of file Security.php */
/* Location: ./system/libraries/Security.php */