Blame - system/libraries/Typography.php - code-igniter-v3-giggi

blob: 9fcf6b66968bec97822ca46bdd615faa8def9f72 [file] [log] [blame]

Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	1	<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
				2	/**
				3	* CodeIgniter
				4	*
				5	* An open source application development framework for PHP 4.3.2 or newer
				6	*
				7	* @package CodeIgniter
				8	* @author ExpressionEngine Dev Team
				9	* @copyright Copyright (c) 2006, EllisLab, Inc.
				10	* @license http://codeigniter.com/user_guide/license.html
				11	* @link http://codeigniter.com
				12	* @since Version 1.0
				13	* @filesource
				14	*/
				15
				16	// ------------------------------------------------------------------------
				17
				18	/**
				19	* Typography Class
				20	*
				21	*
				22	* @access private
				23	* @category Helpers
				24	* @author ExpressionEngine Dev Team
				25	* @link http://codeigniter.com/user_guide/helpers/
				26	*/
				27	class CI_Typography {
				28
				29	// Block level elements that should not be wrapped inside <p> tags
				30	var $block_elements = 'div\|blockquote\|pre\|code\|h\d\|script\|ol\|ul';
				31
				32	// Elements that should not have <p> and <br /> tags within them.
				33	var $skip_elements = 'pre\|ol\|ul';
				34
				35	// Tags we want the parser to completely ignore when splitting the string.
				36	var $ignore_elements = 'a\|b\|i\|em\|strong\|span\|img\|li';
				37
Rick Ellis	18bd8b5	2008-09-10 23:40:35 +0000	[diff] [blame]	38	// Whether to allow Javascript event handlers to be sumitted inside tags
				39	var $allow_js_event_handlers = FALSE;
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	40
				41	/**
				42	* Main Processing Function
				43	*
				44	*/
				45	function convert($str)
				46	{
				47	if ($str == '')
				48	{
				49	return '';
				50	}
				51
				52	$str = ' '.$str.' ';
				53
				54	// Standardize Newlines to make matching easier
				55	if (strpos($str, "\r") !== FALSE)
				56	{
				57	$str = str_replace(array("\r\n", "\r"), "\n", $str);
				58	}
				59
				60	/*
				61	* Reduce line breaks
				62	*
				63	* If there are more than two consecutive line
				64	* breaks we'll compress them down to a maximum
				65	* of two since there's no benefit to more.
				66	*
				67	*/
				68	$str = preg_replace("/\n\n+/", "\n\n", $str);
				69
				70	/*
Rick Ellis	18bd8b5	2008-09-10 23:40:35 +0000	[diff] [blame]	71	* Do we allow JavaScript event handlers?
				72	*
				73	* If not, we strip them from within all tags
				74	*/
				75	if ($this->allow_js_event_handlers == FALSE)
				76	{
Rick Ellis	e320bc8	2008-09-11 18:11:31 +0000	[diff] [blame]	77	$str = preg_replace("#<([^><]+?)([^a-z_\-]on\w\|xmlns)(\s=\s[^><])([><]*)#i", "<\\1\\4", $str);
Rick Ellis	18bd8b5	2008-09-10 23:40:35 +0000	[diff] [blame]	78	}
				79
				80	/*
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	81	* Convert quotes within tags to temporary marker
				82	*
				83	* We don't want quotes converted within
				84	* tags so we'll temporarily convert them to
				85	* {@DQ} and {@SQ}
				86	*
				87	*/
				88	if (preg_match_all("#\<.+?>#si", $str, $matches))
				89	{
				90	for ($i = 0; $i < count($matches['0']); $i++)
				91	{
				92	$str = str_replace($matches['0'][$i],
				93	str_replace(array("'",'"'), array('{@SQ}', '{@DQ}'), $matches['0'][$i]),
				94	$str);
				95	}
				96	}
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	97
Rick Ellis	18bd8b5	2008-09-10 23:40:35 +0000	[diff] [blame]	98	/*
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	99	* Add closing/opening paragraph tags before/after "block" elements
				100	*
				101	* Since block elements (like <blockquotes>, <pre>, etc.) do not get
				102	* wrapped in paragraph tags we will add a closing </p> tag just before
				103	* each block element starts and an opening <p> tag right after the block element
				104	* ends. Later on we'll do some further clean up.
				105	*
				106	*/
				107	$str = preg_replace("#(<)(".$this->block_elements.")(.*?>)#", "</p>\\1\\2\\3", $str);
				108	$str = preg_replace("#(</)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3<p>", $str);
				109
				110	/*
				111	* Convert "ignore" tags to temporary marker
				112	*
				113	* The parser splits out the string at every tag
				114	* it encounters. Certain inline tags, like image
				115	* tags, links, span tags, etc. will be adversely
				116	* affected if they are split out so we'll convert
				117	* the opening < temporarily to: {@TAG}
				118	*
				119	*/
				120	$str = preg_replace("#<(/*)(".$this->ignore_elements.")#i", "{@TAG}\\1\\2", $str);
				121
				122	/*
				123	* Split the string at every tag
				124	*
				125	* This creates an array with this prototype:
				126	*
				127	* [array]
				128	* {
				129	* [0] = <opening tag>
				130	* [1] = Content contained between the tags
				131	* [2] = <closing tag>
				132	* Etc...
				133	* }
				134	*
				135	*/
				136	$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]"\|\'[^\']\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE\|PREG_SPLIT_NO_EMPTY);
				137
				138	/*
				139	* Build our finalized string
				140	*
				141	* We'll cycle through the array, skipping tags,
				142	* and processing the contained text
				143	*
				144	*/
				145	$str = '';
				146	$process = TRUE;
				147	foreach ($chunks as $chunk)
				148	{
				149	/*
				150	* Are we dealing with a tag?
				151	*
				152	* If so, we'll skip the processing for this cycle.
				153	* Well also set the "process" flag which allows us
				154	* to skip <pre> tags and a few other things.
				155	*
				156	*/
				157	if (preg_match("#<(/)(".$this->block_elements.").?\>#", $chunk, $match))
				158	{
				159	if (preg_match("#".$this->skip_elements."#", $match['2']))
				160	{
				161	$process = ($match['1'] == '/') ? TRUE : FALSE;
				162	}
				163
				164	$str .= $chunk;
				165	continue;
				166	}
				167
				168	if ($process == FALSE)
				169	{
				170	$str .= $chunk;
				171	continue;
				172	}
				173
				174	// Convert Newlines into <p> and <br /> tags
				175	$str .= $this->format_newlines($chunk);
				176	}
				177
				178	// FINAL CLEAN UP
				179	// IMPORTANT: DO NOT ALTER THE ORDER OF THE ITEMS BELOW!
				180
				181	/*
				182	* Clean up paragraph tags before/after "block" elements
				183	*
				184	* Earlier we added <p></p> tags before/after block level elements.
				185	* Then, we added paragraph tags around double line breaks. This
				186	* potentially created incorrectly formatted paragraphs so we'll
				187	* clean it up here.
				188	*
				189	*/
				190	$str = preg_replace("#<p>({@TAG}.?)(".$this->block_elements.")(.?>)#", "\\1\\2\\3", $str);
				191	$str = preg_replace("#({@TAG}/.?)(".$this->block_elements.")(.?>)</p>#", "\\1\\2\\3", $str);
				192
				193	// Convert Quotes and other characters
				194	$str = $this->format_characters($str);
				195
				196	// Fix an artifact that happens during the paragraph replacement
				197	$str = preg_replace('#(<p>\n*</p>)#', '', $str);
				198
				199	// If the user submitted their own paragraph tags with class data
				200	// in them we will retain them instead of using our tags.
Rick Ellis	18bd8b5	2008-09-10 23:40:35 +0000	[diff] [blame]	201	$str = preg_replace('#(<p.*?>)<p>#', "\\1", $str); // <?php BBEdit syntax coloring fix
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	202
				203	// Final clean up
				204	$str = str_replace(
				205	array(
				206	'</p></p>',
				207	'</p><p>',
				208	'<p> ',
				209	' </p>',
				210	'{@TAG}',
				211	'{@DQ}',
				212	'{@SQ}',
				213	'<p></p>'
				214	),
				215	array(
				216	'</p>',
				217	'<p>',
				218	'<p>',
				219	'</p>',
				220	'<',
				221	'"',
				222	"'",
				223	''
				224	),
				225	$str
				226	);
				227
				228	return $str;
				229	}
				230
				231	// --------------------------------------------------------------------
				232
				233	/**
				234	* Format Characters
				235	*
				236	* This function mainly converts double and single quotes
Derek Jones	ab504b8	2008-09-11 17:04:30 +0000	[diff] [blame]	237	* to curly entities, but it also converts em-dashes,
				238	* double spaces, and ampersands
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	239	*/
				240	function format_characters($str)
Derek Jones	ab504b8	2008-09-11 17:04:30 +0000	[diff] [blame]	241	{
				242	static $table;
				243
				244	if ( ! isset($table))
				245	{
				246	$table = array(
				247	// nested smart quotes, opening and closing
				248	// note that rules for grammar (English) allow only for two levels deep
				249	// and that single quotes are _supposed_ to always be on the outside
				250	// but we'll accommodate both
				251	'/(^\|\W\|\s)\'"/' => '$1‘“',
				252	'/\'"(\s\|\W\|$)/' => '’”$1',
				253	'/(^\|\W\|\s)"\'/' => '$1“‘',
				254	'/"\'(\s\|\W\|$)/' => '”’$1',
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	255
Derek Jones	ab504b8	2008-09-11 17:04:30 +0000	[diff] [blame]	256	// single quote smart quotes
				257	'/\'(\s\|\W\|$)/' => '’$1',
				258	'/(^\|\W\|\s)\'/' => '$1‘',
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	259
Derek Jones	ab504b8	2008-09-11 17:04:30 +0000	[diff] [blame]	260	// double quote smart quotes
				261	'/"(\s\|\W\|$)/' => '”$1',
				262	'/(^\|\W\|\s)"/' => '$1“',
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	263
Derek Jones	ab504b8	2008-09-11 17:04:30 +0000	[diff] [blame]	264	// apostrophes
				265	"/(\w)'(\w)/" => '$1’$2',
				266
				267	// Em dash and ellipses dots
				268	'/\s?\-\-\s?/' => '—',
				269	'/\w\.{3}/' => '…',
				270
				271	// double space after sentences
Derek Jones	50500a2	2008-09-11 18:21:18 +0000	[diff] [blame^]	272	'/(\W) /' => '$1  ',
Derek Jones	ab504b8	2008-09-11 17:04:30 +0000	[diff] [blame]	273
				274	// ampersands, if not a character entity
				275	'/&(?!#?[a-zA-Z0-9]{2,};)/' => '&'
				276	);
				277	}
				278
				279	return preg_replace(array_keys($table), $table, $str);
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	280	}
				281
				282	// --------------------------------------------------------------------
				283
				284	/**
				285	* Format Newlines
				286	*
				287	* Converts newline characters into either <p> tags or <br />
				288	*
				289	*/
				290	function format_newlines($str)
				291	{
				292	if ($str == '')
				293	{
				294	return $str;
				295	}
				296
				297	if (strpos($str, "\n") === FALSE)
				298	{
				299	return '<p>'.$str.'</p>';
				300	}
				301
				302	$str = str_replace("\n\n", "</p>\n\n<p>", $str);
				303	$str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);
				304
				305	return '<p>'.$str.'</p>';
Rick Ellis	18bd8b5	2008-09-10 23:40:35 +0000	[diff] [blame]	306	}
				307
				308	// --------------------------------------------------------------------
				309
				310	/**
				311	* Allow JavaScript Event Handlers?
				312	*
				313	* For security reasons, by default we disallow JS event handlers
				314	*
				315	*/
				316	function allow_js_event_handlers($val = FALSE)
				317	{
				318	$this->allow_js_event_handlers = ($val === FALSE) ? FALSE : TRUE;
				319	}
				320
				321
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	322	}
				323	// END Typography Class
				324
				325	/* End of file Typography.php */
				326	/* Location: ./system/libraries/Typography.php */