Blame - system/libraries/Typography.php - code-igniter-v3-giggi

blob: dabf7a62507d028362db9aa40b6f70276a3c8983 [file] [log] [blame]

Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	1	<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
				2	/**
				3	* CodeIgniter
				4	*
				5	* An open source application development framework for PHP 4.3.2 or newer
				6	*
				7	* @package CodeIgniter
				8	* @author ExpressionEngine Dev Team
				9	* @copyright Copyright (c) 2006, EllisLab, Inc.
				10	* @license http://codeigniter.com/user_guide/license.html
				11	* @link http://codeigniter.com
				12	* @since Version 1.0
				13	* @filesource
				14	*/
				15
				16	// ------------------------------------------------------------------------
				17
				18	/**
				19	* Typography Class
				20	*
				21	*
				22	* @access private
				23	* @category Helpers
				24	* @author ExpressionEngine Dev Team
				25	* @link http://codeigniter.com/user_guide/helpers/
				26	*/
				27	class CI_Typography {
				28
				29	// Block level elements that should not be wrapped inside <p> tags
				30	var $block_elements = 'div\|blockquote\|pre\|code\|h\d\|script\|ol\|ul';
				31
				32	// Elements that should not have <p> and <br /> tags within them.
				33	var $skip_elements = 'pre\|ol\|ul';
				34
				35	// Tags we want the parser to completely ignore when splitting the string.
				36	var $ignore_elements = 'a\|b\|i\|em\|strong\|span\|img\|li';
				37
Rick Ellis	18bd8b5	2008-09-10 23:40:35 +0000	[diff] [blame^]	38	// Whether to allow Javascript event handlers to be sumitted inside tags
				39	var $allow_js_event_handlers = FALSE;
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	40
				41	/**
				42	* Main Processing Function
				43	*
				44	*/
				45	function convert($str)
				46	{
				47	if ($str == '')
				48	{
				49	return '';
				50	}
				51
				52	$str = ' '.$str.' ';
				53
				54	// Standardize Newlines to make matching easier
				55	if (strpos($str, "\r") !== FALSE)
				56	{
				57	$str = str_replace(array("\r\n", "\r"), "\n", $str);
				58	}
				59
				60	/*
				61	* Reduce line breaks
				62	*
				63	* If there are more than two consecutive line
				64	* breaks we'll compress them down to a maximum
				65	* of two since there's no benefit to more.
				66	*
				67	*/
				68	$str = preg_replace("/\n\n+/", "\n\n", $str);
				69
				70	/*
Rick Ellis	18bd8b5	2008-09-10 23:40:35 +0000	[diff] [blame^]	71	* Do we allow JavaScript event handlers?
				72	*
				73	* If not, we strip them from within all tags
				74	*/
				75	if ($this->allow_js_event_handlers == FALSE)
				76	{
				77	$event_handlers = array('[^a-z_\-]on\w*','xmlns');
				78	$str = preg_replace("#<([^><]+?)(".implode('\|', $event_handlers).")(\s=\s[^><])([><])#i", "<\\1\\4", $str);
				79	}
				80
				81	/*
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	82	* Convert quotes within tags to temporary marker
				83	*
				84	* We don't want quotes converted within
				85	* tags so we'll temporarily convert them to
				86	* {@DQ} and {@SQ}
				87	*
				88	*/
				89	if (preg_match_all("#\<.+?>#si", $str, $matches))
				90	{
				91	for ($i = 0; $i < count($matches['0']); $i++)
				92	{
				93	$str = str_replace($matches['0'][$i],
				94	str_replace(array("'",'"'), array('{@SQ}', '{@DQ}'), $matches['0'][$i]),
				95	$str);
				96	}
				97	}
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	98
Rick Ellis	18bd8b5	2008-09-10 23:40:35 +0000	[diff] [blame^]	99	/*
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	100	* Add closing/opening paragraph tags before/after "block" elements
				101	*
				102	* Since block elements (like <blockquotes>, <pre>, etc.) do not get
				103	* wrapped in paragraph tags we will add a closing </p> tag just before
				104	* each block element starts and an opening <p> tag right after the block element
				105	* ends. Later on we'll do some further clean up.
				106	*
				107	*/
				108	$str = preg_replace("#(<)(".$this->block_elements.")(.*?>)#", "</p>\\1\\2\\3", $str);
				109	$str = preg_replace("#(</)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3<p>", $str);
				110
				111	/*
				112	* Convert "ignore" tags to temporary marker
				113	*
				114	* The parser splits out the string at every tag
				115	* it encounters. Certain inline tags, like image
				116	* tags, links, span tags, etc. will be adversely
				117	* affected if they are split out so we'll convert
				118	* the opening < temporarily to: {@TAG}
				119	*
				120	*/
				121	$str = preg_replace("#<(/*)(".$this->ignore_elements.")#i", "{@TAG}\\1\\2", $str);
				122
				123	/*
				124	* Split the string at every tag
				125	*
				126	* This creates an array with this prototype:
				127	*
				128	* [array]
				129	* {
				130	* [0] = <opening tag>
				131	* [1] = Content contained between the tags
				132	* [2] = <closing tag>
				133	* Etc...
				134	* }
				135	*
				136	*/
				137	$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]"\|\'[^\']\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE\|PREG_SPLIT_NO_EMPTY);
				138
				139	/*
				140	* Build our finalized string
				141	*
				142	* We'll cycle through the array, skipping tags,
				143	* and processing the contained text
				144	*
				145	*/
				146	$str = '';
				147	$process = TRUE;
				148	foreach ($chunks as $chunk)
				149	{
				150	/*
				151	* Are we dealing with a tag?
				152	*
				153	* If so, we'll skip the processing for this cycle.
				154	* Well also set the "process" flag which allows us
				155	* to skip <pre> tags and a few other things.
				156	*
				157	*/
				158	if (preg_match("#<(/)(".$this->block_elements.").?\>#", $chunk, $match))
				159	{
				160	if (preg_match("#".$this->skip_elements."#", $match['2']))
				161	{
				162	$process = ($match['1'] == '/') ? TRUE : FALSE;
				163	}
				164
				165	$str .= $chunk;
				166	continue;
				167	}
				168
				169	if ($process == FALSE)
				170	{
				171	$str .= $chunk;
				172	continue;
				173	}
				174
				175	// Convert Newlines into <p> and <br /> tags
				176	$str .= $this->format_newlines($chunk);
				177	}
				178
				179	// FINAL CLEAN UP
				180	// IMPORTANT: DO NOT ALTER THE ORDER OF THE ITEMS BELOW!
				181
				182	/*
				183	* Clean up paragraph tags before/after "block" elements
				184	*
				185	* Earlier we added <p></p> tags before/after block level elements.
				186	* Then, we added paragraph tags around double line breaks. This
				187	* potentially created incorrectly formatted paragraphs so we'll
				188	* clean it up here.
				189	*
				190	*/
				191	$str = preg_replace("#<p>({@TAG}.?)(".$this->block_elements.")(.?>)#", "\\1\\2\\3", $str);
				192	$str = preg_replace("#({@TAG}/.?)(".$this->block_elements.")(.?>)</p>#", "\\1\\2\\3", $str);
				193
				194	// Convert Quotes and other characters
				195	$str = $this->format_characters($str);
				196
				197	// Fix an artifact that happens during the paragraph replacement
				198	$str = preg_replace('#(<p>\n*</p>)#', '', $str);
				199
				200	// If the user submitted their own paragraph tags with class data
				201	// in them we will retain them instead of using our tags.
Rick Ellis	18bd8b5	2008-09-10 23:40:35 +0000	[diff] [blame^]	202	$str = preg_replace('#(<p.*?>)<p>#', "\\1", $str); // <?php BBEdit syntax coloring fix
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	203
				204	// Final clean up
				205	$str = str_replace(
				206	array(
				207	'</p></p>',
				208	'</p><p>',
				209	'<p> ',
				210	' </p>',
				211	'{@TAG}',
				212	'{@DQ}',
				213	'{@SQ}',
				214	'<p></p>'
				215	),
				216	array(
				217	'</p>',
				218	'<p>',
				219	'<p>',
				220	'</p>',
				221	'<',
				222	'"',
				223	"'",
				224	''
				225	),
				226	$str
				227	);
				228
				229	return $str;
				230	}
				231
				232	// --------------------------------------------------------------------
				233
				234	/**
				235	* Format Characters
				236	*
				237	* This function mainly converts double and single quotes
				238	* to entities, but since these are directional, it does
				239	* it based on some rules. It also converts em-dashes
				240	* and a couple other things.
				241	*/
				242	function format_characters($str)
				243	{
				244	$table = array(
				245	' "' => " “",
				246	'" ' => "” ",
				247	" '" => " ‘",
				248	"' " => "’ ",
				249
				250	'>"' => ">“",
				251	'"<' => "”<",
				252	">'" => ">‘",
				253	"'<" => "’<",
				254
				255	"\"." => "”.",
				256	"\"," => "”,",
				257	"\";" => "”;",
				258	"\":" => "”:",
				259	"\"!" => "”!",
				260	"\"?" => "”?",
				261
				262	". " => ".  ",
				263	"? " => "?  ",
				264	"! " => "!  ",
				265	": " => ":  ",
				266	);
				267
				268	// These deal with quotes within quotes, like: "'hi here'"
				269	$start = 0;
				270	$space = array("\n", "\t", " ");
				271
				272	while(TRUE)
				273	{
				274	$current = strpos(substr($str, $start), "\"'");
				275
				276	if ($current === FALSE) break;
				277
				278	$one_before = substr($str, $start+$current-1, 1);
				279	$one_after = substr($str, $start+$current+2, 1);
				280
				281	if ( ! in_array($one_after, $space, TRUE) && $one_after != "<")
				282	{
				283	$str = str_replace( $one_before."\"'".$one_after,
				284	$one_before."“‘".$one_after,
				285	$str);
				286	}
				287	elseif ( ! in_array($one_before, $space, TRUE) && (in_array($one_after, $space, TRUE) OR $one_after == '<'))
				288	{
				289	$str = str_replace( $one_before."\"'".$one_after,
				290	$one_before."”’".$one_after,
				291	$str);
				292	}
				293
				294	$start = $start+$current+2;
				295	}
				296
				297	$start = 0;
				298
				299	while(TRUE)
				300	{
				301	$current = strpos(substr($str, $start), "'\"");
				302
				303	if ($current === FALSE) break;
				304
				305	$one_before = substr($str, $start+$current-1, 1);
				306	$one_after = substr($str, $start+$current+2, 1);
				307
				308	if ( in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE) && $one_after != "<")
				309	{
				310	$str = str_replace( $one_before."'\"".$one_after,
				311	$one_before."‘“".$one_after,
				312	$str);
				313	}
				314	elseif ( ! in_array($one_before, $space, TRUE) && $one_before != ">")
				315	{
				316	$str = str_replace( $one_before."'\"".$one_after,
				317	$one_before."’”".$one_after,
				318	$str);
				319	}
				320
				321	$start = $start+$current+2;
				322	}
				323
				324	// Are there quotes within a word, as in: ("something")
				325	if (preg_match_all("/(.)\"(\S+?)\"(.)/", $str, $matches))
				326	{
				327	for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
				328	{
				329	if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				330	{
				331	$str = str_replace( $matches['0'][$i],
				332	$matches['1'][$i]."“".$matches['2'][$i]."”".$matches['3'][$i],
				333	$str);
				334	}
				335	}
				336	}
				337
				338	if (preg_match_all("/(.)\'(\S+?)\'(.)/", $str, $matches))
				339	{
				340	for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
				341	{
				342	if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				343	{
				344	$str = str_replace( $matches['0'][$i],
				345	$matches['1'][$i]."‘".$matches['2'][$i]."’".$matches['3'][$i],
				346	$str);
				347	}
				348	}
				349	}
				350
				351	// How about one apostrophe, as in Rick's
				352	$start = 0;
				353
				354	while(TRUE)
				355	{
				356	$current = strpos(substr($str, $start), "'");
				357
				358	if ($current === FALSE) break;
				359
				360	$one_before = substr($str, $start+$current-1, 1);
				361	$one_after = substr($str, $start+$current+1, 1);
				362
				363	if ( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
				364	{
				365	$str = str_replace( $one_before."'".$one_after,
				366	$one_before."’".$one_after,
				367	$str);
				368	}
				369
				370	$start = $start+$current+2;
				371	}
				372
				373	// Em-dashes
				374	$start = 0;
				375	while(TRUE)
				376	{
				377	$current = strpos(substr($str, $start), "--");
				378
				379	if ($current === FALSE) break;
				380
				381	$one_before = substr($str, $start+$current-1, 1);
				382	$one_after = substr($str, $start+$current+2, 1);
				383	$two_before = substr($str, $start+$current-2, 1);
				384	$two_after = substr($str, $start+$current+3, 1);
				385
				386	if (( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
				387	OR
				388	( ! in_array($two_before, $space, TRUE) && ! in_array($two_after, $space, TRUE) && $one_before == ' ' && $one_after == ' ')
				389	)
				390	{
				391	$str = str_replace( $two_before.$one_before."--".$one_after.$two_after,
				392	$two_before.trim($one_before)."—".trim($one_after).$two_after,
				393	$str);
				394	}
				395
				396	$start = $start+$current+2;
				397	}
				398
				399	// Ellipsis
				400	$str = preg_replace("#(\w)\.\.\.(\s\|<br />\|</p>)#", "\\1…\\2", $str);
				401	$str = preg_replace("#(\s\|<br />\|</p>)\.\.\.(\w)#", "\\1…\\2", $str);
				402
				403	// Run the translation array we defined above
				404	$str = str_replace(array_keys($table), array_values($table), $str);
				405
				406	// If there are any stray double quotes we'll catch them here
				407
				408	$start = 0;
				409
				410	while(TRUE)
				411	{
				412	$current = strpos(substr($str, $start), '"');
				413
				414	if ($current === FALSE) break;
				415
				416	$one_before = substr($str, $start+$current-1, 1);
				417	$one_after = substr($str, $start+$current+1, 1);
				418
				419	if ( ! in_array($one_after, $space, TRUE))
				420	{
				421	$str = str_replace( $one_before.'"'.$one_after,
				422	$one_before."“".$one_after,
				423	$str);
				424	}
				425	elseif( ! in_array($one_before, $space, TRUE))
				426	{
				427	$str = str_replace( $one_before."'".$one_after,
				428	$one_before."”".$one_after,
				429	$str);
				430	}
				431
				432	$start = $start+$current+2;
				433	}
				434
				435	$start = 0;
				436
				437	while(TRUE)
				438	{
				439	$current = strpos(substr($str, $start), "'");
				440
				441	if ($current === FALSE) break;
				442
				443	$one_before = substr($str, $start+$current-1, 1);
				444	$one_after = substr($str, $start+$current+1, 1);
				445
				446	if ( ! in_array($one_after, $space, TRUE))
				447	{
				448	$str = str_replace( $one_before."'".$one_after,
				449	$one_before."‘".$one_after,
				450	$str);
				451	}
				452	elseif( ! in_array($one_before, $space, TRUE))
				453	{
				454	$str = str_replace( $one_before."'".$one_after,
				455	$one_before."’".$one_after,
				456	$str);
				457	}
				458
				459	$start = $start+$current+2;
				460	}
				461
				462	return $str;
				463	}
				464
				465	// --------------------------------------------------------------------
				466
				467	/**
				468	* Format Newlines
				469	*
				470	* Converts newline characters into either <p> tags or <br />
				471	*
				472	*/
				473	function format_newlines($str)
				474	{
				475	if ($str == '')
				476	{
				477	return $str;
				478	}
				479
				480	if (strpos($str, "\n") === FALSE)
				481	{
				482	return '<p>'.$str.'</p>';
				483	}
				484
				485	$str = str_replace("\n\n", "</p>\n\n<p>", $str);
				486	$str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);
				487
				488	return '<p>'.$str.'</p>';
Rick Ellis	18bd8b5	2008-09-10 23:40:35 +0000	[diff] [blame^]	489	}
				490
				491	// --------------------------------------------------------------------
				492
				493	/**
				494	* Allow JavaScript Event Handlers?
				495	*
				496	* For security reasons, by default we disallow JS event handlers
				497	*
				498	*/
				499	function allow_js_event_handlers($val = FALSE)
				500	{
				501	$this->allow_js_event_handlers = ($val === FALSE) ? FALSE : TRUE;
				502	}
				503
				504
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	505	}
				506	// END Typography Class
				507
				508	/* End of file Typography.php */
				509	/* Location: ./system/libraries/Typography.php */