Blame - system/helpers/typography_helper.php - code-igniter-v3-giggi

blob: 84ca0ed6fb86344e6a3626b323fb85e002f256e8 [file] [log] [blame]

admin	b0dd10f	2006-08-25 17:25:49 +0000	[diff] [blame]	1	<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
				2	/**
				3	* Code Igniter
				4	*
				5	* An open source application development framework for PHP 4.3.2 or newer
				6	*
				7	* @package CodeIgniter
				8	* @author Rick Ellis
				9	* @copyright Copyright (c) 2006, pMachine, Inc.
				10	* @license http://www.codeigniter.com/user_guide/license.html
				11	* @link http://www.codeigniter.com
				12	* @since Version 1.0
				13	* @filesource
				14	*/
				15
				16	// ------------------------------------------------------------------------
				17
				18	/**
				19	* Code Igniter Typography Helpers
				20	*
				21	* @package CodeIgniter
				22	* @subpackage Helpers
				23	* @category Helpers
				24	* @author Rick Ellis
				25	* @link http://www.codeigniter.com/user_guide/helpers/typography_helper.html
				26	*/
				27
				28	// ------------------------------------------------------------------------
				29
				/**
				 * Convert newlines to HTML line breaks except within PRE tags
				 *
				 * The input is cut at every literal "pre>" (the tail of both "<pre>"
				 * and "</pre>"). Pieces at even positions are passed through nl2br();
				 * pieces at odd positions are left untouched. Assuming the <pre> tags
				 * are balanced, the odd pieces are the contents of the <pre> blocks.
				 *
				 * @access	public
				 * @param	string
				 * @return	string
				 */
				function nl2br_except_pre($str)
				{
					$segments = explode("pre>", $str);

					foreach ($segments as $index => $segment)
					{
						// Only the even-numbered pieces receive <br /> tags
						if ($index % 2 == 0)
						{
							$segments[$index] = nl2br($segment);
						}
					}

					// Put the "pre>" delimiters back exactly where they were removed
					return implode("pre>", $segments);
				}
				60
				61	// ------------------------------------------------------------------------
				62
				/**
				 * Auto Typography Wrapper Function
				 *
				 * Creates a fresh Auto_typography object on every call and returns
				 * whatever its convert() method produces for the supplied string.
				 *
				 * @access	public
				 * @param	string
				 * @return	string
				 */
				function auto_typography($str)
				{
					$typography = new Auto_typography();
					$converted = $typography->convert($str);

					return $converted;
				}
				76
				77	// ------------------------------------------------------------------------
				78
				/**
				 * Auto Typography Class
				 *
				 * Turns plain text (optionally containing some HTML) into paragraph
				 * markup: double newlines become <p> boundaries, single newlines become
				 * <br />, and straight quotes, double hyphens and three dots are
				 * replaced with typographic HTML entities.
				 *
				 * @access		private
				 * @category	Helpers
				 * @author		Rick Ellis
				 * @author		Paul Burdick
				 * @link		http://www.codeigniter.com/user_guide/helpers/
				 */
				class Auto_typography {

					// Block level elements that should not be wrapped inside <p> tags.
					// This is a regex alternation and is interpolated into patterns as-is.
					var $block_elements = 'div|blockquote|pre|code|h\d|script|ol|ul';

					// Elements that should not have <p> and <br /> tags within them.
					// Only consulted for tags that already matched $block_elements.
					var $skip_elements = 'pre|ol|ul';

					// Tags we want the parser to completely ignore when splitting the string.
					var $ignore_elements = 'a|b|i|em|strong|span|img|li';


					/**
					 * Main Processing Function
					 *
					 * @access	public
					 * @param	string	the text to format
					 * @return	string	the formatted text, or '' when the input is empty
					 */
					function convert($str)
					{
						if ($str == '')
						{
							return '';
						}

						// Pad with spaces so the character look-behind/look-ahead logic
						// in format_characters() always has a neighbour to inspect.
						// The padding is stripped again by the final clean up.
						$str = ' '.$str.' ';

						// Standardize Newlines to make matching easier
						$str = preg_replace("/(\r\n|\r)/", "\n", $str);

						/*
						 * Reduce line breaks
						 *
						 * If there are more than two consecutive line
						 * breaks we'll compress them down to a maximum
						 * of two since there's no benefit to more.
						 *
						 */
						$str = preg_replace("/\n\n+/", "\n\n", $str);

						/*
						 * Convert quotes within tags to temporary markers
						 *
						 * We don't want quotes converted within
						 * tags so we'll temporarily convert them to
						 * {@DQ} and {@SQ}
						 *
						 */
						if (preg_match_all("#\<.+?>#si", $str, $matches))
						{
							foreach ($matches[0] as $tag)
							{
								$protected = str_replace(array("'", '"'), array('{@SQ}', '{@DQ}'), $tag);
								$str = str_replace($tag, $protected, $str);
							}
						}

						/*
						 * Add closing/opening paragraph tags before/after "block" elements
						 *
						 * Since block elements (like <blockquotes>, <pre>, etc.) do not get
						 * wrapped in paragraph tags we will add a closing </p> tag just before
						 * each block element starts and an opening <p> tag right after the block element
						 * ends.  Later on we'll do some further clean up.
						 *
						 */
						$str = preg_replace("#(<.*?)(".$this->block_elements.")(.*?>)#", "</p>\\1\\2\\3", $str);
						$str = preg_replace("#(</.*?)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3<p>", $str);

						/*
						 * Convert "ignore" tags to temporary marker
						 *
						 * The parser splits out the string at every tag
						 * it encounters.  Certain inline tags, like image
						 * tags, links, span tags, etc. will be adversely
						 * affected if they are split out so we'll convert
						 * the opening < temporarily to: {@TAG}
						 *
						 */
						$str = preg_replace("#<(/*)(".$this->ignore_elements.")#i", "{@TAG}\\1\\2", $str);

						/*
						 * Split the string at every tag
						 *
						 * This creates an array with this prototype:
						 *
						 *	[array]
						 *	{
						 *		[0] = <opening tag>
						 *		[1] = Content contained between the tags
						 *		[2] = <closing tag>
						 *		Etc...
						 *	}
						 *
						 */
						$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);

						/*
						 * Build our finalized string
						 *
						 * We'll cycle through the array, skipping tags,
						 * and processing the contained text
						 *
						 */
						$str = '';
						$process = TRUE;

						foreach ($chunks as $chunk)
						{
							/*
							 * Are we dealing with a tag?
							 *
							 * If so, we'll skip the processing for this cycle.
							 * We'll also set the "process" flag which allows us
							 * to skip <pre> tags and a few other things.
							 *
							 */
							if (preg_match("#<(/*)(".$this->block_elements.").*?\>#", $chunk, $match))
							{
								if (preg_match("#".$this->skip_elements."#", $match[2]))
								{
									// An opening skip tag switches processing off,
									// its closing tag switches it back on.
									$process = ($match[1] == '/') ? TRUE : FALSE;
								}

								$str .= $chunk;
								continue;
							}

							if ($process == FALSE)
							{
								$str .= $chunk;
								continue;
							}

							// Convert Newlines into <p> and <br /> tags
							$str .= $this->format_newlines($chunk);
						}

						/*
						 * Clean up paragraph tags before/after "block" elements
						 *
						 * Earlier we added <p></p> tags before/after block level elements.
						 * Then, we added paragraph tags around double line breaks.  This
						 * potentially created incorrectly formatted paragraphs so we'll
						 * clean it up here.
						 *
						 */
						$str = preg_replace("#<p>({@TAG}.*?)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3", $str);
						$str = preg_replace("#({@TAG}/.*?)(".$this->block_elements.")(.*?>)</p>#", "\\1\\2\\3", $str);

						// Convert Quotes and other characters
						$str = $this->format_characters($str);

						// Final clean up
						$str = preg_replace('#(<p>\n*</p>)#', '', $str);
						$str = preg_replace('#(<p.*?>)<p>#', "\\1", $str);

						// Collapse doubled paragraph tags, trim the padding added at the
						// top of this function and restore the temporary markers.
						$str = str_replace(
							array(
								'</p></p>',
								'</p><p>',
								'<p> ',
								' </p>',
								'{@TAG}',
								'{@DQ}',
								'{@SQ}',
								'<p></p>'
							),
							array(
								'</p>',
								'<p>',
								'<p>',
								'</p>',
								'<',
								'"',
								"'",
								''
							),
							$str
						);

						return $str;
					}

					// --------------------------------------------------------------------

					/**
					 * Format Characters
					 *
					 * This function mainly converts double and single quotes
					 * to entities, but since these are directional, it does
					 * it based on some rules.  It also converts em-dashes
					 * and a couple other things.
					 *
					 * Every scanning loop below looks at the characters next to a match
					 * and then replaces that exact character sequence everywhere in the
					 * string, so identical sequences further along are converted too.
					 *
					 * @access	public
					 * @param	string
					 * @return	string
					 */
					function format_characters($str)
					{
						// Simple one-to-one replacements, applied in this order after the
						// context-sensitive loops have run.
						$table = array(
							' "'	=> " &#8220;",
							'" '	=> "&#8221; ",
							" '"	=> " &#8216;",
							"' "	=> "&#8217; ",

							'>"'	=> ">&#8220;",
							'"<'	=> "&#8221;<",
							">'"	=> ">&#8216;",
							"'<"	=> "&#8217;<",

							"\"."	=> "&#8221;.",
							"\","	=> "&#8221;,",
							"\";"	=> "&#8221;;",
							"\":"	=> "&#8221;:",
							"\"!"	=> "&#8221;!",
							"\"?"	=> "&#8221;?",

							". "	=> ".&nbsp; ",
							"? "	=> "?&nbsp; ",
							"! "	=> "!&nbsp; ",
							": "	=> ":&nbsp; ",
						);

						// Characters treated as whitespace by the neighbour checks
						$space = array("\n", "\t", " ");

						// These deal with quotes within quotes, like:  "'hi here'"
						$start = 0;

						while (TRUE)
						{
							$current = strpos(substr($str, $start), "\"'");

							if ($current === FALSE) break;

							$one_before = substr($str, $start + $current - 1, 1);
							$one_after  = substr($str, $start + $current + 2, 1);

							if ( ! in_array($one_after, $space, TRUE) && $one_after != "<")
							{
								// Followed by text: this pair opens a quotation
								$str = str_replace($one_before."\"'".$one_after,
													$one_before."&#8220;&#8216;".$one_after,
													$str);
							}
							elseif ( ! in_array($one_before, $space, TRUE) && (in_array($one_after, $space, TRUE) || $one_after == '<'))
							{
								// Preceded by text and followed by whitespace or a tag: closes one
								$str = str_replace($one_before."\"'".$one_after,
													$one_before."&#8221;&#8217;".$one_after,
													$str);
							}

							$start = $start + $current + 2;
						}

						$start = 0;

						while (TRUE)
						{
							$current = strpos(substr($str, $start), "'\"");

							if ($current === FALSE) break;

							$one_before = substr($str, $start + $current - 1, 1);
							$one_after  = substr($str, $start + $current + 2, 1);

							if (in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE) && $one_after != "<")
							{
								$str = str_replace($one_before."'\"".$one_after,
													$one_before."&#8216;&#8220;".$one_after,
													$str);
							}
							elseif ( ! in_array($one_before, $space, TRUE) && $one_before != ">")
							{
								$str = str_replace($one_before."'\"".$one_after,
													$one_before."&#8217;&#8221;".$one_after,
													$str);
							}

							$start = $start + $current + 2;
						}

						// Are there quotes within a word, as in:  ("something")
						if (preg_match_all("/(.)\"(\S+?)\"(.)/", $str, $matches))
						{
							for ($i = 0, $s = count($matches[0]); $i < $s; ++$i)
							{
								if ( ! in_array($matches[1][$i], $space, TRUE) && ! in_array($matches[3][$i], $space, TRUE))
								{
									$str = str_replace($matches[0][$i],
														$matches[1][$i]."&#8220;".$matches[2][$i]."&#8221;".$matches[3][$i],
														$str);
								}
							}
						}

						if (preg_match_all("/(.)\'(\S+?)\'(.)/", $str, $matches))
						{
							for ($i = 0, $s = count($matches[0]); $i < $s; ++$i)
							{
								if ( ! in_array($matches[1][$i], $space, TRUE) && ! in_array($matches[3][$i], $space, TRUE))
								{
									$str = str_replace($matches[0][$i],
														$matches[1][$i]."&#8216;".$matches[2][$i]."&#8217;".$matches[3][$i],
														$str);
								}
							}
						}

						// How about one apostrophe, as in Rick's
						$start = 0;

						while (TRUE)
						{
							$current = strpos(substr($str, $start), "'");

							if ($current === FALSE) break;

							$one_before = substr($str, $start + $current - 1, 1);
							$one_after  = substr($str, $start + $current + 1, 1);

							if ( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
							{
								$str = str_replace($one_before."'".$one_after,
													$one_before."&#8217;".$one_after,
													$str);
							}

							$start = $start + $current + 2;
						}

						// Em-dashes
						$start = 0;

						while (TRUE)
						{
							$current = strpos(substr($str, $start), "--");

							if ($current === FALSE) break;

							$one_before = substr($str, $start + $current - 1, 1);
							$one_after  = substr($str, $start + $current + 2, 1);
							$two_before = substr($str, $start + $current - 2, 1);
							$two_after  = substr($str, $start + $current + 3, 1);

							// Convert "word--word" and "word -- word"; in the second form the
							// single spaces around the dashes are dropped by the trim() calls.
							if (( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
								||
								( ! in_array($two_before, $space, TRUE) && ! in_array($two_after, $space, TRUE) && $one_before == ' ' && $one_after == ' ')
								)
							{
								$str = str_replace($two_before.$one_before."--".$one_after.$two_after,
													$two_before.trim($one_before)."&#8212;".trim($one_after).$two_after,
													$str);
							}

							$start = $start + $current + 2;
						}

						// Ellipsis
						$str = preg_replace("#(\w)\.\.\.(\s|<br />|</p>)#", "\\1&#8230;\\2", $str);
						$str = preg_replace("#(\s|<br />|</p>)\.\.\.(\w)#", "\\1&#8230;\\2", $str);

						// Run the translation array we defined above
						$str = str_replace(array_keys($table), array_values($table), $str);

						// If there are any stray double quotes we'll catch them here
						$start = 0;

						while (TRUE)
						{
							$current = strpos(substr($str, $start), '"');

							if ($current === FALSE) break;

							$one_before = substr($str, $start + $current - 1, 1);
							$one_after  = substr($str, $start + $current + 1, 1);

							if ( ! in_array($one_after, $space, TRUE))
							{
								$str = str_replace($one_before.'"'.$one_after,
													$one_before."&#8220;".$one_after,
													$str);
							}
							elseif ( ! in_array($one_before, $space, TRUE))
							{
								// The search string must contain the double quote that was
								// just found; searching for an apostrophe here left closing
								// double quotes unconverted.
								$str = str_replace($one_before.'"'.$one_after,
													$one_before."&#8221;".$one_after,
													$str);
							}

							$start = $start + $current + 2;
						}

						// ...and the same for stray single quotes
						$start = 0;

						while (TRUE)
						{
							$current = strpos(substr($str, $start), "'");

							if ($current === FALSE) break;

							$one_before = substr($str, $start + $current - 1, 1);
							$one_after  = substr($str, $start + $current + 1, 1);

							if ( ! in_array($one_after, $space, TRUE))
							{
								$str = str_replace($one_before."'".$one_after,
													$one_before."&#8216;".$one_after,
													$str);
							}
							elseif ( ! in_array($one_before, $space, TRUE))
							{
								$str = str_replace($one_before."'".$one_after,
													$one_before."&#8217;".$one_after,
													$str);
							}

							$start = $start + $current + 2;
						}

						return $str;
					}

					// --------------------------------------------------------------------

					/**
					 * Format Newlines
					 *
					 * Converts newline characters into either <p> tags or <br />
					 *
					 * An empty string is returned unchanged.  Anything else is wrapped
					 * in <p>...</p>; double newlines become paragraph boundaries and a
					 * single newline between two other characters gets a <br /> placed
					 * in front of it.
					 *
					 * @access	public
					 * @param	string
					 * @return	string
					 */
					function format_newlines($str)
					{
						if ($str == '')
						{
							return $str;
						}

						if (strpos($str, "\n") === FALSE)
						{
							return '<p>'.$str.'</p>';
						}

						$str = str_replace("\n\n", "</p>\n\n<p>", $str);
						$str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);

						return '<p>'.$str.'</p>';
					}
				}
				532
				533
				534	?>