Blame - system/helpers/typography_helper.php - code-igniter-v3-giggi

blob: 355f02cf9d80f6d28f30cf26ccba1138d3fbf761 [file] [log] [blame]

Derek Allard	d2df9bc	2007-04-15 17:41:17 +0000	[diff] [blame]	1	<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
				2	/**
				3	* CodeIgniter
				4	*
				5	* An open source application development framework for PHP 4.3.2 or newer
				6	*
				7	* @package CodeIgniter
				8	* @author Rick Ellis
				9	* @copyright Copyright (c) 2006, EllisLab, Inc.
Derek Allard	6838f00	2007-10-04 19:29:59 +0000	[diff] [blame^]	10	* @license http://www.codeigniter.com/user_guide/license.html
Derek Allard	d2df9bc	2007-04-15 17:41:17 +0000	[diff] [blame]	11	* @link http://www.codeigniter.com
				12	* @since Version 1.0
				13	* @filesource
				14	*/
				15
				16	// ------------------------------------------------------------------------
				17
				18	/**
				19	* CodeIgniter Typography Helpers
				20	*
				21	* @package CodeIgniter
				22	* @subpackage Helpers
				23	* @category Helpers
				24	* @author Rick Ellis
				25	* @link http://www.codeigniter.com/user_guide/helpers/typography_helper.html
				26	*/
				27
				28	// ------------------------------------------------------------------------
				29
				30	/**
				31	* Convert newlines to HTML line breaks except within PRE tags
				32	*
				33	* @access public
				34	* @param string
				35	* @return string
				36	*/
				37	function nl2br_except_pre($str)
				38	{
				39	$ex = explode("pre>",$str);
				40	$ct = count($ex);
				41
				42	$newstr = "";
				43	for ($i = 0; $i < $ct; $i++)
				44	{
				45	if (($i % 2) == 0)
				46	{
				47	$newstr .= nl2br($ex[$i]);
				48	}
				49	else
				50	{
				51	$newstr .= $ex[$i];
				52	}
				53
				54	if ($ct - 1 != $i)
				55	$newstr .= "pre>";
				56	}
				57
				58	return $newstr;
				59	}
				60
				61	// ------------------------------------------------------------------------
				62
				63	/**
				64	* Auto Typography Wrapper Function
				65	*
				66	*
				67	* @access public
				68	* @param string
				69	* @return string
				70	*/
				71	function auto_typography($str)
				72	{
				73	$TYPE = new Auto_typography();
				74	return $TYPE->convert($str);
				75	}
				76
				77	// ------------------------------------------------------------------------
				78
				79	/**
				80	* Auto Typography Class
				81	*
				82	*
				83	* @access private
				84	* @category Helpers
				85	* @author Rick Ellis
				86	* @author Paul Burdick
				87	* @link http://www.codeigniter.com/user_guide/helpers/
				88	*/
				89	class Auto_typography {
				90
				91	// Block level elements that should not be wrapped inside <p> tags
				92	var $block_elements = 'div\|blockquote\|pre\|code\|h\d\|script\|ol\|un';
				93
				94	// Elements that should not have <p> and <br /> tags within them.
				95	var $skip_elements = 'pre\|ol\|ul';
				96
				97	// Tags we want the parser to completely ignore when splitting the string.
				98	var $ignore_elements = 'a\|b\|i\|em\|strong\|span\|img\|li';
				99
				100
				101	/**
				102	* Main Processing Function
				103	*
				104	*/
				105	function convert($str)
				106	{
				107	if ($str == '')
				108	{
				109	return '';
				110	}
				111
				112	$str = ' '.$str.' ';
				113
				114	// Standardize Newlines to make matching easier
				115	$str = preg_replace("/(\r\n\|\r)/", "\n", $str);
				116
				117	/*
				118	* Reduce line breaks
				119	*
				120	* If there are more than two consecutive line
				121	* breaks we'll compress them down to a maximum
				122	* of two since there's no benefit to more.
				123	*
				124	*/
				125	$str = preg_replace("/\n\n+/", "\n\n", $str);
				126
				127	/*
				128	* Convert quotes within tags to temporary marker
				129	*
				130	* We don't want quotes converted within
				131	* tags so we'll temporarily convert them to
				132	* {@DQ} and {@SQ}
				133	*
				134	*/
				135	if (preg_match_all("#\<.+?>#si", $str, $matches))
				136	{
				137	for ($i = 0; $i < count($matches['0']); $i++)
				138	{
				139	$str = str_replace($matches['0'][$i],
				140	str_replace(array("'",'"'), array('{@SQ}', '{@DQ}'), $matches['0'][$i]),
				141	$str);
				142	}
				143	}
				144
				145
				146	/*
				147	* Add closing/opening paragraph tags before/after "block" elements
				148	*
				149	* Since block elements (like <blockquotes>, <pre>, etc.) do not get
				150	* wrapped in paragraph tags we will add a closing </p> tag just before
				151	* each block element starts and an opening <p> tag right after the block element
				152	* ends. Later on we'll do some further clean up.
				153	*
				154	*/
				155	$str = preg_replace("#(<.?)(".$this->block_elements.")(.?>)#", "</p>\\1\\2\\3", $str);
				156	$str = preg_replace("#(</.?)(".$this->block_elements.")(.?>)#", "\\1\\2\\3<p>", $str);
				157
				158	/*
				159	* Convert "ignore" tags to temporary marker
				160	*
				161	* The parser splits out the string at every tag
				162	* it encounters. Certain inline tags, like image
				163	* tags, links, span tags, etc. will be adversely
				164	* affected if they are split out so we'll convert
				165	* the opening < temporarily to: {@TAG}
				166	*
				167	*/
				168	$str = preg_replace("#<(/*)(".$this->ignore_elements.")#i", "{@TAG}\\1\\2", $str);
				169
				170	/*
				171	* Split the string at every tag
				172	*
				173	* This creates an array with this prototype:
				174	*
				175	* [array]
				176	* {
				177	* [0] = <opening tag>
				178	* [1] = Content contained between the tags
				179	* [2] = <closing tag>
				180	* Etc...
				181	* }
				182	*
				183	*/
				184	$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]"\|\'[^\']\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE\|PREG_SPLIT_NO_EMPTY);
				185
				186	/*
				187	* Build our finalized string
				188	*
				189	* We'll cycle through the array, skipping tags,
				190	* and processing the contained text
				191	*
				192	*/
				193	$str = '';
				194	$process = TRUE;
				195	foreach ($chunks as $chunk)
				196	{
				197	/*
				198	* Are we dealing with a tag?
				199	*
				200	* If so, we'll skip the processing for this cycle.
				201	* Well also set the "process" flag which allows us
				202	* to skip <pre> tags and a few other things.
				203	*
				204	*/
				205	if (preg_match("#<(/)(".$this->block_elements.").?\>#", $chunk, $match))
				206	{
				207	if (preg_match("#".$this->skip_elements."#", $match['2']))
				208	{
				209	$process = ($match['1'] == '/') ? TRUE : FALSE;
				210	}
				211
				212	$str .= $chunk;
				213	continue;
				214	}
				215
				216	if ($process == FALSE)
				217	{
				218	$str .= $chunk;
				219	continue;
				220	}
				221
				222	// Convert Newlines into <p> and <br /> tags
				223	$str .= $this->format_newlines($chunk);
				224	}
				225
				226	// FINAL CLEAN UP
				227	// IMPORTANT: DO NOT ALTER THE ORDER OF THE ITEMS BELOW!
				228
				229	/*
				230	* Clean up paragraph tags before/after "block" elements
				231	*
				232	* Earlier we added <p></p> tags before/after block level elements.
				233	* Then, we added paragraph tags around double line breaks. This
				234	* potentially created incorrectly formatted paragraphs so we'll
				235	* clean it up here.
				236	*
				237	*/
				238	$str = preg_replace("#<p>({@TAG}.?)(".$this->block_elements.")(.?>)#", "\\1\\2\\3", $str);
				239	$str = preg_replace("#({@TAG}/.?)(".$this->block_elements.")(.?>)</p>#", "\\1\\2\\3", $str);
				240
				241	// Convert Quotes and other characters
				242	$str = $this->format_characters($str);
				243
				244	// Fix an artifact that happens during the paragraph replacement
				245	$str = preg_replace('#(<p>\n*</p>)#', '', $str);
				246
				247	// If the user submitted their own paragraph tags with class data
				248	// in them we will retain them instead of using our tags.
				249	$str = preg_replace('#(<p.*?>)<p>#', "\\1", $str);
				250
				251	// Final clean up
				252	$str = str_replace(
				253	array(
				254	'</p></p>',
				255	'</p><p>',
				256	'<p> ',
				257	' </p>',
				258	'{@TAG}',
				259	'{@DQ}',
				260	'{@SQ}',
				261	'<p></p>'
				262	),
				263	array(
				264	'</p>',
				265	'<p>',
				266	'<p>',
				267	'</p>',
				268	'<',
				269	'"',
				270	"'",
				271	''
				272	),
				273	$str
				274	);
				275
				276	return $str;
				277	}
				278
				279	// --------------------------------------------------------------------
				280
				281	/**
				282	* Format Characters
				283	*
				284	* This function mainly converts double and single quotes
				285	* to entities, but since these are directional, it does
				286	* it based on some rules. It also converts em-dashes
				287	* and a couple other things.
				288	*/
				289	function format_characters($str)
				290	{
				291	$table = array(
				292	' "' => " “",
				293	'" ' => "” ",
				294	" '" => " ‘",
				295	"' " => "’ ",
				296
				297	'>"' => ">“",
				298	'"<' => "”<",
				299	">'" => ">‘",
				300	"'<" => "’<",
				301
				302	"\"." => "”.",
				303	"\"," => "”,",
				304	"\";" => "”;",
				305	"\":" => "”:",
				306	"\"!" => "”!",
				307	"\"?" => "”?",
				308
				309	". " => ".  ",
				310	"? " => "?  ",
				311	"! " => "!  ",
				312	": " => ":  ",
				313	);
				314
				315	// These deal with quotes within quotes, like: "'hi here'"
				316	$start = 0;
				317	$space = array("\n", "\t", " ");
				318
				319	while(TRUE)
				320	{
				321	$current = strpos(substr($str, $start), "\"'");
				322
				323	if ($current === FALSE) break;
				324
				325	$one_before = substr($str, $start+$current-1, 1);
				326	$one_after = substr($str, $start+$current+2, 1);
				327
				328	if ( ! in_array($one_after, $space, TRUE) && $one_after != "<")
				329	{
				330	$str = str_replace( $one_before."\"'".$one_after,
				331	$one_before."“‘".$one_after,
				332	$str);
				333	}
				334	elseif ( ! in_array($one_before, $space, TRUE) && (in_array($one_after, $space, TRUE) OR $one_after == '<'))
				335	{
				336	$str = str_replace( $one_before."\"'".$one_after,
				337	$one_before."”’".$one_after,
				338	$str);
				339	}
				340
				341	$start = $start+$current+2;
				342	}
				343
				344	$start = 0;
				345
				346	while(TRUE)
				347	{
				348	$current = strpos(substr($str, $start), "'\"");
				349
				350	if ($current === FALSE) break;
				351
				352	$one_before = substr($str, $start+$current-1, 1);
				353	$one_after = substr($str, $start+$current+2, 1);
				354
				355	if ( in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE) && $one_after != "<")
				356	{
				357	$str = str_replace( $one_before."'\"".$one_after,
				358	$one_before."‘“".$one_after,
				359	$str);
				360	}
				361	elseif ( ! in_array($one_before, $space, TRUE) && $one_before != ">")
				362	{
				363	$str = str_replace( $one_before."'\"".$one_after,
				364	$one_before."’”".$one_after,
				365	$str);
				366	}
				367
				368	$start = $start+$current+2;
				369	}
				370
				371	// Are there quotes within a word, as in: ("something")
				372	if (preg_match_all("/(.)\"(\S+?)\"(.)/", $str, $matches))
				373	{
				374	for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
				375	{
				376	if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				377	{
				378	$str = str_replace( $matches['0'][$i],
				379	$matches['1'][$i]."“".$matches['2'][$i]."”".$matches['3'][$i],
				380	$str);
				381	}
				382	}
				383	}
				384
				385	if (preg_match_all("/(.)\'(\S+?)\'(.)/", $str, $matches))
				386	{
				387	for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
				388	{
				389	if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				390	{
				391	$str = str_replace( $matches['0'][$i],
				392	$matches['1'][$i]."‘".$matches['2'][$i]."’".$matches['3'][$i],
				393	$str);
				394	}
				395	}
				396	}
				397
				398	// How about one apostrophe, as in Rick's
				399	$start = 0;
				400
				401	while(TRUE)
				402	{
				403	$current = strpos(substr($str, $start), "'");
				404
				405	if ($current === FALSE) break;
				406
				407	$one_before = substr($str, $start+$current-1, 1);
				408	$one_after = substr($str, $start+$current+1, 1);
				409
				410	if ( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
				411	{
				412	$str = str_replace( $one_before."'".$one_after,
				413	$one_before."’".$one_after,
				414	$str);
				415	}
				416
				417	$start = $start+$current+2;
				418	}
				419
				420	// Em-dashes
				421	$start = 0;
				422	while(TRUE)
				423	{
				424	$current = strpos(substr($str, $start), "--");
				425
				426	if ($current === FALSE) break;
				427
				428	$one_before = substr($str, $start+$current-1, 1);
				429	$one_after = substr($str, $start+$current+2, 1);
				430	$two_before = substr($str, $start+$current-2, 1);
				431	$two_after = substr($str, $start+$current+3, 1);
				432
				433	if (( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
				434	OR
				435	( ! in_array($two_before, $space, TRUE) && ! in_array($two_after, $space, TRUE) && $one_before == ' ' && $one_after == ' ')
				436	)
				437	{
				438	$str = str_replace( $two_before.$one_before."--".$one_after.$two_after,
				439	$two_before.trim($one_before)."—".trim($one_after).$two_after,
				440	$str);
				441	}
				442
				443	$start = $start+$current+2;
				444	}
				445
				446	// Ellipsis
				447	$str = preg_replace("#(\w)\.\.\.(\s\|<br />\|</p>)#", "\\1…\\2", $str);
				448	$str = preg_replace("#(\s\|<br />\|</p>)\.\.\.(\w)#", "\\1…\\2", $str);
				449
				450	// Run the translation array we defined above
				451	$str = str_replace(array_keys($table), array_values($table), $str);
				452
				453	// If there are any stray double quotes we'll catch them here
				454
				455	$start = 0;
				456
				457	while(TRUE)
				458	{
				459	$current = strpos(substr($str, $start), '"');
				460
				461	if ($current === FALSE) break;
				462
				463	$one_before = substr($str, $start+$current-1, 1);
				464	$one_after = substr($str, $start+$current+1, 1);
				465
				466	if ( ! in_array($one_after, $space, TRUE))
				467	{
				468	$str = str_replace( $one_before.'"'.$one_after,
				469	$one_before."“".$one_after,
				470	$str);
				471	}
				472	elseif( ! in_array($one_before, $space, TRUE))
				473	{
				474	$str = str_replace( $one_before."'".$one_after,
				475	$one_before."”".$one_after,
				476	$str);
				477	}
				478
				479	$start = $start+$current+2;
				480	}
				481
				482	$start = 0;
				483
				484	while(TRUE)
				485	{
				486	$current = strpos(substr($str, $start), "'");
				487
				488	if ($current === FALSE) break;
				489
				490	$one_before = substr($str, $start+$current-1, 1);
				491	$one_after = substr($str, $start+$current+1, 1);
				492
				493	if ( ! in_array($one_after, $space, TRUE))
				494	{
				495	$str = str_replace( $one_before."'".$one_after,
				496	$one_before."‘".$one_after,
				497	$str);
				498	}
				499	elseif( ! in_array($one_before, $space, TRUE))
				500	{
				501	$str = str_replace( $one_before."'".$one_after,
				502	$one_before."’".$one_after,
				503	$str);
				504	}
				505
				506	$start = $start+$current+2;
				507	}
				508
				509	return $str;
				510	}
				511
				512	// --------------------------------------------------------------------
				513
				514	/**
				515	* Format Newlines
				516	*
				517	* Converts newline characters into either <p> tags or <br />
				518	*
				519	*/
				520	function format_newlines($str)
				521	{
				522	if ($str == '')
				523	{
				524	return $str;
				525	}
				526
				527	if (strpos($str, "\n") === FALSE)
				528	{
				529	return '<p>'.$str.'</p>';
				530	}
				531
				532	$str = str_replace("\n\n", "</p>\n\n<p>", $str);
				533	$str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);
				534
				535	return '<p>'.$str.'</p>';
				536	}
				537	}
				538
				539
admin	b0dd10f	2006-08-25 17:25:49 +0000	[diff] [blame]	540	?>