Blame - system/helpers/typography_helper.php - code-igniter-v3-giggi

blob: 93a887dbe581900ba4c517dece52468057d79c41 [file] [log] [blame]

Derek Allard	d2df9bc	2007-04-15 17:41:17 +0000	[diff] [blame]	1	<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
				2	/**
				3	* CodeIgniter
				4	*
				5	* An open source application development framework for PHP 4.3.2 or newer
				6	*
				7	* @package CodeIgniter
Derek Allard	3d879d5	2008-01-18 19:41:32 +0000	[diff] [blame]	8	* @author ExpressionEngine Dev Team
Derek Allard	d2df9bc	2007-04-15 17:41:17 +0000	[diff] [blame]	9	* @copyright Copyright (c) 2006, EllisLab, Inc.
Derek Allard	6838f00	2007-10-04 19:29:59 +0000	[diff] [blame]	10	* @license http://www.codeigniter.com/user_guide/license.html
Derek Allard	d2df9bc	2007-04-15 17:41:17 +0000	[diff] [blame]	11	* @link http://www.codeigniter.com
				12	* @since Version 1.0
				13	* @filesource
				14	*/
				15
				16	// ------------------------------------------------------------------------
				17
				18	/**
				19	* CodeIgniter Typography Helpers
				20	*
				21	* @package CodeIgniter
				22	* @subpackage Helpers
				23	* @category Helpers
Derek Allard	3d879d5	2008-01-18 19:41:32 +0000	[diff] [blame]	24	* @author ExpressionEngine Dev Team
Derek Allard	d2df9bc	2007-04-15 17:41:17 +0000	[diff] [blame]	25	* @link http://www.codeigniter.com/user_guide/helpers/typography_helper.html
				26	*/
				27
				28	// ------------------------------------------------------------------------
				29
				30	/**
				31	* Convert newlines to HTML line breaks except within PRE tags
				32	*
				33	* @access public
				34	* @param string
				35	* @return string
				36	*/
				37	function nl2br_except_pre($str)
				38	{
				39	$ex = explode("pre>",$str);
				40	$ct = count($ex);
				41
				42	$newstr = "";
				43	for ($i = 0; $i < $ct; $i++)
				44	{
				45	if (($i % 2) == 0)
				46	{
				47	$newstr .= nl2br($ex[$i]);
				48	}
				49	else
				50	{
				51	$newstr .= $ex[$i];
				52	}
				53
				54	if ($ct - 1 != $i)
				55	$newstr .= "pre>";
				56	}
				57
				58	return $newstr;
				59	}
				60
				61	// ------------------------------------------------------------------------
				62
				63	/**
				64	* Auto Typography Wrapper Function
				65	*
				66	*
				67	* @access public
				68	* @param string
				69	* @return string
				70	*/
				71	function auto_typography($str)
				72	{
				73	$TYPE = new Auto_typography();
				74	return $TYPE->convert($str);
				75	}
				76
				77	// ------------------------------------------------------------------------
				78
				79	/**
				80	* Auto Typography Class
				81	*
				82	*
				83	* @access private
				84	* @category Helpers
Derek Allard	3d879d5	2008-01-18 19:41:32 +0000	[diff] [blame]	85	* @author ExpressionEngine Dev Team
Derek Allard	d2df9bc	2007-04-15 17:41:17 +0000	[diff] [blame]	86	* @link http://www.codeigniter.com/user_guide/helpers/
				87	*/
				88	class Auto_typography {
				89
				90	// Block level elements that should not be wrapped inside <p> tags
Derek Allard	bab7ed9	2008-01-19 15:41:28 +0000	[diff] [blame]	91	var $block_elements = 'div\|blockquote\|pre\|code\|h\d\|script\|ol\|ul';
Derek Allard	d2df9bc	2007-04-15 17:41:17 +0000	[diff] [blame]	92
				93	// Elements that should not have <p> and <br /> tags within them.
				94	var $skip_elements = 'pre\|ol\|ul';
				95
				96	// Tags we want the parser to completely ignore when splitting the string.
				97	var $ignore_elements = 'a\|b\|i\|em\|strong\|span\|img\|li';
				98
				99
				100	/**
				101	* Main Processing Function
				102	*
				103	*/
				104	function convert($str)
				105	{
				106	if ($str == '')
				107	{
				108	return '';
				109	}
				110
				111	$str = ' '.$str.' ';
				112
				113	// Standardize Newlines to make matching easier
				114	$str = preg_replace("/(\r\n\|\r)/", "\n", $str);
				115
				116	/*
				117	* Reduce line breaks
				118	*
				119	* If there are more than two consecutive line
				120	* breaks we'll compress them down to a maximum
				121	* of two since there's no benefit to more.
				122	*
				123	*/
				124	$str = preg_replace("/\n\n+/", "\n\n", $str);
				125
				126	/*
				127	* Convert quotes within tags to temporary marker
				128	*
				129	* We don't want quotes converted within
				130	* tags so we'll temporarily convert them to
				131	* {@DQ} and {@SQ}
				132	*
				133	*/
				134	if (preg_match_all("#\<.+?>#si", $str, $matches))
				135	{
				136	for ($i = 0; $i < count($matches['0']); $i++)
				137	{
				138	$str = str_replace($matches['0'][$i],
				139	str_replace(array("'",'"'), array('{@SQ}', '{@DQ}'), $matches['0'][$i]),
				140	$str);
				141	}
				142	}
				143
				144
				145	/*
				146	* Add closing/opening paragraph tags before/after "block" elements
				147	*
				148	* Since block elements (like <blockquotes>, <pre>, etc.) do not get
				149	* wrapped in paragraph tags we will add a closing </p> tag just before
				150	* each block element starts and an opening <p> tag right after the block element
				151	* ends. Later on we'll do some further clean up.
				152	*
				153	*/
Derek Jones	8b25191	2008-01-17 00:34:37 +0000	[diff] [blame]	154	$str = preg_replace("#(<)(".$this->block_elements.")(.*?>)#", "</p>\\1\\2\\3", $str);
				155	$str = preg_replace("#(</)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3<p>", $str);
Derek Allard	d2df9bc	2007-04-15 17:41:17 +0000	[diff] [blame]	156
				157	/*
				158	* Convert "ignore" tags to temporary marker
				159	*
				160	* The parser splits out the string at every tag
				161	* it encounters. Certain inline tags, like image
				162	* tags, links, span tags, etc. will be adversely
				163	* affected if they are split out so we'll convert
				164	* the opening < temporarily to: {@TAG}
				165	*
				166	*/
				167	$str = preg_replace("#<(/*)(".$this->ignore_elements.")#i", "{@TAG}\\1\\2", $str);
				168
				169	/*
				170	* Split the string at every tag
				171	*
				172	* This creates an array with this prototype:
				173	*
				174	* [array]
				175	* {
				176	* [0] = <opening tag>
				177	* [1] = Content contained between the tags
				178	* [2] = <closing tag>
				179	* Etc...
				180	* }
				181	*
				182	*/
				183	$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]"\|\'[^\']\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE\|PREG_SPLIT_NO_EMPTY);
				184
				185	/*
				186	* Build our finalized string
				187	*
				188	* We'll cycle through the array, skipping tags,
				189	* and processing the contained text
				190	*
				191	*/
				192	$str = '';
				193	$process = TRUE;
				194	foreach ($chunks as $chunk)
				195	{
				196	/*
				197	* Are we dealing with a tag?
				198	*
				199	* If so, we'll skip the processing for this cycle.
				200	* Well also set the "process" flag which allows us
				201	* to skip <pre> tags and a few other things.
				202	*
				203	*/
				204	if (preg_match("#<(/)(".$this->block_elements.").?\>#", $chunk, $match))
				205	{
				206	if (preg_match("#".$this->skip_elements."#", $match['2']))
				207	{
				208	$process = ($match['1'] == '/') ? TRUE : FALSE;
				209	}
				210
				211	$str .= $chunk;
				212	continue;
				213	}
				214
				215	if ($process == FALSE)
				216	{
				217	$str .= $chunk;
				218	continue;
				219	}
				220
				221	// Convert Newlines into <p> and <br /> tags
				222	$str .= $this->format_newlines($chunk);
				223	}
				224
				225	// FINAL CLEAN UP
				226	// IMPORTANT: DO NOT ALTER THE ORDER OF THE ITEMS BELOW!
				227
				228	/*
				229	* Clean up paragraph tags before/after "block" elements
				230	*
				231	* Earlier we added <p></p> tags before/after block level elements.
				232	* Then, we added paragraph tags around double line breaks. This
				233	* potentially created incorrectly formatted paragraphs so we'll
				234	* clean it up here.
				235	*
				236	*/
				237	$str = preg_replace("#<p>({@TAG}.?)(".$this->block_elements.")(.?>)#", "\\1\\2\\3", $str);
				238	$str = preg_replace("#({@TAG}/.?)(".$this->block_elements.")(.?>)</p>#", "\\1\\2\\3", $str);
				239
				240	// Convert Quotes and other characters
				241	$str = $this->format_characters($str);
				242
				243	// Fix an artifact that happens during the paragraph replacement
				244	$str = preg_replace('#(<p>\n*</p>)#', '', $str);
				245
				246	// If the user submitted their own paragraph tags with class data
				247	// in them we will retain them instead of using our tags.
				248	$str = preg_replace('#(<p.*?>)<p>#', "\\1", $str);
				249
				250	// Final clean up
				251	$str = str_replace(
				252	array(
				253	'</p></p>',
				254	'</p><p>',
				255	'<p> ',
				256	' </p>',
				257	'{@TAG}',
				258	'{@DQ}',
				259	'{@SQ}',
				260	'<p></p>'
				261	),
				262	array(
				263	'</p>',
				264	'<p>',
				265	'<p>',
				266	'</p>',
				267	'<',
				268	'"',
				269	"'",
				270	''
				271	),
				272	$str
				273	);
				274
				275	return $str;
				276	}
				277
				278	// --------------------------------------------------------------------
				279
				280	/**
				281	* Format Characters
				282	*
				283	* This function mainly converts double and single quotes
				284	* to entities, but since these are directional, it does
				285	* it based on some rules. It also converts em-dashes
				286	* and a couple other things.
				287	*/
				288	function format_characters($str)
				289	{
				290	$table = array(
				291	' "' => " “",
				292	'" ' => "” ",
				293	" '" => " ‘",
				294	"' " => "’ ",
				295
				296	'>"' => ">“",
				297	'"<' => "”<",
				298	">'" => ">‘",
				299	"'<" => "’<",
				300
				301	"\"." => "”.",
				302	"\"," => "”,",
				303	"\";" => "”;",
				304	"\":" => "”:",
				305	"\"!" => "”!",
				306	"\"?" => "”?",
				307
				308	". " => ".  ",
				309	"? " => "?  ",
				310	"! " => "!  ",
				311	": " => ":  ",
				312	);
				313
				314	// These deal with quotes within quotes, like: "'hi here'"
				315	$start = 0;
				316	$space = array("\n", "\t", " ");
				317
				318	while(TRUE)
				319	{
				320	$current = strpos(substr($str, $start), "\"'");
				321
				322	if ($current === FALSE) break;
				323
				324	$one_before = substr($str, $start+$current-1, 1);
				325	$one_after = substr($str, $start+$current+2, 1);
				326
				327	if ( ! in_array($one_after, $space, TRUE) && $one_after != "<")
				328	{
				329	$str = str_replace( $one_before."\"'".$one_after,
				330	$one_before."“‘".$one_after,
				331	$str);
				332	}
				333	elseif ( ! in_array($one_before, $space, TRUE) && (in_array($one_after, $space, TRUE) OR $one_after == '<'))
				334	{
				335	$str = str_replace( $one_before."\"'".$one_after,
				336	$one_before."”’".$one_after,
				337	$str);
				338	}
				339
				340	$start = $start+$current+2;
				341	}
				342
				343	$start = 0;
				344
				345	while(TRUE)
				346	{
				347	$current = strpos(substr($str, $start), "'\"");
				348
				349	if ($current === FALSE) break;
				350
				351	$one_before = substr($str, $start+$current-1, 1);
				352	$one_after = substr($str, $start+$current+2, 1);
				353
				354	if ( in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE) && $one_after != "<")
				355	{
				356	$str = str_replace( $one_before."'\"".$one_after,
				357	$one_before."‘“".$one_after,
				358	$str);
				359	}
				360	elseif ( ! in_array($one_before, $space, TRUE) && $one_before != ">")
				361	{
				362	$str = str_replace( $one_before."'\"".$one_after,
				363	$one_before."’”".$one_after,
				364	$str);
				365	}
				366
				367	$start = $start+$current+2;
				368	}
				369
				370	// Are there quotes within a word, as in: ("something")
				371	if (preg_match_all("/(.)\"(\S+?)\"(.)/", $str, $matches))
				372	{
				373	for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
				374	{
				375	if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				376	{
				377	$str = str_replace( $matches['0'][$i],
				378	$matches['1'][$i]."“".$matches['2'][$i]."”".$matches['3'][$i],
				379	$str);
				380	}
				381	}
				382	}
				383
				384	if (preg_match_all("/(.)\'(\S+?)\'(.)/", $str, $matches))
				385	{
				386	for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
				387	{
				388	if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				389	{
				390	$str = str_replace( $matches['0'][$i],
				391	$matches['1'][$i]."‘".$matches['2'][$i]."’".$matches['3'][$i],
				392	$str);
				393	}
				394	}
				395	}
				396
				397	// How about one apostrophe, as in Rick's
				398	$start = 0;
				399
				400	while(TRUE)
				401	{
				402	$current = strpos(substr($str, $start), "'");
				403
				404	if ($current === FALSE) break;
				405
				406	$one_before = substr($str, $start+$current-1, 1);
				407	$one_after = substr($str, $start+$current+1, 1);
				408
				409	if ( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
				410	{
				411	$str = str_replace( $one_before."'".$one_after,
				412	$one_before."’".$one_after,
				413	$str);
				414	}
				415
				416	$start = $start+$current+2;
				417	}
				418
				419	// Em-dashes
				420	$start = 0;
				421	while(TRUE)
				422	{
				423	$current = strpos(substr($str, $start), "--");
				424
				425	if ($current === FALSE) break;
				426
				427	$one_before = substr($str, $start+$current-1, 1);
				428	$one_after = substr($str, $start+$current+2, 1);
				429	$two_before = substr($str, $start+$current-2, 1);
				430	$two_after = substr($str, $start+$current+3, 1);
				431
				432	if (( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
				433	OR
				434	( ! in_array($two_before, $space, TRUE) && ! in_array($two_after, $space, TRUE) && $one_before == ' ' && $one_after == ' ')
				435	)
				436	{
				437	$str = str_replace( $two_before.$one_before."--".$one_after.$two_after,
				438	$two_before.trim($one_before)."—".trim($one_after).$two_after,
				439	$str);
				440	}
				441
				442	$start = $start+$current+2;
				443	}
				444
				445	// Ellipsis
				446	$str = preg_replace("#(\w)\.\.\.(\s\|<br />\|</p>)#", "\\1…\\2", $str);
				447	$str = preg_replace("#(\s\|<br />\|</p>)\.\.\.(\w)#", "\\1…\\2", $str);
				448
				449	// Run the translation array we defined above
				450	$str = str_replace(array_keys($table), array_values($table), $str);
				451
				452	// If there are any stray double quotes we'll catch them here
				453
				454	$start = 0;
				455
				456	while(TRUE)
				457	{
				458	$current = strpos(substr($str, $start), '"');
				459
				460	if ($current === FALSE) break;
				461
				462	$one_before = substr($str, $start+$current-1, 1);
				463	$one_after = substr($str, $start+$current+1, 1);
				464
				465	if ( ! in_array($one_after, $space, TRUE))
				466	{
				467	$str = str_replace( $one_before.'"'.$one_after,
				468	$one_before."“".$one_after,
				469	$str);
				470	}
				471	elseif( ! in_array($one_before, $space, TRUE))
				472	{
				473	$str = str_replace( $one_before."'".$one_after,
				474	$one_before."”".$one_after,
				475	$str);
				476	}
				477
				478	$start = $start+$current+2;
				479	}
				480
				481	$start = 0;
				482
				483	while(TRUE)
				484	{
				485	$current = strpos(substr($str, $start), "'");
				486
				487	if ($current === FALSE) break;
				488
				489	$one_before = substr($str, $start+$current-1, 1);
				490	$one_after = substr($str, $start+$current+1, 1);
				491
				492	if ( ! in_array($one_after, $space, TRUE))
				493	{
				494	$str = str_replace( $one_before."'".$one_after,
				495	$one_before."‘".$one_after,
				496	$str);
				497	}
				498	elseif( ! in_array($one_before, $space, TRUE))
				499	{
				500	$str = str_replace( $one_before."'".$one_after,
				501	$one_before."’".$one_after,
				502	$str);
				503	}
				504
				505	$start = $start+$current+2;
				506	}
				507
				508	return $str;
				509	}
				510
				511	// --------------------------------------------------------------------
				512
				513	/**
				514	* Format Newlines
				515	*
				516	* Converts newline characters into either <p> tags or <br />
				517	*
				518	*/
				519	function format_newlines($str)
				520	{
				521	if ($str == '')
				522	{
				523	return $str;
				524	}
				525
				526	if (strpos($str, "\n") === FALSE)
				527	{
				528	return '<p>'.$str.'</p>';
				529	}
				530
				531	$str = str_replace("\n\n", "</p>\n\n<p>", $str);
				532	$str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);
				533
				534	return '<p>'.$str.'</p>';
				535	}
				536	}
				537
				538
admin	b0dd10f	2006-08-25 17:25:49 +0000	[diff] [blame]	539	?>