Blame - system/helpers/typography_helper.php - code-igniter-v3-giggi

blob: 6a5495239efbd0b4c18f7691f6f9144caa062674 [file] [log] [blame]

admin	b0dd10f	2006-08-25 17:25:49 +0000	[diff] [blame]	1	<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
				2	/**
				3	* Code Igniter
				4	*
				5	* An open source application development framework for PHP 4.3.2 or newer
				6	*
				7	* @package CodeIgniter
				8	* @author Rick Ellis
				9	* @copyright Copyright (c) 2006, pMachine, Inc.
				10	* @license http://www.codeigniter.com/user_guide/license.html
				11	* @link http://www.codeigniter.com
				12	* @since Version 1.0
				13	* @filesource
				14	*/
				15
				16	// ------------------------------------------------------------------------
				17
				18	/**
				19	* Code Igniter Typography Helpers
				20	*
				21	* @package CodeIgniter
				22	* @subpackage Helpers
				23	* @category Helpers
				24	* @author Rick Ellis
				25	* @link http://www.codeigniter.com/user_guide/helpers/typography_helper.html
				26	*/
				27
				28	// ------------------------------------------------------------------------
				29
				30	/**
				31	* Convert newlines to HTML line breaks except within PRE tags
				32	*
				33	* @access public
				34	* @param string
				35	* @return string
				36	*/
				37	function nl2br_except_pre($str)
				38	{
				39	$ex = explode("pre>",$str);
				40	$ct = count($ex);
				41
				42	$newstr = "";
				43	for ($i = 0; $i < $ct; $i++)
				44	{
				45	if (($i % 2) == 0)
				46	{
				47	$newstr .= nl2br($ex[$i]);
				48	}
				49	else
				50	{
				51	$newstr .= $ex[$i];
				52	}
				53
				54	if ($ct - 1 != $i)
				55	$newstr .= "pre>";
				56	}
				57
				58	return $newstr;
				59	}
				60
				61	// ------------------------------------------------------------------------
				62
				63	/**
				64	* Auto Typography Wrapper Function
				65	*
				66	*
				67	* @access public
				68	* @parm string
				69	* @return string
				70	*/
				71	function auto_typography($str)
				72	{
				73	$TYPE = new Auto_typography();
				74	return $TYPE->convert($str);
				75	}
				76
				77	// ------------------------------------------------------------------------
				78
				79	/**
				80	* Auto Typography Class
				81	*
				82	*
				83	* @access private
				84	* @category Helpers
				85	* @author Rick Ellis
				86	* @author Paul Burdick
				87	* @link http://www.codeigniter.com/user_guide/helpers/
				88	*/
				89	class Auto_typography {
				90
				91	// Block level elements that should not be wrapped inside <p> tags
				92	var $block_elements = 'div\|blockquote\|pre\|code\|h\d\|script\|ol\|un';
				93
				94	// Elements that should not have <p> and <br /> tags within them.
				95	var $skip_elements = 'pre\|ol\|ul';
				96
				97	// Tags we want the parser to completely ignore when splitting the string.
				98	var $ignore_elements = 'a\|b\|i\|em\|strong\|span\|img\|li';
				99
				100
				101	/**
				102	* Main Processing Function
				103	*
				104	*/
				105	function convert($str)
				106	{
				107	if ($str == '')
				108	{
				109	return '';
				110	}
				111
				112	$str = ' '.$str.' ';
				113
				114	// Standardize Newlines to make matching easier
				115	$str = preg_replace("/(\r\n\|\r)/", "\n", $str);
				116
				117	/*
				118	* Reduce line breaks
				119	*
				120	* If there are more than two consecutive line
				121	* breaks we'll compress them down to a maximum
				122	* of two since there's no benefit to more.
				123	*
				124	*/
				125	$str = preg_replace("/\n\n+/", "\n\n", $str);
				126
				127	/*
				128	* Convert quotes within tags to tempoarary marker
				129	*
				130	* We don't want quotes converted within
				131	* tags so we'll temporarily convert them to
				132	* {{{DQ}}} and {{{SQ}}}
				133	*
				134	*/
				135	if (preg_match_all("#\<.+?>#si", $str, $matches))
				136	{
				137	for ($i = 0; $i < count($matches['0']); $i++)
				138	{
				139	$str = str_replace($matches['0'][$i],
				140	str_replace(array("'",'"'), array('{{{SQ}}}', '{{{DQ}}}'), $matches['0'][$i]),
				141	$str);
				142	}
				143	}
				144
				145	/*
				146	* Convert "ignore" tags to tempoarary marker
				147	*
				148	* The parser splits out the string at every tag
				149	* it encounters. Certain inline tags, like image
				150	* tags, links, span tags, etc. will be adversely
				151	* affected if they are split out so we'll convert
				152	* the opening < temporarily to: {{{tag}}}
				153	*
				154	*/
				155	$str = preg_replace("#<(/*)(".$this->ignore_elements.")#i", "{{{tag}}}\\1\\2", $str);
				156
				157	/*
				158	* Split the string at every tag
				159	*
				160	* This creates an array with this prototype:
				161	*
				162	* [array]
				163	* {
				164	* [0] = <opening tag>
				165	* [1] = Content contained between the tags
				166	* [2] = <closing tag>
				167	* Etc...
				168	* }
				169	*
				170	*/
				171	$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]"\|\'[^\']\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE\|PREG_SPLIT_NO_EMPTY);
				172
				173	/*
				174	* Build our finalized string
				175	*
				176	* We'll cycle through the array, skipping tags,
				177	* and processing the contained text
				178	*
				179	*/
				180	$str = '';
				181	$process = TRUE;
				182	foreach ($chunks as $chunk)
				183	{
				184	/*
				185	* Are we dealing with a tag?
				186	*
				187	* If so, we'll skip the processing for this cycle.
				188	* Well also set the "process" flag which allows us
				189	* to skip <pre> tags and a few other things.
				190	*
				191	*/
				192	if (preg_match("#<(/)(".$this->block_elements.").?\>#", $chunk, $match))
				193	{
				194	if (preg_match("#".$this->skip_elements."#", $match['2']))
				195	{
				196	$process = ($match['1'] == '/') ? TRUE : FALSE;
				197	}
				198
				199	$str .= $chunk;
				200	continue;
				201	}
				202
				203	if ($process == FALSE)
				204	{
				205	$str .= $chunk;
				206	continue;
				207	}
				208
				209	// Convert Newlines into <p> and <br /> tags
				210	$str .= $this->format_newlines($chunk);
				211	}
				212
				213	// Convert Quotes and other characters
				214	$str = $this->format_characters($str);
				215
				216	// We'll swap our temporary markers back and do some clean up.
				217	$str = preg_replace('#(<p>\n*</p>)#', '', $str);
				218	$str = preg_replace('#(<p.*?>)<p>#', "\\1", $str);
				219
				220	$str = str_replace(
				221	array('</p></p>', '</p><p>', '{{{tag}}}', '{{{DQ}}}', '{{{SQ}}}'),
				222	array('</p>', '<p>', '<', '"', "'"),
				223	$str
				224	);
				225
				226	return trim($str);
				227	}
				228
				229	// --------------------------------------------------------------------
				230
				231	/**
				232	* Format Characters
				233	*
				234	* This function mainly converts double and single quotes
				235	* to entities, but since these are directional, it does
				236	* it based on some rules. It also converts em-dashes
				237	* and a couple other things.
				238	*/
				239	function format_characters($str)
				240	{
				241	$table = array(
				242	' "' => " “",
				243	'" ' => "” ",
				244	" '" => " ‘",
				245	"' " => "’ ",
				246
				247	'>"' => ">“",
				248	'"<' => "”<",
				249	">'" => ">‘",
				250	"'<" => "’<",
				251
				252	"\"." => "”.",
				253	"\"," => "”,",
				254	"\";" => "”;",
				255	"\":" => "”:",
				256	"\"!" => "”!",
				257	"\"?" => "”?",
				258
				259	". " => ".  ",
				260	"? " => "?  ",
				261	"! " => "!  ",
				262	": " => ":  ",
				263	);
				264
				265	// These deal with quotes within quotes, like: "'hi here'"
				266	$start = 0;
				267	$space = array("\n", "\t", " ");
				268
				269	while(TRUE)
				270	{
				271	$current = strpos(substr($str, $start), "\"'");
				272
				273	if ($current === FALSE) break;
				274
				275	$one_before = substr($str, $start+$current-1, 1);
				276	$one_after = substr($str, $start+$current+2, 1);
				277
				278	if ( ! in_array($one_after, $space) && $one_after != "<")
				279	{
				280	$str = str_replace( $one_before."\"'".$one_after,
				281	$one_before."“‘".$one_after,
				282	$str);
				283	}
				284	elseif ( ! in_array($one_before, $space) && (in_array($one_after, $space) OR $one_after == '<'))
				285	{
				286	$str = str_replace( $one_before."\"'".$one_after,
				287	$one_before."”’".$one_after,
				288	$str);
				289	}
				290
				291	$start = $start+$current+2;
				292	}
				293
				294	$start = 0;
				295
				296	while(TRUE)
				297	{
				298	$current = strpos(substr($str, $start), "'\"");
				299
				300	if ($current === FALSE) break;
				301
				302	$one_before = substr($str, $start+$current-1, 1);
				303	$one_after = substr($str, $start+$current+2, 1);
				304
				305	if ( in_array($one_before, $space) && ! in_array($one_after, $space) && $one_after != "<")
				306	{
				307	$str = str_replace( $one_before."'\"".$one_after,
				308	$one_before."‘“".$one_after,
				309	$str);
				310	}
				311	elseif ( ! in_array($one_before, $space) && $one_before != ">")
				312	{
				313	$str = str_replace( $one_before."'\"".$one_after,
				314	$one_before."’”".$one_after,
				315	$str);
				316	}
				317
				318	$start = $start+$current+2;
				319	}
				320
				321	// Are there quotes within a word, as in: ("something")
				322	if (preg_match_all("/(.)\"(\S+?)\"(.)/", $str, $matches))
				323	{
				324	for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
				325	{
				326	if ( ! in_array($matches['1'][$i], $space) && ! in_array($matches['3'][$i], $space))
				327	{
				328	$str = str_replace( $matches['0'][$i],
				329	$matches['1'][$i]."“".$matches['2'][$i]."”".$matches['3'][$i],
				330	$str);
				331	}
				332	}
				333	}
				334
				335	if (preg_match_all("/(.)\'(\S+?)\'(.)/", $str, $matches))
				336	{
				337	for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
				338	{
				339	if ( ! in_array($matches['1'][$i], $space) && ! in_array($matches['3'][$i], $space))
				340	{
				341	$str = str_replace( $matches['0'][$i],
				342	$matches['1'][$i]."‘".$matches['2'][$i]."’".$matches['3'][$i],
				343	$str);
				344	}
				345	}
				346	}
				347
				348	// How about one apostrophe, as in Rick's
				349	$start = 0;
				350
				351	while(TRUE)
				352	{
				353	$current = strpos(substr($str, $start), "'");
				354
				355	if ($current === FALSE) break;
				356
				357	$one_before = substr($str, $start+$current-1, 1);
				358	$one_after = substr($str, $start+$current+1, 1);
				359
				360	if ( ! in_array($one_before, $space) && ! in_array($one_after, $space))
				361	{
				362	$str = str_replace( $one_before."'".$one_after,
				363	$one_before."’".$one_after,
				364	$str);
				365	}
				366
				367	$start = $start+$current+2;
				368	}
				369
				370	// Em-dashes
				371	$start = 0;
				372	while(TRUE)
				373	{
				374	$current = strpos(substr($str, $start), "--");
				375
				376	if ($current === FALSE) break;
				377
				378	$one_before = substr($str, $start+$current-1, 1);
				379	$one_after = substr($str, $start+$current+2, 1);
				380	$two_before = substr($str, $start+$current-2, 1);
				381	$two_after = substr($str, $start+$current+3, 1);
				382
				383	if (( ! in_array($one_before, $space) && ! in_array($one_after, $space))
				384	OR
				385	( ! in_array($two_before, $space) && ! in_array($two_after, $space) && $one_before == ' ' && $one_after == ' ')
				386	)
				387	{
				388	$str = str_replace( $two_before.$one_before."--".$one_after.$two_after,
				389	$two_before.trim($one_before)."—".trim($one_after).$two_after,
				390	$str);
				391	}
				392
				393	$start = $start+$current+2;
				394	}
				395
				396	// Ellipsis
				397	$str = preg_replace("#(\w)\.\.\.(\s\|<br />\|</p>)#", "\\1…\\2", $str);
				398	$str = preg_replace("#(\s\|<br />\|</p>)\.\.\.(\w)#", "\\1…\\2", $str);
				399
				400	// Run the translation array we defined above
				401	$str = str_replace(array_keys($table), array_values($table), $str);
				402
				403	// If there are any stray double quotes we'll catch them here
				404
				405	$start = 0;
				406
				407	while(TRUE)
				408	{
				409	$current = strpos(substr($str, $start), '"');
				410
				411	if ($current === FALSE) break;
				412
				413	$one_before = substr($str, $start+$current-1, 1);
				414	$one_after = substr($str, $start+$current+1, 1);
				415
				416	if ( ! in_array($one_after, $space))
				417	{
				418	$str = str_replace( $one_before.'"'.$one_after,
				419	$one_before."“".$one_after,
				420	$str);
				421	}
				422	elseif( ! in_array($one_before, $space))
				423	{
				424	$str = str_replace( $one_before."'".$one_after,
				425	$one_before."”".$one_after,
				426	$str);
				427	}
				428
				429	$start = $start+$current+2;
				430	}
				431
				432	$start = 0;
				433
				434	while(TRUE)
				435	{
				436	$current = strpos(substr($str, $start), "'");
				437
				438	if ($current === FALSE) break;
				439
				440	$one_before = substr($str, $start+$current-1, 1);
				441	$one_after = substr($str, $start+$current+1, 1);
				442
				443	if ( ! in_array($one_after, $space))
				444	{
				445	$str = str_replace( $one_before."'".$one_after,
				446	$one_before."‘".$one_after,
				447	$str);
				448	}
				449	elseif( ! in_array($one_before, $space))
				450	{
				451	$str = str_replace( $one_before."'".$one_after,
				452	$one_before."’".$one_after,
				453	$str);
				454	}
				455
				456	$start = $start+$current+2;
				457	}
				458
				459	return $str;
				460	}
				461
				462	// --------------------------------------------------------------------
				463
				464	/**
				465	* Format Newlines
				466	*
				467	* Converts newline characters into either <p> tags or <br />
				468	*
				469	*/
				470	function format_newlines($str)
				471	{
				472	if ($str == '')
				473	{
				474	return $str;
				475	}
				476
				477	if (strpos($str, "\n") === FALSE)
				478	{
				479	return '<p>'.$str.'</p>';
				480	}
				481
				482	$str = str_replace("\n\n", "</p>\n\n<p>", $str);
				483	$str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);
				484
				485	return '<p>'.$str.'</p>';
				486	}
				487	}
				488
				489
				490	?>