Blame - system/libraries/Typography.php - code-igniter-v3-giggi

blob: 83f8973d07debc9a3060e1a19e4a05f5ab8860b8 [file] [log] [blame]

Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame^]	1	<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
				2	/**
				3	* CodeIgniter
				4	*
				5	* An open source application development framework for PHP 4.3.2 or newer
				6	*
				7	* @package CodeIgniter
				8	* @author ExpressionEngine Dev Team
				9	* @copyright Copyright (c) 2006, EllisLab, Inc.
				10	* @license http://codeigniter.com/user_guide/license.html
				11	* @link http://codeigniter.com
				12	* @since Version 1.0
				13	* @filesource
				14	*/
				15
				16	// ------------------------------------------------------------------------
				17
				/**
				 * Typography Class
				 *
				 * Turns plain text into HTML: newlines become <p> and <br /> tags, and
				 * straight quotes, double hyphens and triple periods are replaced with
				 * typographic HTML entities.
				 *
				 * @access		private
				 * @category	Helpers
				 * @author		ExpressionEngine Dev Team
				 * @link		http://codeigniter.com/user_guide/helpers/
				 */
				class CI_Typography {

					// Block level elements that should not be wrapped inside <p> tags.
					// This is a regex alternation that convert() interpolates into its patterns.
					var $block_elements = 'div|blockquote|pre|code|h\d|script|ol|ul';

					// Elements that should not have <p> and <br /> tags within them.
					var $skip_elements = 'pre|ol|ul';

					// Tags we want the parser to completely ignore when splitting the string.
					var $ignore_elements = 'a|b|i|em|strong|span|img|li';


					/**
					 * Main Processing Function
					 *
					 * @access	public
					 * @param	string	the text to format
					 * @return	string	the formatted markup, or '' when the input is empty
					 */
					function convert($str)
					{
						if ($str == '')
						{
							return '';
						}

						// Pad both ends with a space. The final clean up removes the
						// padding again wherever it ends up next to a paragraph tag.
						$str = ' '.$str.' ';

						// Standardize Newlines to make matching easier
						if (strpos($str, "\r") !== FALSE)
						{
							$str = str_replace(array("\r\n", "\r"), "\n", $str);
						}

						// Reduce line breaks: more than two consecutive line breaks are
						// compressed down to two since there's no benefit to more.
						$str = preg_replace("/\n\n+/", "\n\n", $str);

						// Quotes inside tags must not be converted, so they are swapped
						// for the temporary markers {@SQ} and {@DQ}.
						$str = preg_replace_callback("#\<.+?>#si", array($this, '_protect_quotes'), $str);

						/*
						 * Add closing/opening paragraph tags before/after "block" elements
						 *
						 * Block elements do not get wrapped in paragraph tags, so a closing
						 * </p> is added just before each one starts and an opening <p> right
						 * after it ends. The clean up further down removes the leftovers.
						 */
						$str = preg_replace("#(<)(".$this->block_elements.")(.*?>)#", "</p>\\1\\2\\3", $str);
						$str = preg_replace("#(</)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3<p>", $str);

						/*
						 * Convert "ignore" tags to temporary marker
						 *
						 * The string is split at every tag below. Inline tags would be
						 * adversely affected by that, so their opening < is temporarily
						 * replaced with {@TAG}. The tag name is matched as a prefix, so
						 * e.g. <br and <blockquote are marked as well.
						 */
						$str = preg_replace("#<(/*)(".$this->ignore_elements.")#i", "{@TAG}\\1\\2", $str);

						/*
						 * Split the string at every tag
						 *
						 * The result alternates between tags and the text between them.
						 * Quotes inside tags are already markers at this point, so a tag
						 * is simply "<", anything but angle brackets, ">". The pattern has
						 * no nested quantifiers and therefore cannot backtrack excessively
						 * on a "<" that is never closed.
						 */
						$chunks = preg_split('/(<[^<>]+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);

						// Build our finalized string: tags are copied, text is processed
						$str = '';
						$process = TRUE;

						foreach ($chunks as $chunk)
						{
							// Block level tags are copied untouched. Opening a "skip"
							// element switches processing off, closing it switches it on.
							if (preg_match("#<(/*)(".$this->block_elements.").*?>#", $chunk, $match))
							{
								if (preg_match("#".$this->skip_elements."#", $match['2']))
								{
									$process = ($match['1'] == '/');
								}

								$str .= $chunk;
								continue;
							}

							if ($process == FALSE)
							{
								$str .= $chunk;
								continue;
							}

							// Convert Newlines into <p> and <br /> tags
							$str .= $this->format_newlines($chunk);
						}

						// FINAL CLEAN UP
						// IMPORTANT: DO NOT ALTER THE ORDER OF THE ITEMS BELOW!

						// Clean up paragraph tags that ended up before/after "block" elements
						$str = preg_replace("#<p>({@TAG}.*?)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3", $str);
						$str = preg_replace("#({@TAG}/.*?)(".$this->block_elements.")(.*?>)</p>#", "\\1\\2\\3", $str);

						// Convert Quotes and other characters
						$str = $this->format_characters($str);

						// Fix an artifact that happens during the paragraph replacement
						$str = preg_replace('#(<p>\n*</p>)#', '', $str);

						// If the user submitted their own paragraph tags with class data
						// in them we will retain them instead of using our tags.
						$str = preg_replace('#(<p.*?>)<p>#', "\\1", $str);

						// Collapse doubled tags, strip the padding and restore the markers
						$search = array('</p></p>', '</p><p>', '<p> ', ' </p>', '{@TAG}', '{@DQ}', '{@SQ}', '<p></p>');
						$replace = array('</p>', '<p>', '<p>', '</p>', '<', '"', "'", '');

						return str_replace($search, $replace, $str);
					}

					// --------------------------------------------------------------------

					/**
					 * Format Characters
					 *
					 * Converts double and single quotes to entities. Since these are
					 * directional, the choice between opening and closing entity is made
					 * from the characters next to each quote. Also converts double hyphens
					 * to em-dashes, triple periods to an ellipsis, and adds a non-breaking
					 * space after sentence punctuation.
					 *
					 * Numeric entities are emitted so the result does not depend on the
					 * character encoding of the page. The passes below depend on each
					 * other's output, so their order matters.
					 *
					 * @access	public
					 * @param	string
					 * @return	string
					 */
					function format_characters($str)
					{
						$table = array(
							' "'	=> ' &#8220;',
							'" '	=> '&#8221; ',
							" '"	=> ' &#8216;',
							"' "	=> '&#8217; ',

							'>"'	=> '>&#8220;',
							'"<'	=> '&#8221;<',
							">'"	=> '>&#8216;',
							"'<"	=> '&#8217;<',

							'".'	=> '&#8221;.',
							'",'	=> '&#8221;,',
							'";'	=> '&#8221;;',
							'":'	=> '&#8221;:',
							'"!'	=> '&#8221;!',
							'"?'	=> '&#8221;?',

							'. '	=> '.&nbsp; ',
							'? '	=> '?&nbsp; ',
							'! '	=> '!&nbsp; ',
							': '	=> ':&nbsp; '
						);

						// Quotes within quotes, like: "'hi here'"
						// First the double quote directly followed by a single quote...
						$pos = 0;

						while (($pos = strpos($str, "\"'", $pos)) !== FALSE)
						{
							$before = $this->_char_at($str, $pos - 1);
							$after = $this->_char_at($str, $pos + 2);
							$entity = '';

							if ( ! $this->_is_space($after) && $after != '<')
							{
								$entity = '&#8220;&#8216;';
							}
							elseif ( ! $this->_is_space($before))
							{
								$entity = '&#8221;&#8217;';
							}

							$str = $this->_splice($str, $pos, 2, $entity);
						}

						// ...then the single quote directly followed by a double quote
						$pos = 0;

						while (($pos = strpos($str, "'\"", $pos)) !== FALSE)
						{
							$before = $this->_char_at($str, $pos - 1);
							$after = $this->_char_at($str, $pos + 2);
							$entity = '';

							if ($this->_is_space($before) && ! $this->_is_space($after) && $after != '<')
							{
								$entity = '&#8216;&#8220;';
							}
							elseif ( ! $this->_is_space($before) && $before != '>')
							{
								$entity = '&#8217;&#8221;';
							}

							$str = $this->_splice($str, $pos, 2, $entity);
						}

						// Are there quotes within a word, as in: ("something")
						// Double quotes are handled first, then single quotes.
						$pairs = array(
							'"'	=> array('&#8220;', '&#8221;'),
							"'"	=> array('&#8216;', '&#8217;')
						);

						foreach ($pairs as $quote => $entity)
						{
							if ( ! preg_match_all('/(.)'.$quote.'(\S+?)'.$quote.'(.)/', $str, $matches))
							{
								continue;
							}

							for ($i = 0, $total = count($matches['0']); $i < $total; ++$i)
							{
								// Only quotes glued to other characters on both sides count
								if ($this->_is_space($matches['1'][$i]) OR $this->_is_space($matches['3'][$i]))
								{
									continue;
								}

								$str = str_replace(
									$matches['0'][$i],
									$matches['1'][$i].$entity['0'].$matches['2'][$i].$entity['1'].$matches['3'][$i],
									$str
								);
							}
						}

						// How about one apostrophe, as in Rick's
						$pos = 0;

						while (($pos = strpos($str, "'", $pos)) !== FALSE)
						{
							$before = $this->_char_at($str, $pos - 1);
							$after = $this->_char_at($str, $pos + 1);
							$entity = ( ! $this->_is_space($before) && ! $this->_is_space($after)) ? '&#8217;' : '';

							$str = $this->_splice($str, $pos, 1, $entity);
						}

						// Em-dashes: either glued to both words (a--b) or set off by
						// exactly one space on each side (a -- b).
						$pos = 0;

						while (($pos = strpos($str, '--', $pos)) !== FALSE)
						{
							$one_before = $this->_char_at($str, $pos - 1);
							$one_after = $this->_char_at($str, $pos + 2);
							$two_before = $this->_char_at($str, $pos - 2);
							$two_after = $this->_char_at($str, $pos + 3);

							if ( ! $this->_is_space($one_before) && ! $this->_is_space($one_after))
							{
								$str = $this->_splice($str, $pos, 2, '&#8212;');
							}
							elseif ($one_before === ' ' && $one_after === ' ' && ! $this->_is_space($two_before) && ! $this->_is_space($two_after))
							{
								// The spaces around the hyphens are swallowed as well
								$pos--;
								$str = $this->_splice($str, $pos, 4, '&#8212;');
							}
							else
							{
								$pos += 2;
							}
						}

						// Ellipsis
						$str = preg_replace("#(\w)\.\.\.(\s|<br />|</p>)#", "\\1&#8230;\\2", $str);
						$str = preg_replace("#(\s|<br />|</p>)\.\.\.(\w)#", "\\1&#8230;\\2", $str);

						// Run the translation array we defined above
						$str = str_replace(array_keys($table), array_values($table), $str);

						// If there are any stray quotes left we'll catch them here
						$str = $this->_curl_stray_quotes($str, '"', '&#8220;', '&#8221;');
						$str = $this->_curl_stray_quotes($str, "'", '&#8216;', '&#8217;');

						return $str;
					}

					// --------------------------------------------------------------------

					/**
					 * Format Newlines
					 *
					 * Wraps the string in <p> tags, turns double newlines into paragraph
					 * boundaries and puts a <br /> in front of every single newline that
					 * has text on both sides.
					 *
					 * @access	public
					 * @param	string
					 * @return	string
					 */
					function format_newlines($str)
					{
						if ($str == '')
						{
							return $str;
						}

						if (strpos($str, "\n") === FALSE)
						{
							return '<p>'.$str.'</p>';
						}

						$str = str_replace("\n\n", "</p>\n\n<p>", $str);

						// Lookarounds leave the neighbouring characters unconsumed, so
						// consecutive one-character lines ("a\nb\nc") all get their break.
						$str = preg_replace("/(?<=[^\n])\n(?=[^\n])/", "<br />\n", $str);

						return '<p>'.$str.'</p>';
					}

					// --------------------------------------------------------------------

					/**
					 * Callback for convert(): swaps the quotes inside one matched tag
					 * for the {@SQ} and {@DQ} markers.
					 *
					 * @access	private
					 * @param	array	preg match, element 0 holds the tag
					 * @return	string
					 */
					function _protect_quotes($match)
					{
						return str_replace(array("'", '"'), array('{@SQ}', '{@DQ}'), $match['0']);
					}

					// --------------------------------------------------------------------

					/**
					 * Returns the single byte at $pos, or '' when $pos lies outside the
					 * string. A negative position never wraps around to the end.
					 *
					 * @access	private
					 * @param	string
					 * @param	int
					 * @return	string
					 */
					function _char_at($str, $pos)
					{
						if ($pos < 0 OR $pos >= strlen($str))
						{
							return '';
						}

						return substr($str, $pos, 1);
					}

					// --------------------------------------------------------------------

					/**
					 * Tells whether a neighbouring character counts as white space.
					 * The string boundary ('') is treated like white space.
					 *
					 * @access	private
					 * @param	string	a single character or ''
					 * @return	bool
					 */
					function _is_space($chr)
					{
						return ($chr === '' OR $chr === "\n" OR $chr === "\t" OR $chr === ' ');
					}

					// --------------------------------------------------------------------

					/**
					 * Replaces $length bytes at $pos with $entity and moves $pos behind
					 * the inserted text. With an empty $entity the string is left alone
					 * and $pos simply skips the $length bytes.
					 *
					 * @access	private
					 * @param	string
					 * @param	int		scan position, updated by reference
					 * @param	int
					 * @param	string
					 * @return	string
					 */
					function _splice($str, &$pos, $length, $entity)
					{
						if ($entity === '')
						{
							$pos += $length;
							return $str;
						}

						$str = substr_replace($str, $entity, $pos, $length);
						$pos += strlen($entity);

						return $str;
					}

					// --------------------------------------------------------------------

					/**
					 * Converts every remaining $quote character: to $open when text
					 * follows it, otherwise to $close when text precedes it. A quote
					 * with white space on both sides is left unchanged.
					 *
					 * @access	private
					 * @param	string
					 * @param	string	the quote character to look for
					 * @param	string	entity for an opening quote
					 * @param	string	entity for a closing quote
					 * @return	string
					 */
					function _curl_stray_quotes($str, $quote, $open, $close)
					{
						$pos = 0;

						while (($pos = strpos($str, $quote, $pos)) !== FALSE)
						{
							$entity = '';

							if ( ! $this->_is_space($this->_char_at($str, $pos + 1)))
							{
								$entity = $open;
							}
							elseif ( ! $this->_is_space($this->_char_at($str, $pos - 1)))
							{
								$entity = $close;
							}

							$str = $this->_splice($str, $pos, 1, $entity);
						}

						return $str;
					}
				}
				479	// END Typography Class
				480
				481	/* End of file Typography.php */
				482	/* Location: ./system/libraries/Typography.php */