PHP – Class String Similarities Finder

This PHP class compares any number of strings and returns their similarities (those with the highest
word count), if any are found.
Similarities = substrings that appear in every string.

It accepts an arbitrary number of strings and returns the similarities between them, keeping only
the common substrings that contain the most words.

Code Example: Download Example
index.php
<?php
require_once "similarity.class.php";

// Sample input: three sentences built from the same words in different orders.
$array = array(
	"one two three four five six",
	"two three four five six one seven",
	"three four five two one six",
);

$check = new Similarity;

// add() returns nothing and compare() only returns error markup when it
// cannot run, so neither result is stored; html() reports the outcome.
$check -> add($array);  // Add the strings
$check -> compare();    // Compare the strings

echo $check -> html();  // Output the strings and their similarities as HTML

?>
similarity.class.php
<?php
/**
 * Finds the longest word sequences that occur in every one of a set of strings.
 *
 * Typical use: add() an array of at least two strings, call compare(), then
 * read the result with get_string(), get_array() or html().
 *
 * Strings are lower-cased and whitespace-normalised before comparison. Word
 * sequences are taken from the first string added and looked up in the other
 * strings as plain substrings, so a sequence may also match inside a longer
 * word (e.g. "one" is found in "someone").
 */
class Similarity{

	/** Cleaned input strings, consecutively indexed from 0. */
	private $newarray				= array();
	/** Accumulated error messages, already formatted as HTML. */
	private $err_messages			= "";
	/** Error flags; compare() refuses to run once this contains "1". */
	private $errors					= array("0");
	/** Result of the last compare(): offset of the first word => common word sequence. */
	private $founded_similarities	= array();

	/**
	 * Formats an error message as an HTML box and optionally records it.
	 *
	 * @param string $err  Key of the message ("0".."4"); unknown keys give "Unknown error."
	 * @param bool   $show When true, the formatted message is appended to the stored messages.
	 * @param bool   $echo When true, the formatted message is returned instead of being stored.
	 * @return string|null The formatted message when $echo is true, otherwise nothing.
	 */
	private function show_error($err, $show=TRUE, $echo=FALSE){

		$errors = array("0" => "You need to supply an array.",
						"1" => "Less than two valid strings found in the array!",
						"2" => "No similarities were found!",
						"3" => "Add strings first! Use add(somearray) somearray = array(string1, string2, ...)",
						"4" => "The string is NULL. No similarities were found!",

						);

		// The text lives in its own variable so that the $show flag is not
		// overwritten and can actually switch recording off.
		if (array_key_exists($err, $errors)) {
			$message = $errors[$err];
		} else {
			$message = "Unknown error.";
		}

		$box = '<div style = "color: #000000;font-weight:bold; background-color: #EBEBEB;font-family:Arial; font-size:10px; border: 4px solid;margin: 10px 0px; padding:10px;"><u>Similarity Class</u>: '.$message."</div>\n";

		if ($echo) { return $box; }
		if ($show) { $this -> err_messages .= $box; }
	}

	/**
	 * Escapes a stored string for safe inclusion in HTML output.
	 *
	 * @param string $text Raw text.
	 * @return string HTML-escaped text.
	 */
	private function escape($text){
		return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
	}

	/**
	 * Tells whether a phrase occurs (case-insensitively) in every given string.
	 *
	 * @param string   $phrase    Phrase to look for.
	 * @param string[] $haystacks Strings that must all contain the phrase.
	 * @return bool True when no string lacks the phrase.
	 */
	private function in_every_string($phrase, $haystacks){
		foreach ($haystacks as $haystack) {
			// Compare against false explicitly: a match at offset 0 (or a
			// matched "0") must not be mistaken for "not found".
			if (stripos($haystack, $phrase) === false) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Cleans the given strings and stores them for comparison.
	 *
	 * Each element is trimmed, lower-cased and has every run of whitespace
	 * collapsed to a single space; elements that are empty afterwards or that
	 * cannot be converted to a string are skipped. The strings are stored only
	 * when at least two of them remain; otherwise an error is recorded.
	 *
	 * @param array $array Strings to compare.
	 * @return void
	 */
	public function add($array=""){

		if (!is_array($array) || empty($array)) {
			$this -> show_error("0", TRUE);
			$this -> errors[] = "1";
			return;
		}

		// Start from an empty list so that count() below is always given an
		// array, even when every element turns out to be blank.
		$new_array = array();

		foreach ($array as $string) {
			$convertible = is_scalar($string)
				|| (is_object($string) && method_exists($string, '__toString'));
			if (!$convertible) {
				continue; // nested arrays, null, plain objects: nothing to compare
			}

			$newstring = strtolower(trim((string) $string));
			// Collapse tabs and newlines as well as spaces, so that splitting
			// on " " later never yields a whitespace-only "word".
			$newstring = preg_replace('/\s+/', ' ', $newstring);

			if (is_string($newstring) && $newstring !== "") {
				$new_array[] = $newstring;
			}
		}

		if (count($new_array) > 1) {
			// Appending one by one keeps the keys consecutive across calls.
			foreach ($new_array as $attr) {
				$this -> newarray[] = $attr;
			}
		} else {
			// less than 2 elements in the array. we can't compare.
			$this -> show_error("1", TRUE);
			$this -> errors[] = "1";
		}
	}

	/**
	 * Searches for the longest word sequences of the first stored string
	 * that are contained in all other stored strings.
	 *
	 * The result is kept for get_string(), get_array() and html(). When
	 * nothing is common to all strings, the "No similarities" error is recorded.
	 *
	 * @return string|null The stored error messages when the comparison cannot
	 *                     run (add() failed or no strings stored), otherwise nothing.
	 */
	public function compare(){

		if (in_array("1", $this -> errors, true) || empty($this -> newarray)) {
			return $this -> err_messages;
		}

		// Forget the result of any earlier run so a stale match is never reported.
		$this -> founded_similarities = array();

		$others		= $this -> newarray;
		$str1		= explode(" ", array_shift($others)); // words of the first string
		$count_str1	= count($str1);
		$similar	= array();

		// $i = number of consecutive words taken from the first string
		for ($i = 1; $i <= $count_str1; $i++) {
			$found = array();

			// $elem = index of the first word of the candidate sequence
			for ($elem = 0; $elem + $i <= $count_str1; $elem++) {
				$search_string = implode(" ", array_slice($str1, $elem, $i));

				if ($this -> in_every_string($search_string, $others)) {
					$found[$elem] = $search_string;
				}
			}

			if (empty($found)) {
				// Every longer common sequence would contain a common
				// sequence of this length, so there is nothing more to find.
				break;
			}

			// keep only the sequences with the highest word count so far
			$similar = $found;
		}

		if (empty($similar)) {
			$this -> show_error("2", TRUE);
		} else {
			// remove duplicates (the same sequence starting at different words)
			$this -> founded_similarities = array_unique($similar);
		}
	}

	/**
	 * Returns the similarities joined into one string.
	 *
	 * @param string $delimiter Separator placed between multiple similarities.
	 * @return string The joined similarities, or the stored error messages when
	 *                no strings were added or nothing was found.
	 */
	public function get_string($delimiter="|"){

		if (empty($this -> newarray)) {
			$this -> show_error("3", TRUE);
			return $this -> err_messages;
		}

		if (empty($this -> founded_similarities)) {
			return $this -> err_messages;
		}

		return implode($delimiter, $this -> founded_similarities);
	}

	/**
	 * Returns the similarities as an array.
	 *
	 * @return array|false|string The similarities keyed by the offset of their
	 *                            first word in the first string, FALSE when none
	 *                            were found, or the stored error messages when
	 *                            no strings were added.
	 */
	public function get_array(){

		if (empty($this -> newarray)) {
			// not enough strings
			$this -> show_error("3", TRUE);
			return $this -> err_messages;
		}

		if (empty($this -> founded_similarities)) {
			// no similarities found
			return FALSE;
		}

		return $this -> founded_similarities;
	}

	/**
	 * Renders the stored strings and the similarities as HTML lists.
	 *
	 * The strings and similarities are HTML-escaped; the error messages are
	 * inserted as the markup produced by show_error().
	 *
	 * @return string HTML fragment, or only the stored error messages when
	 *                fewer than two strings are stored.
	 */
	public function html(){

		if (count($this -> newarray) <= 1) {
			// not enough strings
			return $this -> err_messages;
		}

		$escape	= array($this, 'escape');

		$html  = "<h2>The strings:</h2>\n<ul>\n\t<li>";
		$html .= implode("</li>\n\t<li>", array_map($escape, $this -> newarray));
		$html .= "</li>\n</ul>\n";
		$html .= "<h2>The similarities:</h2>\n";

		if (!empty($this -> founded_similarities)) {
			$html .= "<ul>\n\t<li>";
			$html .= implode("</li>\n\t<li>", array_map($escape, $this -> founded_similarities));
			$html .= "</li>\n</ul>\n";
		} else {
			// no similarities found
			$html .= $this -> err_messages;
		}

		return $html;
	}

}

?>

Post to Twitter Post to Digg Post to Facebook Post to Google Buzz Send Gmail

Leave a Comment

Your email address will not be published. Required fields are marked *