
/**
 * This script was modified to fit in Koblog.
 */

/**
 * Fuzz Class
 * kevinfiol\fuzzget
 * @category Class
 * @package  None
 * @author   Kevin Fiol <fiolkevin@gmail.com>
 * @license  https://opensource.org/licenses/MIT  MIT License
 * @link     http://github.com/kevinfiol
 */

class Fuzz
    // Collection scanned by search(); each entry is itself iterated as key => value pairs.
    private $_source;
    // Number of entries in $_source, captured by the constructor (not read by any method visible in this chunk).
    private $_sourceLen;
    // Requested result cap, raised to at least 1 by the constructor (not read by any method visible in this chunk).
    private $_maxResults;
    // Scoring algorithm selector: 0 = Levenshtein, 1 = Jaro-Winkler.
    private $_searchMode;
    // When truthy, the longest-common-substring length is factored into every score.
    private $_useLCS;

     * Fuzz Object Constructor
     * Initialize private variables
     * @param array   $source     An array of associative arrays
     * @param int     $maxResults The maximum number of results to retrieve upon a search
     * @param int     $searchMode 0 = Levenshtein, 1 = Jaro-Winkler
     * @param boolean $useLCS     Factor in Longest Common Substring in search results
    /**
     * Build a searcher over a fixed collection of entries.
     *
     * @param array   $source     Entries to search; each entry is iterated as key => value pairs
     * @param int     $maxResults Requested result cap; values below 1 are raised to 1
     * @param int     $searchMode 0 = Levenshtein, 1 = Jaro-Winkler
     * @param boolean $useLCS     Whether the longest common substring contributes to scores
     *
     * @throws \Exception When $searchMode is below 0 or above 1
     */
    public function __construct($source, $maxResults, $searchMode, $useLCS)
    {
        $this->_useLCS = $useLCS;
        $this->_maxResults = max($maxResults, 1);
        $this->_sourceLen = count($source);
        $this->_source = $source;

        // Guard clause: reject anything outside the two supported modes.
        $modeIsValid = !($searchMode < 0) && !($searchMode > 1);
        if (!$modeIsValid) {
            throw new \Exception('Invalid search mode');
        }

        $this->_searchMode = $searchMode;
    }

     * Search Method
     * Initiate Search
     * @param string $search      Term to search for
     * @param int    $minLCS      (if using LCS) Specify the minimum longest common substring
     * @param int    $maxDistance (if using Levenshtein) Specify the maximum distance allowed
     * @return array $results     Array of associative arrays containing search matches
    /**
     * Score every source entry against a search term.
     *
     * Each property of an entry is compared with $search using the configured
     * algorithm, and the entry keeps its best property score. Lower scores are
     * better matches: Levenshtein mode uses the smallest distance, Jaro-Winkler
     * mode uses the negated highest similarity, and when LCS is enabled the
     * longest common substring length is subtracted from either.
     *
     * NOTE: the returned map is in source order, not ordered by score, and
     * $_maxResults is not applied here; callers that need a ranking must sort
     * the result themselves (for example with asort()).
     *
     * @param string   $search      Term to search for
     * @param int|null $minLCS      (LCS only) Minimum longest common substring an entry must reach;
     *                              a non-numeric value (null, '', false) disables the filter
     * @param int|null $maxDistance (Levenshtein only) Maximum distance allowed, where 0 keeps exact
     *                              matches only; a non-numeric value disables the filter
     *
     * @return array Map of source key => score for the entries that pass the filters
     */
    public function search($search, $minLCS = null, $maxDistance = null)
    {
        $scores = [];
        $useLevenshtein = ($this->_searchMode == 0);
        $useJaro = ($this->_searchMode == 1);

        // A filter applies only when its algorithm is active AND a numeric bound was given.
        // is_numeric() replaces the former loose "== null" test, which also matched 0 and
        // therefore silently ignored an explicit "distance 0" (exact match) request.
        $minLCS = ($this->_useLCS && is_numeric($minLCS)) ? $minLCS : null;
        $maxDistance = ($useLevenshtein && is_numeric($maxDistance)) ? $maxDistance : null;

        foreach ($this->_source as $pageKey => $data) {
            $allLev = [];
            $allJaros = [];
            $allLCSs = [];

            // Score every property of the entry against the search term.
            foreach ($data as $val) {
                $text = strval($val);

                if ($useLevenshtein) {
                    $allLev[] = $this->getLevenshtein($text, $search);
                } elseif ($useJaro) {
                    $allJaros[] = $this->getJaroWinkler($text, $search);
                }

                if ($this->_useLCS) {
                    $allLCSs[] = $this->getLCS($text, $search);
                }
            }

            // Best value per metric; null when the entry had no properties to score.
            $lowestLev = $allLev ? min($allLev) : null;
            $highestJaro = $allJaros ? max($allJaros) : null;
            $highestLCS = $allLCSs ? max($allLCSs) : null;

            if ($useLevenshtein) {
                $score = $lowestLev;
            } else {
                // Negated so that, as with distances, a lower score means a better match.
                // The null guard avoids passing null to abs() for an entry without properties.
                $score = -1 * abs($highestJaro === null ? 0 : $highestJaro);
            }

            if ($this->_useLCS) {
                $score -= $highestLCS;
            }

            $withinDistance = ($maxDistance === null || $lowestLev <= $maxDistance);
            $meetsLCS = ($minLCS === null || $highestLCS >= $minLCS);

            if ($withinDistance && $meetsLCS) {
                $scores[$pageKey] = $score;
            }
        }

        return $scores;
    }

     * Get Longest Common Substring
     * @param string $source Term to search for
     * @param string $target Target term to search against
     * @return int   $result LCS Score
    /**
     * Get the length of the longest common substring of two strings.
     *
     * The comparison is case-sensitive and works on whole characters in the
     * CHARSET encoding.
     *
     * @param string $source Term to search for
     * @param string $target Target term to search against
     *
     * @return int Length, in characters, of the longest run shared by both strings
     */
    public function getLCS($source, $target)
    {
        $n = mb_strlen($source, CHARSET);
        $m = mb_strlen($target, CHARSET);

        // Split into characters up front. PHP string offsets ($str[$i]) address bytes,
        // so combining them with mb_strlen() lengths miscounts multibyte text.
        $sourceChars = [];
        for ($i = 0; $i < $n; $i++) {
            $sourceChars[] = mb_substr($source, $i, 1, CHARSET);
        }
        $targetChars = [];
        for ($j = 0; $j < $m; $j++) {
            $targetChars[] = mb_substr($target, $j, 1, CHARSET);
        }

        $result = 0;
        // $previous[$j] = length of the common suffix ending at the previous source
        // character and target character $j; only one earlier row is ever needed.
        $previous = array_fill(0, $m + 1, 0);

        for ($i = 1; $i <= $n; $i++) {
            $current = [0];

            for ($j = 1; $j <= $m; $j++) {
                if ($sourceChars[$i - 1] === $targetChars[$j - 1]) {
                    $current[$j] = $previous[$j - 1] + 1;
                    $result = max($result, $current[$j]);
                } else {
                    $current[$j] = 0;
                }
            }

            $previous = $current;
        }

        return $result;
    }

     * Get Levenshtein Distance
     * @param string $source Term to search for
     * @param string $target Target term to search against
     * @return int   Levenshtein Distance
    /**
     * Get the Levenshtein (edit) distance between two strings.
     *
     * The comparison is case-sensitive and counts whole characters in the
     * CHARSET encoding, so a multibyte character costs one edit, not one per byte.
     *
     * @param string $source Term to search for
     * @param string $target Target term to search against
     *
     * @return int Minimum number of single-character insertions, deletions and substitutions
     */
    public function getLevenshtein($source, $target)
    {
        $n = mb_strlen($source, CHARSET);
        $m = mb_strlen($target, CHARSET);

        // Against an empty string the distance is simply the other string's length.
        if ($n === 0) {
            return $m;
        }
        if ($m === 0) {
            return $n;
        }

        // Split into characters up front. PHP string offsets ($str[$i]) address bytes,
        // so combining them with mb_strlen() lengths miscounts multibyte text.
        $sourceChars = [];
        for ($i = 0; $i < $n; $i++) {
            $sourceChars[] = mb_substr($source, $i, 1, CHARSET);
        }
        $targetChars = [];
        for ($j = 0; $j < $m; $j++) {
            $targetChars[] = mb_substr($target, $j, 1, CHARSET);
        }

        // $previous[$j] = distance between the source prefix handled so far and the
        // first $j target characters; it starts as the row for the empty source prefix.
        $previous = range(0, $m);

        for ($i = 1; $i <= $n; $i++) {
            // Turning $i source characters into an empty target takes $i deletions.
            $current = [$i];

            for ($j = 1; $j <= $m; $j++) {
                $cost = ($sourceChars[$i - 1] === $targetChars[$j - 1]) ? 0 : 1;

                $current[$j] = min(
                    $previous[$j] + 1,          // drop a source character
                    $current[$j - 1] + 1,       // insert a target character
                    $previous[$j - 1] + $cost   // keep or substitute (diagonal cell)
                );
            }

            $previous = $current;
        }

        return $previous[$m];
    }

     * Get Jaro-Winkler Score
     * @param string $first  String to match
     * @param string $second String to match
     * @return double $jaroWinkler Jaro-Winkler score between 0.0 and 1.0
    /**
     * Get the Jaro-Winkler similarity of two strings, ignoring case.
     *
     * Both strings are lowercased before matching. The Winkler prefix bonus is
     * taken from the same lowercased strings (capped at four characters), so a
     * difference in letter case never removes the bonus.
     *
     * @param string $first  String to match
     * @param string $second String to match
     *
     * @return float Similarity rounded to two decimals; 0.0 when nothing matches
     */
    public function getJaroWinkler($first, $second)
    {
        if (mb_strlen($first, CHARSET) > mb_strlen($second, CHARSET)) {
            $longer = mb_strtolower($first, CHARSET);
            $shorter = mb_strtolower($second, CHARSET);
        } else {
            $longer = mb_strtolower($second, CHARSET);
            $shorter = mb_strtolower($first, CHARSET);
        }

        $shorterLen = mb_strlen($shorter, CHARSET);
        $longerLen = mb_strlen($longer, CHARSET);

        // Matching window: half the shorter string's length, plus one.
        $halfLen = intval(($shorterLen / 2) + 1);

        $match1 = $this->_getCharMatch($shorter, $longer, $halfLen);
        $match2 = $this->_getCharMatch($longer, $shorter, $halfLen);

        $matchLen1 = mb_strlen($match1, CHARSET);
        $matchLen2 = mb_strlen($match2, CHARSET);

        // No matches, or unequal match sets, count as no similarity at all.
        // Returning here also keeps the divisions below away from zero.
        if ($matchLen1 === 0 || $matchLen2 === 0 || $matchLen1 !== $matchLen2) {
            return 0.0;
        }

        $trans = $this->_getTranspositions($match1, $match2);

        $distance = ($matchLen1 / $shorterLen
            + $matchLen2 / $longerLen
            + ($matchLen1 - $trans) / $matchLen1) / 3.0;

        // Winkler adjustment: reward a shared prefix of up to four characters. The
        // lowercased strings are used so the bonus follows the same case-insensitive
        // rule as the matching above (the prefix length does not depend on argument order).
        $prefixLen = min(mb_strlen($this->_getPrefix($shorter, $longer), CHARSET), 4);

        return round(($distance + (0.1 * $prefixLen * (1.0 - $distance))) * 100.0) / 100.0;
    }

     * Get Character Matches
     * @param string $first  String to match
     * @param string $second String to match
     * @param int    $limit  Limit of characters to match
     * @return string $common Common substring
    /**
     * Collect the characters of $first that also occur in $second within a window.
     *
     * For the character at position i of $first, positions max(0, i - $limit) up to
     * (but excluding) min(i + $limit, length of $second) are scanned. Each character
     * of $second can be matched only once.
     *
     * @param string $first  String whose characters are looked up
     * @param string $second String searched for those characters
     * @param int    $limit  Window radius, in characters
     *
     * @return string Matched characters, in the order they appear in $first
     */
    private function _getCharMatch($first, $second, $limit)
    {
        $firstLen = mb_strlen($first, CHARSET);
        $secondLen = mb_strlen($second, CHARSET);

        // Work on character arrays. PHP string offsets address bytes, so indexing the
        // strings directly (and overwriting single bytes) breaks on multibyte text.
        $firstChars = [];
        for ($i = 0; $i < $firstLen; $i++) {
            $firstChars[] = mb_substr($first, $i, 1, CHARSET);
        }
        $pool = [];
        for ($j = 0; $j < $secondLen; $j++) {
            $pool[] = mb_substr($second, $j, 1, CHARSET);
        }

        $common = '';

        for ($i = 0; $i < $firstLen; $i++) {
            $char = $firstChars[$i];
            $windowEnd = min($i + $limit, $secondLen);

            for ($j = max(0, $i - $limit); $j < $windowEnd; $j++) {
                if ($pool[$j] === $char) {
                    $common .= $char;
                    // Mark the slot as used with null rather than a placeholder character,
                    // so a literal '*' in the input can never match a consumed slot.
                    $pool[$j] = null;
                    break;
                }
            }
        }

        return $common;
    }

     * Get Transpositions
     * @param string $first  String to match
     * @param string $second String to match
     * @return int $trans Number of transpositions between strings
    /**
     * Get the number of transpositions between two matched-character strings.
     *
     * Counts the character positions at which the strings differ and halves
     * that count. A position past the end of $second counts as a difference.
     *
     * @param string $first  String to match
     * @param string $second String to match
     *
     * @return int|float Half the number of differing positions (fractional when that number is odd)
     */
    private function _getTranspositions($first, $second)
    {
        $firstLen = mb_strlen($first, CHARSET);
        $secondLen = mb_strlen($second, CHARSET);
        $mismatches = 0;

        for ($i = 0; $i < $firstLen; $i++) {
            // Compare whole characters: PHP string offsets ($str[$i]) address bytes and
            // would misalign multibyte text against the mb_strlen() loop bound.
            $left = mb_substr($first, $i, 1, CHARSET);
            $right = ($i < $secondLen) ? mb_substr($second, $i, 1, CHARSET) : null;

            if ($left !== $right) {
                $mismatches++;
            }
        }

        return $mismatches / 2;
    }

     * Get Prefix
     * @param string $first  String to match
     * @param string $second String to match
     * @return string Returns substring representing the longest prefix
    /**
     * Get the leading part of $first that it shares with $second.
     *
     * @param string $first  String to match
     * @param string $second String to match
     *
     * @return string '' when either string is empty or they differ at index 0,
     *                $first itself when _getDiffIndex() reports -1, otherwise
     *                the first characters of $first up to the reported index
     */
    private function _getPrefix($first, $second)
    {
        $eitherEmpty = mb_strlen($first, CHARSET) == 0 || mb_strlen($second, CHARSET) == 0;
        if ($eitherEmpty) {
            return '';
        }

        $diffAt = $this->_getDiffIndex($first, $second);

        switch ($diffAt) {
            case -1:
                // No difference reported: the whole first string is the prefix.
                return $first;
            case 0:
                // The strings already differ at the very first position.
                return '';
            default:
                return mb_substr($first, 0, $diffAt, CHARSET);
        }
    }

     * Get Difference Index
     * @param string $first  String to match
     * @param string $second String to match
     * @return Return index of first difference
    /**
     * Get the character index at which two strings first differ.
     *
     * @param string $first  String to match
     * @param string $second String to match
     *
     * @return int -1 when the strings are identical; otherwise the zero-based character
     *             index of the first difference, which equals the shorter string's length
     *             when one string is a prefix of the other
     */
    private function _getDiffIndex($first, $second)
    {
        $first = (string) $first;
        $second = (string) $second;

        // Strict comparison: a loose "==" treats numeric-looking strings such as
        // '1e3' and '1000' as equal and would report them as identical.
        if ($first === $second) {
            return -1;
        }

        $maxLen = min(mb_strlen($first, CHARSET), mb_strlen($second, CHARSET));

        for ($i = 0; $i < $maxLen; $i++) {
            // Compare whole characters so the returned index is a character offset,
            // which is what _getPrefix() passes on to mb_substr().
            if (mb_substr($first, $i, 1, CHARSET) !== mb_substr($second, $i, 1, CHARSET)) {
                return $i;
            }
        }

        return $maxLen;
    }

     * Print Matrix
     * Utility / Testing function for testing purposes
     * @param array $arr 2-dimensional array representing a matrix
     * @return void
    private function _printMatrix($arr)
        $str = '';
        $width = count($arr[0]);
        $height = count($arr);

        for ($i = 0; $i < $height; $i++) {
            for ($j = 0; $j < $width; $j++) {
                if (!isset($arr[$i][$j])) {
                    $arr[$i][$j] = ' ';

                $str = $str . "[{$arr[$i][$j]}]";

                if ($j === $width - 1) {
                    $str = $str . PHP_EOL;
