HEX
Server: Microsoft-IIS/8.5
System: Windows NT YDAWBH120 6.3 build 9600 (Windows Server 2012 R2 Standard Edition) AMD64
User: tentjecom_web (0)
PHP: 7.4.14
Disabled: NONE
Upload Files
File: D:/HostingSpaces/SBogers84/zuiderbos.nl/vendor/nqxcode/phpmorphy/src/phpMorphy/Morphier/Bulk.php
<?php
/*
* This file is part of phpMorphy project
*
* Copyright (c) 2007-2012 Kamaev Vladimir <heromantor@users.sourceforge.net>
*
*     This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
*     This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
*     You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/

// TODO: Fix this LSP violation! This class can't implement phpMorphy_Morphier_MorphierInterface
/**
 * Bulk morphier: looks up many words in one pass over the FSA dictionary.
 *
 * The input words are packed into a compressed prefix tree
 * (see buildPatriciaTrie()), the tree is walked against the FSA
 * (see findWord()) and the words are grouped by the raw annotation the FSA
 * returned for them. Words for which no annotation was obtained are collected
 * separately and exposed through getNotFoundWords().
 *
 * Every public lookup method takes an array of words and returns an array
 * keyed by word.
 *
 * String length and substring operations go through the callables stored in
 * $GLOBALS['__phpmorphy_strlen'] and $GLOBALS['__phpmorphy_substr'], which
 * are defined outside this file.
 */
class phpMorphy_Morphier_Bulk implements phpMorphy_Morphier_MorphierInterface {
    protected
        /** @var phpMorphy_Fsa_FsaInterface */
        $fsa,
        /** @var int Root transition of the FSA, cached at construction time */
        $root_trans,
        /** @var phpMorphy_Helper Private clone of the helper given to the constructor */
        $helper,
        /** @var array Words the most recent findWord() call could not resolve */
        $notfound = array(),
        /** @var phpMorphy_GramInfo_GramInfoInterface */
        $graminfo;

    /**
     * @param phpMorphy_Fsa_FsaInterface $fsa
     * @param phpMorphy_Helper $helper
     */
    function __construct(phpMorphy_Fsa_FsaInterface $fsa, phpMorphy_Helper $helper) {
        $this->fsa = $fsa;
        $this->root_trans = $fsa->getRootTrans();

        // The annotation decoder is replaced on a clone, not on the helper
        // instance that was passed in.
        $this->helper = clone $helper;
        $this->helper->setAnnotDecoder($this->createAnnotDecoder($helper));

        $this->graminfo = $helper->getGramInfo();
    }

    /**
     * @return phpMorphy_Fsa_FsaInterface
     */
    function getFsa() {
        return $this->fsa;
    }

    /**
     * @return phpMorphy_Helper
     */
    function getHelper() {
        return $this->helper;
    }

    /**
     * @return phpMorphy_GramInfo_GramInfoInterface
     */
    function getGraminfo() {
        return $this->graminfo;
    }

    /**
     * Returns the words that the most recent lookup could not resolve.
     *
     * The list is reset at the start of every findWord() call, i.e. by every
     * public lookup method of this class.
     *
     * @return array
     */
    function getNotFoundWords() {
        return $this->notfound;
    }

    /**
     * Creates the annotation decoder installed on the cloned helper.
     *
     * @param phpMorphy_Helper $helper
     * @return phpMorphy_AnnotDecoder_Common
     */
    protected function createAnnotDecoder(phpMorphy_Helper $helper) {
        return phpMorphy_AnnotDecoder_Factory::instance($helper->getEndOfString())->getCommonDecoder();
    }

    /**
     * Returns decoded annotations for every resolved word.
     *
     * @param string[] $words
     * @return array word => list of decoded annotations
     */
    function getAnnot($words) {
        $result = array();

        foreach($this->findWord($words) as $annot => $found_words) {
            // Decode once per distinct raw annotation and share it between
            // all words that produced it.
            $annot = $this->helper->decodeAnnot($annot, true);

            foreach($found_words as $word) {
                $result[$word][] = $annot;
            }
        }

        return $result;
    }

    /**
     * Returns the base form(s) of every resolved word.
     *
     * @param string[] $words
     * @return array word => list of base forms
     */
    function getBaseForm($words) {
        $annots = $this->findWord($words);

        return $this->composeForms($annots, true, false, false);
    }

    /**
     * Returns all forms of every resolved word.
     *
     * @param string[] $words
     * @return array word => list of forms
     */
    function getAllForms($words) {
        $annots = $this->findWord($words);

        return $this->composeForms($annots, false, false, false);
    }

    /**
     * Returns the pseudo root(s) of every resolved word, i.e. the word with
     * the prefix and flexia lengths given by the annotation cut off.
     *
     * @param string[] $words
     * @return array word => list of pseudo roots
     */
    function getPseudoRoot($words) {
        $annots = $this->findWord($words);

        return $this->composeForms($annots, false, true, false);
    }

    /**
     * Returns the part-of-speech identifiers of every resolved word.
     *
     * @param string[] $words
     * @return array word => list of part-of-speech ids
     */
    function getPartOfSpeech($words) {
        $annots = $this->findWord($words);

        return $this->composeForms($annots, false, false, true);
    }

    /**
     * Resolves the words and maps each one to the result of a helper method.
     *
     * Words whose raw annotation is empty are skipped.
     *
     * @param string[] $words
     * @param string $method Name of the method to invoke on the helper
     * @param bool $passWordAsFirstArg When true the helper method is called
     *        per word as $method($word, $annot_raw); otherwise it is called
     *        once per annotation as $method($annot_raw) and the result is
     *        shared by all words with that annotation
     * @return array word => helper result
     */
    protected function processAnnotsWithHelper($words, $method, $passWordAsFirstArg = false) {
        $result = array();

        foreach($this->findWord($words) as $annot_raw => $found_words) {
            if($GLOBALS['__phpmorphy_strlen']($annot_raw) == 0) continue;

            if($passWordAsFirstArg) {
                foreach($found_words as $word) {
                    $result[$word] = $this->helper->$method($word, $annot_raw);
                }
            } else {
                $result_for_annot = $this->helper->$method($annot_raw);

                foreach($found_words as $word) {
                    $result[$word] = $result_for_annot;
                }
            }
        }

        return $result;
    }

    /**
     * @param string[] $words
     * @return array word => result of helper's getAncode()
     */
    function getAncode($words) {
        return $this->processAnnotsWithHelper($words, 'getAncode');
    }

    /**
     * @param string[] $words
     * @return array word => result of helper's getGrammarInfoMergeForms()
     */
    function getGrammarInfoMergeForms($words) {
        return $this->processAnnotsWithHelper($words, 'getGrammarInfoMergeForms');
    }

    /**
     * @param string[] $words
     * @return array word => result of helper's getGrammarInfo()
     */
    function getGrammarInfo($words) {
        return $this->processAnnotsWithHelper($words, 'getGrammarInfo');
    }

    /**
     * @param string[] $words
     * @return array word => result of helper's getAllFormsWithResolvedAncodes()
     */
    function getAllFormsWithAncodes($words) {
        return $this->processAnnotsWithHelper($words, 'getAllFormsWithResolvedAncodes', true);
    }

    /**
     * @param string[] $words
     * @return array word => result of helper's getParadigmCollection()
     */
    function getParadigmCollection($words) {
        return $this->processAnnotsWithHelper($words, 'getParadigmCollection', true);
    }

    /**
     * Looks up all words in the FSA and groups them by raw annotation.
     *
     * The words are first turned into a prefix tree, which is then traversed
     * with an explicit stack so that a shared prefix is walked through the
     * FSA only once. Words that end in a node where the walk failed, or for
     * which the FSA returned no annotation, are stored in $this->notfound
     * (the list is reset on every call).
     *
     * @param string[] $words
     * @return array raw annotation => list of words having that annotation
     */
    protected function findWord($words) {
        $this->notfound = array();

        list($labels, $finals, $dests) = $this->buildPatriciaTrie($words);

        $annots = array();
        // Flat stack of triples: [node id, path leading to the node, FSA
        // transition reached at the end of that path (false once a walk failed)].
        $stack = array(0, '', $this->root_trans);
        $stack_idx = 0;

        $fsa = $this->fsa;

        // TODO: Improve this
        while($stack_idx >= 0) {
            $n = $stack[$stack_idx];
            $path = $stack[$stack_idx + 1] . $labels[$n];
            $trans = $stack[$stack_idx + 2];
            // Popping only moves the index; stale slots are overwritten by later pushes.
            $stack_idx -= 3; // TODO: Remove items from stack? (performance!!!)

            $is_final = $finals[$n] > 0;

            $result = false;
            // Node 0 is the root (empty label), so there is nothing to walk for it.
            if(false !== $trans && $n > 0) {
                $label = $labels[$n];

                $result = $fsa->walk($trans, $label, $is_final);

                // The prefix exists in the FSA only if the whole label was consumed.
                if($GLOBALS['__phpmorphy_strlen']($label) == $result['walked']) {
                    $trans = $result['word_trans'];
                } else {
                    $trans = false;
                }
            }

            if($is_final) {
                if(false !== $trans && isset($result['annot'])) {
                    $annots[$result['annot']][] = $path;
                } else {
                    $this->notfound[] = $path;
                }
            }

            // Children inherit the current path and transition; a failed
            // transition therefore marks the whole subtree as not found.
            if(false !== $dests[$n]) {
                foreach($dests[$n] as $dest) {
                    $stack_idx += 3;
                    $stack[$stack_idx] = $dest;
                    $stack[$stack_idx + 1] = $path;
                    $stack[$stack_idx + 2] = $trans;
                }
            }
        }

        return $annots;
    }

    /**
     * Builds word forms (or part-of-speech ids) from raw annotations.
     *
     * Exactly one kind of output is produced per call, chosen in this order:
     * pseudo root, base form, part of speech, and - when all three flags are
     * false - every form described by the flexia data of the annotation.
     * Entries with an empty raw annotation are skipped. Duplicate values for
     * a word are collapsed.
     *
     * @param array $annotsRaw raw annotation => list of words, as returned by findWord()
     * @param bool $composeBase
     * @param bool $composePseudoRoot
     * @param bool $composePartOfSpeech
     * @return array word => list of distinct values
     */
    protected function composeForms(
        $annotsRaw, $composeBase, $composePseudoRoot, $composePartOfSpeech
    ) {
        $result = array();

        // process found annotations
        foreach($annotsRaw as $annot_raw => $words) {
            if($GLOBALS['__phpmorphy_strlen']($annot_raw) == 0) continue;

            foreach($this->helper->decodeAnnot($annot_raw, $composeBase) as $annot) {
                if(!($composeBase || $composePseudoRoot)) {
                    $flexias = $this->graminfo->readFlexiaData($annot);
                }

                $cplen = $annot['cplen'];
                $plen = $annot['plen'];
                $flen = $annot['flen'];

                if($composePartOfSpeech) {
                    $pos_id = $this->helper->extractPartOfSpeech($annot);
                }

                foreach($words as $word) {
                    // Cut $cplen + $plen leading and $flen trailing units off the word.
                    if($flen) {
                        $base = $GLOBALS['__phpmorphy_substr']($word, $cplen + $plen, -$flen);
                    } else {
                        if($cplen || $plen) {
                            $base = $GLOBALS['__phpmorphy_substr']($word, $cplen + $plen);
                        } else {
                            $base = $word;
                        }
                    }

                    $prefix = $cplen ? $GLOBALS['__phpmorphy_substr']($word, 0, $cplen) : '';

                    // Values are stored as array keys so that duplicates collapse.
                    if($composePseudoRoot) {
                        $result[$word][$base] = 1;
                    } else if($composeBase) {
                        $form = $prefix . $annot['base_prefix'] . $base . $annot['base_suffix'];

                        $result[$word][$form] = 1;
                    } else if($composePartOfSpeech) {
                        $result[$word][$pos_id] = 1;
                    } else {
                        // $flexias is a flat list of (prefix, suffix) pairs.
                        for($i = 0, $c = count($flexias); $i < $c; $i += 2) {
                            $form = $prefix . $flexias[$i] . $base . $flexias[$i + 1];
                            $result[$word][$form] = 1;
                        }
                    }
                }
            }
        }

        // Turn the per-word sets (value => 1) into plain lists of values.
        foreach(array_keys($result) as $key) {
            $result[$key] = array_keys($result[$key]);
        }

        return $result;
    }

    /**
     * Packs the words into a compressed prefix tree.
     *
     * The tree is returned as three parallel structures indexed by node id
     * (node 0 is the root and has an empty label):
     *  - labels: string[] - the string fragment carried by each node
     *  - finals: string   - one character per node, '1' if a word ends there, '0' otherwise
     *  - dests:  array    - list of child node ids, or false for a node without children
     *
     * Words are sorted and compared strictly as strings. With PHP's default
     * numeric-aware comparison, distinct words such as "10" and "1e1" or "1"
     * and "01" would be considered equal and one of them would be silently
     * dropped. Exact duplicates and empty strings are skipped.
     *
     * @throws phpMorphy_Exception When $words is not an array, or when the
     *         internal node stack is exhausted while climbing back up the tree
     * @param string[] $words
     * @return array array(labels, finals, dests)
     */
    protected function buildPatriciaTrie($words) {
        if(!is_array($words)) {
            throw new phpMorphy_Exception("Words must be array");
        }

        // SORT_STRING: keep numeric-looking words in plain string order so
        // that only identical words end up being treated as duplicates.
        sort($words, SORT_STRING);

        $stack = array();
        $prev_word = '';
        $prev_word_len = 0;
        $prev_lcp = 0;

        $state_labels = array();
        $state_dests = array();

        // Root node
        $state_labels[] = '';
        $state_finals = '0';
        $state_dests[] = array();

        $node = 0;

        foreach($words as $word) {
            $word = (string)$word;

            // Strict comparison: "10" and "1e1" are different words.
            if($word === $prev_word) {
                continue;
            }

            $word_len = $GLOBALS['__phpmorphy_strlen']($word);
            // find longest common prefix
            for($lcp = 0, $c = min($prev_word_len, $word_len); $lcp < $c && $word[$lcp] === $prev_word[$lcp]; $lcp++);

            if($lcp == 0) {
                // Nothing in common with the previous word: start a new branch at the root.
                $stack = array();

                $new_state_id = count($state_labels);

                $state_labels[] = $word;
                $state_finals .= '1';
                $state_dests[] = false;

                $state_dests[0][] = $new_state_id;

                $node = $new_state_id;
            } else {
                $need_split = true;
                $trim_size = 0; // for split

                if($lcp == $prev_lcp) {
                    // Same branching depth as the previous word: attach to the node on top of the stack.
                    $need_split = false;
                    $node = $stack[count($stack) - 1];
                } elseif($lcp > $prev_lcp) {
                    if($lcp == $prev_word_len) {
                        // The previous word is a complete prefix of this one: append below it.
                        $need_split = false;
                    } else {
                        $need_split = true;
                        $trim_size = $lcp - $prev_lcp;
                    }

                    $stack[] = $node;
                } else {
                    // The common prefix got shorter: climb towards the root,
                    // subtracting label lengths, until the node containing
                    // the branching point is reached.
                    $trim_size = $GLOBALS['__phpmorphy_strlen']($prev_word) - $lcp;

                    for($stack_size = count($stack) - 1; ;--$stack_size) {
                        $trim_size -= $GLOBALS['__phpmorphy_strlen']($state_labels[$node]);

                        if($trim_size <= 0) {
                            break;
                        }

                        if(count($stack) < 1) {
                            throw new phpMorphy_Exception('Infinite loop posible');
                        }

                        $node = array_pop($stack);
                    }

                    // A negative remainder means the branching point lies
                    // inside the node's label, so the node has to be split.
                    $need_split = $trim_size < 0;
                    $trim_size = abs($trim_size);

                    if($need_split) {
                        $stack[] = $node;
                    } else {
                        $node = $stack[$stack_size];
                    }
                }

                if($need_split) {
                    $node_key = $state_labels[$node];

                    // split
                    $new_node_id_1 = count($state_labels);
                    $new_node_id_2 = $new_node_id_1 + 1;

                    // new_node_1: takes over the tail of the old label together
                    // with the old node's final flag and children
                    $state_labels[] = $GLOBALS['__phpmorphy_substr']($node_key, $trim_size);
                    $state_finals .= $state_finals[$node];
                    $state_dests[] = $state_dests[$node];

                    // adjust old node: keeps only the shared head of the label
                    $state_labels[$node] = $GLOBALS['__phpmorphy_substr']($node_key, 0, $trim_size);
                    $state_finals[$node] = '0';
                    $state_dests[$node] = array($new_node_id_1);

                    // append new node, new_node_2: remainder of the current word
                    $state_labels[] = $GLOBALS['__phpmorphy_substr']($word, $lcp);
                    $state_finals .= '1';
                    $state_dests[] = false;

                    $state_dests[$node][] = $new_node_id_2;

                    $node = $new_node_id_2;
                } else {
                    $new_node_id = count($state_labels);

                    $state_labels[] = $GLOBALS['__phpmorphy_substr']($word, $lcp);
                    $state_finals .= '1';
                    $state_dests[] = false;

                    if(false !== $state_dests[$node]) {
                        $state_dests[$node][] = $new_node_id;
                    } else {
                        $state_dests[$node] = array($new_node_id);
                    }

                    $node = $new_node_id;
                }
            }

            $prev_word = $word;
            $prev_word_len = $word_len;
            $prev_lcp = $lcp;
        }

        return array($state_labels, $state_finals, $state_dests);
    }
}