/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.dependency.perceptron.accessories;

import com.hankcs.hanlp.dependency.perceptron.accessories.Edge;
import com.hankcs.hanlp.dependency.perceptron.accessories.Pair;
import com.hankcs.hanlp.dependency.perceptron.structures.IndexMaps;
import com.hankcs.hanlp.dependency.perceptron.structures.Sentence;
import com.hankcs.hanlp.dependency.perceptron.transition.configuration.CompactTree;
import com.hankcs.hanlp.dependency.perceptron.transition.configuration.Instance;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;

/**
 * Reader for tab-separated CoNLL dependency files.
 *
 * <p>Every token line must have at least eight tab-separated columns. The columns this
 * class looks at are: column 0 (token index), column 1 (word form), column 3 (POS tag),
 * column 6 (head index) and column 7 (dependency relation). Sentences are separated by
 * blank lines.
 *
 * <p>{@link #createIndices} is a static, self-contained pass that builds the string-to-id
 * tables; {@link #readData} and {@link #readStringData} consume the file given to the
 * constructor through {@link #fileReader}.
 */
public class CoNLLReader {
    /** Reader over the file given to the constructor; shared by all instance read methods. */
    BufferedReader fileReader;

    /**
     * Opens {@code filePath} for reading.
     *
     * @param filePath path of the CoNLL file
     * @throws FileNotFoundException if the file cannot be opened
     */
    public CoNLLReader(String filePath) throws FileNotFoundException {
        this.fileReader = new BufferedReader(new FileReader(filePath));
    }

    /**
     * Closes the underlying file reader. Call this once no more data will be read.
     *
     * @throws IOException if closing the reader fails
     */
    public void close() throws IOException {
        this.fileReader.close();
    }

    /**
     * Builds the id tables for a treebank by scanning {@code conllPath} several times.
     *
     * <p>All strings share a single id space, assigned in this order: {@code "ROOT"} (id 0),
     * dependency labels, POS tags, cluster strings and their 4/6-character prefixes (only when
     * a cluster file is given), and finally word forms. Lines with fewer than eight columns
     * are ignored.
     *
     * @param conllPath   path of the CoNLL training file
     * @param labeled     when {@code false} every label is replaced by {@code "~"}; when
     *                    {@code true} the label {@code "_"} is stored as {@code "-"}
     * @param lowercased  whether word forms are lower-cased before being indexed
     * @param clusterFile path of a tab-separated cluster file (cluster, word, and at least one
     *                    more column per line); {@code null} or empty means "no cluster file"
     * @return the populated {@link IndexMaps}
     * @throws IOException if any of the files cannot be read
     */
    public static IndexMaps createIndices(String conllPath, boolean labeled, boolean lowercased, String clusterFile) throws IOException {
        HashMap<String, Integer> wordMap = new HashMap<String, Integer>();
        HashMap<Integer, Integer> labels = new HashMap<Integer, Integer>();
        HashMap<String, Integer> clusterMap = new HashMap<String, Integer>();
        HashMap<Integer, Integer> cluster4Map = new HashMap<Integer, Integer>();
        HashMap<Integer, Integer> cluster6Map = new HashMap<Integer, Integer>();
        wordMap.put("ROOT", 0);
        labels.put(0, 0);

        String rootString = CoNLLReader.indexLabels(conllPath, labeled, wordMap, labels);
        CoNLLReader.indexPosTags(conllPath, wordMap);
        // A null cluster path is treated like the empty string instead of failing with an NPE.
        if (clusterFile != null && clusterFile.length() > 0) {
            CoNLLReader.indexClusters(clusterFile, wordMap, clusterMap, cluster4Map, cluster6Map);
        }
        CoNLLReader.indexWords(conllPath, lowercased, wordMap);
        return new IndexMaps(wordMap, labels, rootString, cluster4Map, cluster6Map, clusterMap);
    }

    /**
     * First pass: registers every dependency label and remembers the label used on
     * head-0 tokens.
     *
     * @return the raw (un-normalised) label of the last token whose head is 0, or
     *         {@code "ROOT"} when no such token exists
     */
    private static String indexLabels(String conllPath, boolean labeled, HashMap<String, Integer> wordMap, HashMap<Integer, Integer> labels) throws IOException {
        String rootString = "ROOT";
        BufferedReader reader = new BufferedReader(new FileReader(conllPath));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] cells = line.trim().split("\t");
                if (cells.length <= 7) {
                    continue;
                }
                String label = cells[7];
                if (Integer.parseInt(cells[6]) == 0) {
                    // Captured before normalisation, exactly as it appears in the file.
                    rootString = label;
                }
                if (!labeled) {
                    label = "~";
                } else if (label.equals("_")) {
                    label = "-";
                }
                if (!wordMap.containsKey(label)) {
                    // labels maps the shared string id to a dense label index.
                    labels.put(wordMap.size(), labels.size());
                    wordMap.put(label, wordMap.size());
                }
            }
        } finally {
            reader.close();
        }
        return rootString;
    }

    /** Second pass: registers every POS tag (column 3) that has no id yet. */
    private static void indexPosTags(String conllPath, HashMap<String, Integer> wordMap) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(conllPath));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] cells = line.trim().split("\t");
                if (cells.length > 7) {
                    CoNLLReader.assignId(cells[3], wordMap);
                }
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Cluster pass: registers each cluster string and its 4- and 6-character prefixes, and
     * records word-to-cluster and cluster-to-prefix links. Lines with fewer than three
     * columns are ignored.
     */
    private static void indexClusters(String clusterFile, HashMap<String, Integer> wordMap, HashMap<String, Integer> clusterMap, HashMap<Integer, Integer> cluster4Map, HashMap<Integer, Integer> cluster6Map) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(clusterFile));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] cells = line.trim().split("\t");
                if (cells.length <= 2) {
                    continue;
                }
                String cluster = cells[0];
                String word = cells[1];
                String prefix4 = cluster.substring(0, Math.min(4, cluster.length()));
                String prefix6 = cluster.substring(0, Math.min(6, cluster.length()));
                // Order matters: the full cluster string gets its id before the prefixes.
                int clusterId = CoNLLReader.assignId(cluster, wordMap);
                clusterMap.put(word, clusterId);
                int pref4Id = CoNLLReader.assignId(prefix4, wordMap);
                int pref6Id = CoNLLReader.assignId(prefix6, wordMap);
                cluster4Map.put(clusterId, pref4Id);
                cluster6Map.put(clusterId, pref6Id);
            }
        } finally {
            reader.close();
        }
    }

    /** Final pass: registers every word form (column 1), optionally lower-cased. */
    private static void indexWords(String conllPath, boolean lowercased, HashMap<String, Integer> wordMap) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(conllPath));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] cells = line.trim().split("\t");
                if (cells.length <= 7) {
                    continue;
                }
                String word = cells[1];
                if (lowercased) {
                    word = word.toLowerCase();
                }
                CoNLLReader.assignId(word, wordMap);
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Returns the id of {@code key}, registering it with the next free id
     * ({@code wordMap.size()}) when it is not present yet.
     */
    private static int assignId(String key, HashMap<String, Integer> wordMap) {
        Integer existing = wordMap.get(key);
        if (existing != null) {
            return existing;
        }
        int fresh = wordMap.size();
        wordMap.put(key, fresh);
        return fresh;
    }

    /**
     * Reads sentences from the current position of the underlying reader and converts them
     * to {@link Instance}s. Because the same reader is reused, a later call continues where
     * the previous one stopped.
     *
     * <p>Strings missing from the index are encoded as {@code -1}. Tokens whose head index
     * is negative get no gold dependency. Tokens whose head is 0 always use the relation
     * {@code "ROOT"}.
     *
     * @param limit             reading stops at the first blank line at which the number of
     *                          non-empty sentences seen so far is no longer below this value;
     *                          a message is then printed to standard output
     * @param keepNonProjective when {@code false}, instances for which
     *                          {@code isNonprojective()} is true are dropped
     * @param labeled           when {@code false} every relation is replaced by {@code "~"};
     *                          when {@code true} the relation {@code "_"} is read as {@code "-"}
     * @param rootFirst         when {@code false}, a root token (id 0) is appended to every
     *                          sentence and head-0 edges are re-pointed to it
     * @param lowerCased        whether word forms are lower-cased before lookup
     * @param maps              index tables used for id and cluster lookup
     * @return the instances read by this call, in file order
     * @throws IOException              if reading fails
     * @throws IllegalArgumentException if a non-blank line has fewer than eight columns
     */
    public ArrayList<Instance> readData(int limit, boolean keepNonProjective, boolean labeled, boolean rootFirst, boolean lowerCased, IndexMaps maps) throws IOException {
        HashMap<String, Integer> wordMap = maps.getWordId();
        ArrayList<Instance> instanceList = new ArrayList<Instance>();
        ArrayList<Integer> tokens = new ArrayList<Integer>();
        ArrayList<Integer> tags = new ArrayList<Integer>();
        ArrayList<Integer> cluster4Ids = new ArrayList<Integer>();
        ArrayList<Integer> cluster6Ids = new ArrayList<Integer>();
        ArrayList<Integer> clusterIds = new ArrayList<Integer>();
        HashMap<Integer, Edge> goldDependencies = new HashMap<Integer, Edge>();
        int sentenceCounter = 0;
        String line;
        while ((line = this.fileReader.readLine()) != null) {
            line = line.trim();
            if (line.length() == 0) {
                // Blank line: close the sentence collected so far (if any).
                if (tokens.size() > 0) {
                    ++sentenceCounter;
                    Instance instance = CoNLLReader.buildInstance(tokens, tags, cluster4Ids, cluster6Ids, clusterIds, goldDependencies, rootFirst);
                    if (keepNonProjective || !instance.isNonprojective()) {
                        instanceList.add(instance);
                    }
                }
                // Fresh buffers: the previous ones are now owned by the emitted instance.
                goldDependencies = new HashMap<Integer, Edge>();
                tokens = new ArrayList<Integer>();
                tags = new ArrayList<Integer>();
                cluster4Ids = new ArrayList<Integer>();
                cluster6Ids = new ArrayList<Integer>();
                clusterIds = new ArrayList<Integer>();
                if (sentenceCounter >= limit) {
                    System.out.println("buffer full..." + instanceList.size());
                    break;
                }
                continue;
            }
            String[] cells = line.split("\t");
            if (cells.length < 8) {
                throw new IllegalArgumentException("invalid conll format");
            }
            int wordIndex = Integer.parseInt(cells[0]);
            String word = cells[1].trim();
            if (lowerCased) {
                word = word.toLowerCase();
            }
            String pos = cells[3].trim();
            tags.add(CoNLLReader.getId(pos, wordMap));
            tokens.add(CoNLLReader.getId(word, wordMap));

            int headIndex = Integer.parseInt(cells[6]);
            String relation = cells[7];
            if (!labeled) {
                relation = "~";
            } else if (relation.equals("_")) {
                relation = "-";
            }
            if (headIndex == 0) {
                relation = "ROOT";
            }

            int[] ids = maps.clusterId(word);
            clusterIds.add(ids[0]);
            cluster4Ids.add(ids[1]);
            cluster6Ids.add(ids[2]);

            // Negative heads mark tokens without a gold attachment; they get no edge.
            if (headIndex >= 0) {
                goldDependencies.put(wordIndex, new Edge(headIndex, CoNLLReader.getId(relation, wordMap)));
            }
        }
        // The file may end without a trailing blank line; flush the pending sentence
        // through the same path (including the projectivity filter) as all others.
        if (tokens.size() > 0) {
            Instance instance = CoNLLReader.buildInstance(tokens, tags, cluster4Ids, cluster6Ids, clusterIds, goldDependencies, rootFirst);
            if (keepNonProjective || !instance.isNonprojective()) {
                instanceList.add(instance);
            }
        }
        return instanceList;
    }

    /**
     * Turns the buffers of one sentence into an {@link Instance}. When {@code rootFirst} is
     * {@code false}, head-0 edges are re-pointed to position {@code tokens.size() + 1} and a
     * root entry (id 0) is appended to every per-token list, so that position is the root.
     * The passed collections are modified and handed over to the new instance.
     */
    private static Instance buildInstance(ArrayList<Integer> tokens, ArrayList<Integer> tags, ArrayList<Integer> cluster4Ids, ArrayList<Integer> cluster6Ids, ArrayList<Integer> clusterIds, HashMap<Integer, Edge> goldDependencies, boolean rootFirst) {
        if (!rootFirst) {
            // Must be computed before the root token is appended below.
            int rootIndex = tokens.size() + 1;
            for (Edge edge : goldDependencies.values()) {
                if (edge.headIndex == 0) {
                    edge.headIndex = rootIndex;
                }
            }
            tokens.add(0);
            tags.add(0);
            cluster4Ids.add(0);
            cluster6Ids.add(0);
            clusterIds.add(0);
        }
        Sentence sentence = new Sentence(tokens, tags, cluster4Ids, cluster6Ids, clusterIds);
        return new Instance(sentence, goldDependencies);
    }

    /** Looks up {@code word} in {@code wordMap}, returning {@code -1} when it is absent. */
    private static int getId(String word, HashMap<String, Integer> wordMap) {
        return CoNLLReader.getId(word, wordMap, -1);
    }

    /** Looks up {@code word} in {@code wordMap}, returning {@code defaultValue} when it is absent. */
    private static int getId(String word, HashMap<String, Integer> wordMap, int defaultValue) {
        Integer id = wordMap.get(word);
        if (id == null) {
            return defaultValue;
        }
        return id;
    }

    /**
     * Reads all remaining sentences from the underlying reader as {@link CompactTree}s that
     * keep POS tags and relations as strings (no index lookup).
     *
     * <p>Tokens whose head is 0 use the relation {@code "ROOT"}. A token with an empty POS
     * tag still contributes its (empty) tag but gets no dependency entry.
     *
     * @return one tree per non-empty sentence, in file order
     * @throws IOException              if reading fails
     * @throws IllegalArgumentException if a non-blank line has fewer than eight columns
     */
    public ArrayList<CompactTree> readStringData() throws IOException {
        ArrayList<CompactTree> treeSet = new ArrayList<CompactTree>();
        ArrayList<String> tags = new ArrayList<String>();
        HashMap<Integer, Pair<Integer, String>> goldDependencies = new HashMap<Integer, Pair<Integer, String>>();
        String line;
        while ((line = this.fileReader.readLine()) != null) {
            line = line.trim();
            if (line.length() == 0) {
                // Blank line: emit the collected sentence (if any) and start a new one.
                if (tags.size() >= 1) {
                    treeSet.add(new CompactTree(goldDependencies, tags));
                }
                tags = new ArrayList<String>();
                goldDependencies = new HashMap<Integer, Pair<Integer, String>>();
                continue;
            }
            String[] splitLine = line.split("\t");
            if (splitLine.length < 8) {
                throw new IllegalArgumentException("wrong file format");
            }
            int wordIndex = Integer.parseInt(splitLine[0]);
            String pos = splitLine[3].trim();
            tags.add(pos);
            int headIndex = Integer.parseInt(splitLine[6]);
            String relation = splitLine[7];
            if (headIndex == 0) {
                relation = "ROOT";
            }
            if (pos.length() > 0) {
                goldDependencies.put(wordIndex, new Pair<Integer, String>(headIndex, relation));
            }
        }
        // Flush a final sentence that is not followed by a blank line.
        if (tags.size() > 0) {
            treeSet.add(new CompactTree(goldDependencies, tags));
        }
        return treeSet;
    }
}

