"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Dataset = exports.DefaultFeatureLabel = exports.EXCLUDED_COLUMNS_ALL = exports.EXCLUDED_COLUMNS = exports.PrebuiltFeatures = void 0;
const FeatureType_1 = require("./FeatureType");
const DataLine_1 = require("./DataLine");
const NumTs_1 = require("../../NumTs/NumTs");
const ProjectionParamsDuck_1 = require("../../Ducks/ProjectionParamsDuck");
// String enum of the two column names this module refers to symbolically:
// `line` (read by the sequential check) and `groupLabel` (the cluster label).
// The object is shared with `exports.PrebuiltFeatures`.
const PrebuiltFeatures = exports.PrebuiltFeatures || (exports.PrebuiltFeatures = {});
PrebuiltFeatures.Line = "line";
PrebuiltFeatures.ClusterLabel = "groupLabel";
// Column names listed as excluded; the `_ALL` variant additionally lists `groupLabel`
// and is the one `Dataset.getColumns(true)` filters by.
exports.EXCLUDED_COLUMNS = ["__meta__", "x", "y", "algo", "clusterProbability", "age", "multiplicity"];
exports.EXCLUDED_COLUMNS_ALL = exports.EXCLUDED_COLUMNS.concat(["groupLabel"]);
// Feature label assigned to columns whose meta information carries no `featureLabel`.
exports.DefaultFeatureLabel = "Default";
/**
 * Container for a loaded dataset: the raw samples ("vectors"), per-column
 * metadata derived from them, the x/y bounds of the samples and, when the
 * data is sequential, the line segments the samples belong to.
 */
class Dataset {
    /**
     * @param {Object[]} vectors samples, each a plain object keyed by column name
     * @param {Object} ranges column name -> predefined range for that column
     * @param {Object} info dataset descriptor; `info.type` is copied to `this.type`
     * @param {Object} featureTypes column name -> FeatureType
     * @param {Object} [metaInformation] column name -> dictionary; the keys
     *     `featureLabel` and `project` are read by calculateColumnTypes
     */
    constructor(vectors, ranges, info, featureTypes, metaInformation = {}) {
        // Turns a column name into a projection entry that is pre-checked
        // when the name starts with '*'.
        this.mapProjectionInitialization = entry => {
            return {
                name: entry,
                checked: entry[0] === '*'
            };
        };
        this.vectors = vectors;
        this.info = info;
        this.columns = {};
        this.type = this.info.type;
        this.metaInformation = metaInformation;
        this.calculateBounds();
        this.calculateColumnTypes(ranges, featureTypes, metaInformation);
        this.checkLabels();
        // If the dataset is sequential, calculate the segments
        this.isSequential = this.checkSequential();
        if (this.isSequential) {
            this.segments = this.getSegs();
        }
    }
    /**
     * Groups the vectors into one DataLine per distinct value of `key`, each
     * line ordered by ascending `age`, and stores on every vector's `view` the
     * line it belongs to (`segment`) and its position in it (`sequenceIndex`).
     * @param {string} [key='line'] column that identifies the line of a vector
     * @returns {Array} one DataLine per distinct key value
     */
    getSegs(key = 'line') {
        const vectors = this.vectors;
        // Distinct line identifiers, in order of first appearance
        const lineKeys = [...new Set(vectors.map(vector => vector[key]))];
        return lineKeys.map(lineKey => {
            // Loose equality is kept deliberately so that e.g. 1 and "1" still
            // end up in the same line, as they always did.
            const members = vectors
                .filter(vector => vector[key] == lineKey)
                .sort((a, b) => a.age - b.age);
            const line = new DataLine_1.DataLine(lineKey, members);
            line.vectors.forEach((vector, index) => {
                vector.view.segment = line;
                vector.view.sequenceIndex = index;
            });
            return line;
        });
    }
    /**
     * Derives a numeric range for a column from the data.
     * @param {string} key column name
     * @returns {?{min: number, max: number, inferred: boolean}} the range, or
     *     null as soon as one value cannot be parsed with parseFloat
     */
    inferRangeForAttribute(key) {
        // Sentinels kept from the original implementation; with no vectors the
        // result therefore has min > max.
        let min = Number.MAX_SAFE_INTEGER;
        let max = Number.MIN_SAFE_INTEGER;
        for (const sample of this.vectors) {
            const value = parseFloat(sample[key]);
            if (Number.isNaN(value)) {
                return null;
            }
            if (value < min) {
                min = value;
            }
            if (value > max) {
                max = value;
            }
        }
        return { min: min, max: max, inferred: true };
    }
    /**
     * Currently a no-op.
     */
    reloadRanges() {
    }
    /**
     * Checks if the dataset contains sequential data: it must have a `line`
     * column and at least two vectors must share a line.
     * @returns {boolean}
     */
    checkSequential() {
        // If we have no line attribute, its not sequential
        if (!this.getColumns().includes(PrebuiltFeatures.Line)) {
            return false;
        }
        // If each sample is a different line, its not sequential either
        const lines = new Set(this.vectors.map(vector => vector.line));
        return lines.size !== this.vectors.length;
    }
    /**
     * Sets `this.multivariateLabels` to true when at least one vector carries
     * more than one group label. Vectors without a `groupLabel` count as
     * having none instead of raising a TypeError.
     */
    checkLabels() {
        this.multivariateLabels = this.vectors.some(vector => vector.groupLabel !== undefined && vector.groupLabel !== null && vector.groupLabel.length > 1);
    }
    /**
     * Creates a map which shows the distinct types and data types of the columns.
     * The column names are taken from the first vector; an empty dataset
     * yields no columns.
     * @param {Object} ranges column name -> predefined range
     * @param {Object} featureTypes column name -> FeatureType
     * @param {Object} metaInformation column name -> dictionary
     */
    calculateColumnTypes(ranges, featureTypes, metaInformation) {
        const columnNames = this.vectors.length > 0 ? Object.keys(this.vectors[0]) : [];
        columnNames.forEach(columnName => {
            // Store dictionary with key/value pairs in column
            const given = metaInformation[columnName];
            const columnMetaInformation = given !== null && given !== undefined ? given : {};
            const column = {
                featureType: featureTypes[columnName],
                metaInformation: columnMetaInformation,
                featureLabel: "featureLabel" in columnMetaInformation ? columnMetaInformation["featureLabel"] : exports.DefaultFeatureLabel,
                project: "project" in columnMetaInformation ? columnMetaInformation["project"] : true
            };
            this.columns[columnName] = column;
            // Check data type
            if (columnName in ranges) {
                // NOTE(review): columns with a predefined range get neither
                // `isNumeric` nor `distinct`; asTensor then treats them as
                // non-numeric and fails on the missing `distinct` - confirm
                // whether such columns are ever projected.
                column.range = ranges[columnName];
            }
            else if (this.vectors.some(vector => isNaN(vector[columnName]))) {
                // The coercing global isNaN is intended: any value that does
                // not convert to a number makes the whole column non-numeric.
                column.distinct = [...new Set(this.vectors.map(vector => vector[columnName]))];
                column.isNumeric = false;
            }
            else {
                column.isNumeric = true;
                column.range = this.inferRangeForAttribute(columnName);
            }
        });
        // These columns always get a fixed feature type, whatever was passed in
        const FeatureType = FeatureType_1.FeatureType;
        const fixedTypes = [
            ['algo', FeatureType.Categorical],
            ['groupLabel', FeatureType.Categorical],
            ['clusterProbability', FeatureType.Quantitative],
            ['x', FeatureType.Quantitative],
            ['y', FeatureType.Quantitative]
        ];
        fixedTypes.forEach(([name, featureType]) => {
            if (name in this.columns) {
                this.columns[name].featureType = featureType;
            }
        });
    }
    /**
     * Returns an array of columns that are available in the vectors
     * (taken from the first vector, `__meta__` is never included).
     * @param {boolean} [excludeGenerated=false] also drop every column listed
     *     in EXCLUDED_COLUMNS_ALL
     * @returns {string[]} column names, empty for an empty dataset
     */
    getColumns(excludeGenerated = false) {
        if (this.vectors.length === 0) {
            return [];
        }
        const names = Object.keys(this.vectors[0]).filter(name => name !== '__meta__');
        if (excludeGenerated) {
            return names.filter(name => !exports.EXCLUDED_COLUMNS_ALL.includes(name));
        }
        return names;
    }
    /**
     * Returns true if the dataset contains the column.
     */
    hasColumn(column) {
        // Loose comparison kept so non-string arguments behave as before
        return this.getColumns().some(name => name == column);
    }
    /**
     * Returns the vectors in this dataset as a 2d array, which
     * can be used as input for tsne for example.
     * @param {Array<{name: string, normalized: boolean}>} projectionColumns columns to include
     * @param {Object[]} [samples] vectors to convert, defaults to all vectors
     * @param [encodingMethod] EncodingMethod for non-numeric columns, defaults to ONEHOT
     * @param [normalizationMethod] NormalizationMethod for normalized numeric
     *     columns, defaults to STANDARDIZE
     * @returns {{tensor: number[][], featureTypes: Array}} one row per sample
     *     plus the feature types of the columns whose type is handled
     */
    asTensor(projectionColumns, samples, encodingMethod, normalizationMethod) {
        if (encodingMethod === undefined) {
            encodingMethod = ProjectionParamsDuck_1.EncodingMethod.ONEHOT;
        }
        if (normalizationMethod === undefined) {
            normalizationMethod = ProjectionParamsDuck_1.NormalizationMethod.STANDARDIZE;
        }
        const useOneHot = encodingMethod === ProjectionParamsDuck_1.EncodingMethod.ONEHOT;
        const standardize = normalizationMethod === ProjectionParamsDuck_1.NormalizationMethod.STANDARDIZE;
        const oneHot = (n, length) => {
            const arr = new Array(length).fill(0);
            // An unknown value (index -1) is encoded as all zeros
            if (n >= 0) {
                arr[n] = 1;
            }
            return arr;
        };
        // Mean and standard deviation are always taken over the whole dataset
        // and computed once per column. A Map avoids clashes between column
        // names and Object.prototype members.
        const statistics = new Map();
        const statisticsFor = name => {
            if (!statistics.has(name)) {
                const values = this.vectors.map(v => +v[name]);
                statistics.set(name, { mean: NumTs_1.mean(values), std: NumTs_1.std(values) });
            }
            return statistics.get(name);
        };
        const tensor = [];
        const rows = samples !== null && samples !== undefined ? samples : this.vectors;
        rows.forEach(vector => {
            let data = [];
            projectionColumns.forEach(entry => {
                const name = entry.name;
                const column = this.columns[name];
                if (column.isNumeric) {
                    const value = +vector[name];
                    if (!(column.range && entry.normalized)) {
                        data.push(value);
                    }
                    else if (standardize) { // map to 0 mean and unit standarddeviation
                        const stats = statisticsFor(name);
                        // when all values are equal in a column, the standard deviation can be 0, which would lead to an error
                        const s = stats.std <= 0 ? 1 : stats.std;
                        data.push((value - stats.mean) / s);
                    }
                    else { // map between [0;1]
                        const min = column.range["min"];
                        const span = column.range["max"] - min;
                        data.push((value - min) / (span > 0 ? span : 1));
                    }
                }
                else {
                    const index = column.distinct.indexOf(vector[name]);
                    if (useOneHot) { // Non numeric data can be converted using one-hot encoding
                        data = data.concat(oneHot(index, column.distinct.length));
                    }
                    else { // or just be integer encoded
                        data.push(index);
                    }
                }
            });
            tensor.push(data);
        });
        const featureTypes = [];
        projectionColumns.forEach(entry => {
            const column = this.columns[entry.name];
            switch (column.featureType) {
                case FeatureType_1.FeatureType.Binary:
                    featureTypes.push(FeatureType_1.FeatureType.Binary);
                    break;
                case FeatureType_1.FeatureType.Categorical:
                    if (useOneHot && !column.isNumeric) {
                        // A one hot encoded attribute occupies one binary tensor
                        // column per distinct value. (Array.prototype.concat does
                        // not modify its receiver, so the entries are pushed.)
                        for (let i = 0; i < column.distinct.length; i++) {
                            featureTypes.push(FeatureType_1.FeatureType.Binary);
                        }
                    }
                    else { // otherwise, it is declared as categorical column that contains integer because we can only handle integers in the distance metrics
                        featureTypes.push(FeatureType_1.FeatureType.Categorical);
                    }
                    break;
                case FeatureType_1.FeatureType.Quantitative:
                    featureTypes.push(FeatureType_1.FeatureType.Quantitative);
                    break;
                case FeatureType_1.FeatureType.Date: // TODO: handle Date types
                case FeatureType_1.FeatureType.Ordinal: // TODO: handle Ordinal types
                case FeatureType_1.FeatureType.String: // TODO: handle String types
                default:
                    break;
            }
        });
        return { tensor: tensor, featureTypes: featureTypes };
    }
    /**
     * Calculates the dataset bounds for this set, eg the minimum and maximum x,y values
     * which is needed for the zoom to work correctly. The result is stored in
     * `this.bounds`.
     */
    calculateBounds() {
        // Folding with Math.min/Math.max gives the same result as spreading the
        // values into a single call, but is not limited by the maximum number
        // of call arguments, so it also works for very large datasets.
        const extent = values => {
            let min = Infinity;
            let max = -Infinity;
            for (const value of values) {
                min = Math.min(min, value);
                max = Math.max(max, value);
            }
            return { min: min, max: max };
        };
        const x = extent(this.vectors.map(vector => vector.x));
        const y = extent(this.vectors.map(vector => vector.y));
        const scaleBase = 100;
        const absoluteMaximum = Math.max(Math.abs(x.min), Math.abs(x.max), Math.abs(y.min), Math.abs(y.max));
        this.bounds = {
            scaleBase: scaleBase,
            scaleFactor: absoluteMaximum / scaleBase,
            x: x,
            y: y
        };
    }
    /**
     * Calculates the maximum path length for this dataset.
     * @returns {number} the vector count of the longest segment, or 1 when the
     *     dataset is not sequential
     */
    getMaxPathLength() {
        if (!this.isSequential) {
            return 1;
        }
        // Folded instead of spread, see calculateBounds
        let longest = -Infinity;
        for (const segment of this.segments) {
            longest = Math.max(longest, segment.vectors.length);
        }
        return longest;
    }
    /**
     * Infers an array of attributes that can be filtered after, these can be
     * categorical, sequential or continuous attributes.
     * @param {*} ranges not used by this method
     * @returns {Array} one `{category, attributes}` entry for each of shape,
     *     size, transparency and color; an empty array for an empty dataset
     */
    extractEncodingFeatures(ranges) {
        if (this.vectors.length <= 0) {
            return [];
        }
        const shapes = ["circle", "star", "square", "cross"];
        const shape_options = [];
        const size_options = [];
        const transparency_options = [];
        const color_options = [];
        const columns = this.columns;
        const categorical = key => ({
            "key": key,
            "name": key,
            "type": "categorical"
        });
        // Assigns the n-th distinct value of the column to the n-th shape
        const shapeMapping = key => ({
            "key": key,
            "name": key,
            "type": "categorical",
            "values": columns[key].distinct.map((value, index) => {
                return {
                    from: value,
                    to: shapes[index]
                };
            })
        });
        const sequential = (key, valueRange) => {
            const option = {
                "key": key,
                "name": key,
                "type": "sequential",
                "range": columns[key].range
            };
            if (valueRange !== undefined) {
                option["values"] = { range: valueRange };
            }
            return option;
        };
        // `distinct` only exists for columns that were detected as non-numeric
        const hasDistinct = key => Array.isArray(columns[key].distinct);
        Object.keys(columns).forEach(key => {
            if (key === PrebuiltFeatures.ClusterLabel) {
                color_options.push(categorical(key));
                return;
            }
            switch (columns[key].featureType) {
                case FeatureType_1.FeatureType.Binary:
                    color_options.push(categorical(key));
                    if (hasDistinct(key)) {
                        shape_options.push(shapeMapping(key));
                    }
                    break;
                case FeatureType_1.FeatureType.Categorical:
                    color_options.push(categorical(key));
                    // There are only as many shapes as entries in `shapes`
                    if (hasDistinct(key) && columns[key].distinct.length <= 4) {
                        shape_options.push(shapeMapping(key));
                    }
                    break;
                case FeatureType_1.FeatureType.Ordinal:
                case FeatureType_1.FeatureType.Quantitative:
                    color_options.push(sequential(key));
                    transparency_options.push(sequential(key, [0.3, 1.0]));
                    size_options.push(sequential(key, [1, 2]));
                    break;
                case FeatureType_1.FeatureType.Date:
                case FeatureType_1.FeatureType.String:
                default:
                    break;
            }
        });
        return [
            {
                "category": "shape",
                "attributes": shape_options
            },
            {
                "category": "size",
                "attributes": size_options
            },
            {
                "category": "transparency",
                "attributes": transparency_options
            },
            {
                "category": "color",
                "attributes": color_options
            }
        ];
    }
}
// Publish the class as the named CommonJS export `Dataset`.
exports.Dataset = Dataset;
