from math import log

import numpy as np
import pandas as pd

from ..lowcardinalitycolumn import LowCardinalityColumn
from ...reader import read_binary_uint64
from ...writer import write_binary_int64
from .intcolumn import (
    NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column
)


class NumpyLowCardinalityColumn(LowCardinalityColumn):
    int_types = {
        0: NumpyUInt8Column,
        1: NumpyUInt16Column,
        2: NumpyUInt32Column,
        3: NumpyUInt64Column
    }

    def __init__(self, nested_column, **kwargs):
        super(NumpyLowCardinalityColumn, self).__init__(nested_column,
                                                        **kwargs)

    def _write_data(self, items, buf):
        # Do not write anything for empty column.
        # May happen while writing empty arrays.
        if not len(items):
            return

        # Replace nans with defaults if not nullabe.
        if isinstance(items, np.ndarray) and not self.nested_column.nullable:
            nulls = pd.isnull(items)
            items = np.where(nulls, self.nested_column.null_value, items)

        c = pd.Categorical(items)

        int_type = int(log(len(c.codes), 2) / 8)
        int_column = self.int_types[int_type]()

        serialization_type = self.serialization_type | int_type

        index = c.categories
        keys = c.codes

        if self.nested_column.nullable:
            # First element represents NULL if column is nullable.
            index = index.insert(0, self.nested_column.null_value)
            keys = keys + 1
            # Prevent null map writing. Reset nested column nullable flag.
            self.nested_column.nullable = False

        write_binary_int64(serialization_type, buf)
        write_binary_int64(len(index), buf)

        self.nested_column.write_data(index.to_numpy(items.dtype), buf)
        write_binary_int64(len(items), buf)
        int_column.write_items(keys, buf)

    def _read_data(self, n_items, buf, nulls_map=None):
        if not n_items:
            return tuple()

        serialization_type = read_binary_uint64(buf)

        # Lowest byte contains info about key type.
        key_type = serialization_type & 0xf
        keys_column = self.int_types[key_type]()

        nullable = self.nested_column.nullable
        # Prevent null map reading. Reset nested column nullable flag.
        self.nested_column.nullable = False

        index_size = read_binary_uint64(buf)
        index = self.nested_column.read_data(index_size, buf)

        read_binary_uint64(buf)  # number of keys
        keys = keys_column.read_data(n_items, buf)

        if nullable:
            # Shift all codes by one ("No value" code is -1 for pandas
            # categorical) and drop corresponding first index
            # this is analog of original operation:
            # index = (None, ) + index[1:]
            keys = np.array(keys, dtype='int64')  # deal with possible overflow
            keys = keys - 1
            index = index[1:]
        return pd.Categorical.from_codes(keys, index)


def create_numpy_low_cardinality_column(spec, column_by_spec_getter,
                                        column_options):
    inner = spec[15:-1]
    nested = column_by_spec_getter(inner)
    return NumpyLowCardinalityColumn(nested, **column_options)