#!/usr/bin/env python3

# Copyright 2017 Patrick O. Perry.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

try:
    import property
except ModuleNotFoundError:
    from util import property

# Input data files (paths are relative to the working directory).
EMOJI_DATA = "data/ucd/emoji/emoji-data.txt"
GRAPHEME_BREAK_PROPERTY = "data/ucd/auxiliary/GraphemeBreakProperty.txt"

# Grapheme-break table first, then the emoji property sets.
code_props = property.read(GRAPHEME_BREAK_PROPERTY)
emoji_props = property.read(EMOJI_DATA, sets=True)

# Entries the data file left unset (None) become the default 'Other'.
for code, value in enumerate(code_props):
    if value is None:
        code_props[code] = 'Other'

# Extended_Pictographic is layered on top of the grapheme-break values;
# it is only expected to land on entries that are still 'Other'.
for code in emoji_props['Extended_Pictographic']:
    assert code_props[code] == 'Other'
    code_props[code] = 'Extended_Pictographic'

# Every distinct property name except 'Other', which is pinned to 0 below.
prop_names = set(code_props)
prop_names.remove('Other')

# Map each property name to its integer value: 'Other' is 0 and the
# remaining names are numbered 1, 2, ... in sorted order.
prop_vals = {'Other': 0}
for number, name in enumerate(sorted(prop_names), start=1):
    prop_vals[name] = number


def compute_tables(block_size, props=None):
    """Split a property table into a de-duplicated two-stage lookup table.

    The table is cut into consecutive blocks of ``block_size`` entries.
    Each distinct block is stored once in ``stage2``; ``stage1[i]`` holds
    the index in ``stage2`` of the i-th block.

    Parameters:
        block_size: number of entries per block (must be non-zero).
        props: sequence of property values to split.  Defaults to the
            module-level ``code_props`` table when omitted.

    Returns:
        A ``(stage1, stage2)`` tuple: ``stage1`` is a list of block
        indices and ``stage2`` is a list of tuples, each of length
        ``block_size``, in order of first appearance.

    Note:
        Only whole blocks are emitted; if ``len(props)`` is not a multiple
        of ``block_size`` the trailing partial block is dropped.
    """
    if props is None:
        props = code_props

    nblock = len(props) // block_size
    stage1 = []
    stage2 = []
    stage2_index = {}
    for begin in range(0, nblock * block_size, block_size):
        block = tuple(props[begin:begin + block_size])
        # setdefault returns the existing index for a block seen before,
        # or registers the next free index for a new block.
        j = stage2_index.setdefault(block, len(stage2))
        if j == len(stage2):
            stage2.append(block)
        stage1.append(j)
    return (stage1, stage2)


def stage1_item_size(nstage2):
    """Return the byte width of an integer type able to index ``nstage2`` blocks.

    The result is the smallest power of two (1, 2, 4, 8, ...) number of
    bytes that can hold every index in ``range(nstage2)``.

    Integer arithmetic is used throughout: a logarithm-based computation
    fails for ``nstage2 == 1`` (it ends up taking the log of zero) and is
    exposed to floating-point rounding at exact powers of two.

    Parameters:
        nstage2: number of stage-2 blocks; values below 2 need one byte.

    Returns:
        The item size in bytes, as an int.
    """
    # Largest index that must be representable; clamp so 0 and 1 blocks
    # still yield a valid one-byte type.
    max_index = max(nstage2 - 1, 0)
    nbits = max(max_index.bit_length(), 1)
    nbyte = (nbits + 7) // 8
    # Round the byte count up to the next power of two.
    return 1 << (nbyte - 1).bit_length()

# Table sizes are rounded up to whole pages of this many bytes when
# comparing candidate block sizes.
page_size = 4096
block_size = 256  # placeholder; reassigned by the search below

# Total (page-rounded) table size in bytes for each candidate block size.
nbytes = {}

best_block_size, smallest_size = 1, len(code_props)

# Try every power-of-two block size from 2 to 65536 and keep the first
# one that gives the smallest combined table size.
for exponent in range(1, 17):
    block_size = 1 << exponent
    stage1, stage2 = compute_tables(block_size)

    # Stage-2 entries are counted as one byte each.
    nbyte1 = len(stage1) * stage1_item_size(len(stage2))
    nbyte2 = len(stage2) * block_size

    nbyte1 = math.ceil(nbyte1 / page_size) * page_size
    nbyte2 = math.ceil(nbyte2 / page_size) * page_size
    nbyte = nbyte1 + nbyte2
    nbytes[block_size] = nbyte

    if nbyte >= smallest_size:
        continue
    smallest_size, best_block_size = nbyte, block_size


# Rebuild the tables with the winning block size.
block_size = best_block_size
stage1, stage2 = compute_tables(block_size)

type1_size = stage1_item_size(len(stage2))

# C integer type for stage-1 entries, chosen by width in bytes; any
# width other than 1, 2 or 4 falls back to uint64_t.
type1 = {
    1: 'uint8_t',
    2: 'uint16_t',
    4: 'uint32_t',
}.get(type1_size, 'uint64_t')

# C element type for the stage-2 table.
type2 = 'int8_t'



# Write graphbreak.h to stdout

# Fixed preamble: generated-file notice, reference comment, include
# guard and the <stdint.h> include.  Empty strings are blank lines.
print("\n".join((
    "/* This file is automatically generated. DO NOT EDIT!",
    "   Instead, edit gen-graphbreak.py and re-run.  */",
    "",
    "/*",
    " * Unicode Grapheme_Break property values.",
    " *",
    ' * Defined in UAX #29 "Unicode Text Segmentation"',
    " *",
    " *     http://www.unicode.org/reports/tr29/",
    " *",
    " * Section 4.1, Table 3.",
    " *",
    " *",
    " * We use the two-stage lookup strategy described at",
    " *",
    " *     http://www.strchr.com/multi-stage_tables",
    " *",
    " */",
    "",
    "#ifndef UNICODE_GRAPHBREAK_H",
    "#define UNICODE_GRAPHBREAK_H",
    "",
    "#include <stdint.h>",
    "",
)))
# Emit the C enum: OTHER is always 0, followed by the remaining
# properties in sorted order with the values assigned in prop_vals.
enum_entries = ["\tGRAPH_BREAK_OTHER = 0"]
enum_entries.extend(
    "\tGRAPH_BREAK_{} = {}".format(name.upper(), prop_vals[name])
    for name in sorted(prop_names)
)
print("enum graph_break_prop {")
print(",\n".join(enum_entries))
print("};")
print("")
# Emit the stage-1 table.  All entries but the final one are written
# 16 per row, each followed by a comma, and every row starts with a
# comment giving i * block_size (in hex) for its first entry.
print("static const " + type1 + " graph_break_stage1[] = {")
last = len(stage1) - 1
for row_start in range(0, last, 16):
    row = stage1[row_start:min(row_start + 16, last)]
    label = "/* U+{:04X} */".format(row_start * block_size)
    cells = "".join("{0: >3},".format(entry) for entry in row)
    # Only a full row of 16 ends the line; a short row is completed by
    # the final entry printed below.
    print(label + cells, end="\n" if len(row) == 16 else "")
# The final entry carries no trailing comma.
print("{0: >3}".format(stage1[last]))
print("};")
print("")
# Emit the stage-2 table: one brace-delimited initializer per block,
# with the property values written 16 per row.
print("static const {} graph_break_stage2[][{}] = {{".format(type2,
                                                            block_size))
nblocks = len(stage2)
for block_index, block in enumerate(stage2):
    print("  /* block " + str(block_index) + " */")
    cells = ["{0: >3}".format(prop_vals[name]) for name in block]
    rows = [",".join(cells[k:k + 16]) for k in range(0, len(cells), 16)]
    print("  {" + ",\n   ".join(rows))
    # Blocks are separated by a comma and a blank line; the last block
    # gets neither.
    print("  }" + ("" if block_index + 1 == nblocks else ",\n"))
print("};")

# Emit the C lookup function that chains the two tables, then close the
# include guard.
print("\n".join((
    "",
    "static int graph_break(int32_t code)",
    "{",
    "\tconst int32_t block_size = {};".format(block_size),
    "\t{} i = graph_break_stage1[code / block_size];".format(type1),
    "\treturn graph_break_stage2[i][code % block_size];",
    "}",
    "",
    "#endif /* UNICODE_GRAPHBREAK_H */",
)))
