Реализация алгоритма изнутри-снаружи в Python

Я реализовал внутренний-внешний алгоритм в Python для NLP, но я не уверен, что код оптимален. В заключение я сначала реализовал внутреннюю функцию, затем внешнюю функцию и, наконец, алгоритм изнутри-снаружи, используя оба. У вас есть предложения по оптимизации и упрощению кода, которые вы можете найти ниже? Заранее спасибо!

def inside_algorithm(words, rules):

    length = len(words)
    inside = {lhs: [[0] * length] * length for lhs in rules.non_terminals}

    for left_hand_side in rules.non_terminals:
        for i in range(length):
            word = words[i]
            if word in rules and left_hand_side in rules[word]:
                inside[left_hand_side][i][i] = rules[word][left_hand_side].prob
            else:
                inside[left_hand_side][i][i] = 0

    for left_hand_side in rules:
        for rule in rules[left_hand_side].values():
            for i in range(length - 1):
                for j in range(i, length):
                    for k in range(i, j):
                        product = 1
                        product *= rule.prob
                        product *= inside[rule.right_hand_side[0]][i][k]
                        product *= inside[rule.rhs[1]][k + 1][j]
                        inside[left_hand_side][i][j] += product

    return inside


def outside_algorithm(words, rules, inside):

    length = len(words)
    outside = {left_hand_side: [[0] * length] * length for left_hand_side in rules.non_terminals}
    outside[rules.start_symbol][0][length - 1] = 1

    for left_hand_side in rules:
        for rule in rules[left_hand_side].values():
            right_hand_side="|".join(rule.right_hand_side[::-1])
            if right_hand_side not in rules[left_hand_side]:
                continue

            right_rule = rules[left_hand_side][right_hand_side]

            for i in range(length - 1):
                for j in range(1, length):
                    if i == 0 and j == (length - 1):
                        continue

                    for k in range(i):
                        product = 1
                        product *= rule.probability
                        product *= inside[rule.right_hand_side[0]][k][i - 1]
                        product *= outside[left_hand_side][k][j]
                        outside[rule.right_hand_side[1]][i][j] += product

                    for k in range(j + 1, length):
                        product = 1
                        product *= right_rule.probability
                        product *= inside[rule.right_hand_side[0]][i][k]
                        product *= outside[left_hand_side][i][k]
                        outside[rule.right_hand_side[1]][i][j] += product

    return outside


def inside_outside_algorithm(words, rules, count):

    length = len(words)
    trees = (rules, words)
    top = []
    for tree in trees:
        if tree.root == 'TOP':
            top.append(tree)

    inside = inside_algorithm(words, rules)
    outside = outside_algorithm(words, rules, top)

    delta = inside[rules.start_symbol][0][length - 1]
    epsilon = {left_hand_side: [[0] * length] * length for left_hand_side in inside}

    for left_hand_side in epsilon:
        for i in range(length):
            for j in range(length):
                epsilon[left_hand_side][i][j] = inside[left_hand_side][i][j] * outside[left_hand_side][i][j]

    gamma = {}
    for left_hand_side in rules:
        for rule in rules[left_hand_side].values():
            gamma[rule] = [[[0] * length] * length] * length
            for i in range(length - 1):
                for j in range(i + 1, length):
                    for k in range(i, j):
                        gamma[rule][i][k][j] = outside[rule.left_hand_side][i][j] * 
                                               rule.probability * 
                                               inside[rule.right_hand_side[0]][i][k] * 
                                               inside[rule.right_hand_side[1]][k + 1][j]

    for left_hand_side in rules:
        if left_hand_side not in count:
            count[left_hand_side] = {}
        for rule in rules[left_hand_side].values():
            if tuple(rule.right_hand_side) not in count[left_hand_side]:
                count[left_hand_side][tuple(rule.right_hand_side)] = 0
            for i in range(length - 1):
                for j in range(i + 1, length):
                    for k in range(i, j):
                        count[left_hand_side][tuple(rule.right_hand_side)] += gamma[rule][i][k][j] / delta

    for theta in rules:
        for left_hand_side in rules[theta]:
            if left_hand_side not in count:
                count[left_hand_side] = {}

    for i in range(length):
        for left_hand_side in rules[words[i]]:
            if tuple([words[i]]) in count[left_hand_side]:
                count[left_hand_side][tuple([words[i]])] += epsilon[left_hand_side][i][i] / delta
            else:
                count[left_hand_side][tuple([words[i]])] = epsilon[left_hand_side][i][i] / delta

0

Добавить комментарий

Ваш адрес email не будет опубликован. Обязательные поля помечены *