Я реализовал внутренний-внешний алгоритм в Python для NLP, но я не уверен, что код оптимален. В заключение я сначала реализовал внутреннюю функцию, затем внешнюю функцию и, наконец, алгоритм изнутри-снаружи, используя оба. У вас есть предложения по оптимизации и упрощению кода, которые вы можете найти ниже? Заранее спасибо!
def inside_algorithm(words, rules):
length = len(words)
inside = {lhs: [[0] * length] * length for lhs in rules.non_terminals}
for left_hand_side in rules.non_terminals:
for i in range(length):
word = words[i]
if word in rules and left_hand_side in rules[word]:
inside[left_hand_side][i][i] = rules[word][left_hand_side].prob
else:
inside[left_hand_side][i][i] = 0
for left_hand_side in rules:
for rule in rules[left_hand_side].values():
for i in range(length - 1):
for j in range(i, length):
for k in range(i, j):
product = 1
product *= rule.prob
product *= inside[rule.right_hand_side[0]][i][k]
product *= inside[rule.rhs[1]][k + 1][j]
inside[left_hand_side][i][j] += product
return inside
def outside_algorithm(words, rules, inside):
length = len(words)
outside = {left_hand_side: [[0] * length] * length for left_hand_side in rules.non_terminals}
outside[rules.start_symbol][0][length - 1] = 1
for left_hand_side in rules:
for rule in rules[left_hand_side].values():
right_hand_side="|".join(rule.right_hand_side[::-1])
if right_hand_side not in rules[left_hand_side]:
continue
right_rule = rules[left_hand_side][right_hand_side]
for i in range(length - 1):
for j in range(1, length):
if i == 0 and j == (length - 1):
continue
for k in range(i):
product = 1
product *= rule.probability
product *= inside[rule.right_hand_side[0]][k][i - 1]
product *= outside[left_hand_side][k][j]
outside[rule.right_hand_side[1]][i][j] += product
for k in range(j + 1, length):
product = 1
product *= right_rule.probability
product *= inside[rule.right_hand_side[0]][i][k]
product *= outside[left_hand_side][i][k]
outside[rule.right_hand_side[1]][i][j] += product
return outside
def inside_outside_algorithm(words, rules, count):
length = len(words)
trees = (rules, words)
top = []
for tree in trees:
if tree.root == 'TOP':
top.append(tree)
inside = inside_algorithm(words, rules)
outside = outside_algorithm(words, rules, top)
delta = inside[rules.start_symbol][0][length - 1]
epsilon = {left_hand_side: [[0] * length] * length for left_hand_side in inside}
for left_hand_side in epsilon:
for i in range(length):
for j in range(length):
epsilon[left_hand_side][i][j] = inside[left_hand_side][i][j] * outside[left_hand_side][i][j]
gamma = {}
for left_hand_side in rules:
for rule in rules[left_hand_side].values():
gamma[rule] = [[[0] * length] * length] * length
for i in range(length - 1):
for j in range(i + 1, length):
for k in range(i, j):
gamma[rule][i][k][j] = outside[rule.left_hand_side][i][j] *
rule.probability *
inside[rule.right_hand_side[0]][i][k] *
inside[rule.right_hand_side[1]][k + 1][j]
for left_hand_side in rules:
if left_hand_side not in count:
count[left_hand_side] = {}
for rule in rules[left_hand_side].values():
if tuple(rule.right_hand_side) not in count[left_hand_side]:
count[left_hand_side][tuple(rule.right_hand_side)] = 0
for i in range(length - 1):
for j in range(i + 1, length):
for k in range(i, j):
count[left_hand_side][tuple(rule.right_hand_side)] += gamma[rule][i][k][j] / delta
for theta in rules:
for left_hand_side in rules[theta]:
if left_hand_side not in count:
count[left_hand_side] = {}
for i in range(length):
for left_hand_side in rules[words[i]]:
if tuple([words[i]]) in count[left_hand_side]:
count[left_hand_side][tuple([words[i]])] += epsilon[left_hand_side][i][i] / delta
else:
count[left_hand_side][tuple([words[i]])] = epsilon[left_hand_side][i][i] / delta