Difference between most probable and most correct answer
The following python notebook shows that the following example of distribution with n rows is a case where the most probable and most correct answers are different (see the two last blocks)
dist = { 1 : 0.75, 2 : 0.125, 3 : 0.125 } n = 8
import pprint # generate the possible worlds def possible_worlds(dist, n): worlds = [[]] for i in range(n): new_worlds = [] for w in worlds: for v in dist: if len(w) <= i: w.append(v) else: w[i] = v new_worlds.append(w.copy()) worlds = new_worlds return worlds print(len(possible_worlds(dist, n)))
6561
# compute a key for each world based on the values it contains def world_key(w): w.sort() count = 0 current = w[0] key = "" for i in range(len(w)): count+=1 if len(w)-1 == i or w[i+1] != current: key += "{}x{}".format(count, current) if len(w)-1 != i and w[i+1] != current: key += " " current = w[i+1] count = 0 return key # compute the probability of a world def world_prob(w, dist): prob = 1 for v in w: prob*=dist[v] return prob
# computes classes of worlds based on a function computing a key for each world # worlds are in the same class iff they have the same key def world_classes(dist, n, class_key=world_key): worlds = possible_worlds(dist, n) classes = {} for w in worlds: key = class_key(w) if key in classes: classes[key]["count"]+=1 classes[key]["class_prob"]+=world_prob(w, dist) classes[key]["possible_values"].add(world_key(w)) else: wp = world_prob(w, dist) classes[key] = { "world_ex": w, "possible_values": {world_key(w)}, "class_prob": wp, "count": 1 } return classes # the classes of possible worlds based on the function work_key pprint.pprint(world_classes(dist, n))
{'1x1 1x2 6x3': {'class_prob': 2.002716064453125e-05, 'count': 56, 'possible_values': {'1x1 1x2 6x3'}, 'world_ex': [1, 2, 3, 3, 3, 3, 3, 3]}, '1x1 2x2 5x3': {'class_prob': 6.008148193359375e-05, 'count': 168, 'possible_values': {'1x1 2x2 5x3'}, 'world_ex': [1, 2, 2, 3, 3, 3, 3, 3]}, '1x1 3x2 4x3': {'class_prob': 0.00010013580322265625, 'count': 280, 'possible_values': {'1x1 3x2 4x3'}, 'world_ex': [1, 2, 2, 2, 3, 3, 3, 3]}, '1x1 4x2 3x3': {'class_prob': 0.00010013580322265625, 'count': 280, 'possible_values': {'1x1 4x2 3x3'}, 'world_ex': [1, 2, 2, 2, 2, 3, 3, 3]}, '1x1 5x2 2x3': {'class_prob': 6.008148193359375e-05, 'count': 168, 'possible_values': {'1x1 5x2 2x3'}, 'world_ex': [1, 2, 2, 2, 2, 2, 3, 3]}, '1x1 6x2 1x3': {'class_prob': 2.002716064453125e-05, 'count': 56, 'possible_values': {'1x1 6x2 1x3'}, 'world_ex': [1, 2, 2, 2, 2, 2, 2, 3]}, '1x1 7x2': {'class_prob': 2.86102294921875e-06, 'count': 8, 'possible_values': {'1x1 7x2'}, 'world_ex': [1, 2, 2, 2, 2, 2, 2, 2]}, '1x1 7x3': {'class_prob': 2.86102294921875e-06, 'count': 8, 'possible_values': {'1x1 7x3'}, 'world_ex': [1, 3, 3, 3, 3, 3, 3, 3]}, '1x2 7x3': {'class_prob': 4.76837158203125e-07, 'count': 8, 'possible_values': {'1x2 7x3'}, 'world_ex': [2, 3, 3, 3, 3, 3, 3, 3]}, '2x1 1x2 5x3': {'class_prob': 0.0003604888916015625, 'count': 168, 'possible_values': {'2x1 1x2 5x3'}, 'world_ex': [1, 1, 2, 3, 3, 3, 3, 3]}, '2x1 2x2 4x3': {'class_prob': 0.0009012222290039062, 'count': 420, 'possible_values': {'2x1 2x2 4x3'}, 'world_ex': [1, 1, 2, 2, 3, 3, 3, 3]}, '2x1 3x2 3x3': {'class_prob': 0.001201629638671875, 'count': 560, 'possible_values': {'2x1 3x2 3x3'}, 'world_ex': [1, 1, 2, 2, 2, 3, 3, 3]}, '2x1 4x2 2x3': {'class_prob': 0.0009012222290039062, 'count': 420, 'possible_values': {'2x1 4x2 2x3'}, 'world_ex': [1, 1, 2, 2, 2, 2, 3, 3]}, '2x1 5x2 1x3': {'class_prob': 0.0003604888916015625, 'count': 168, 'possible_values': {'2x1 5x2 1x3'}, 'world_ex': [1, 1, 2, 2, 2, 2, 2, 3]}, '2x1 6x2': {'class_prob': 6.008148193359375e-05, 'count': 28, 'possible_values': {'2x1 6x2'}, 'world_ex': [1, 1, 2, 2, 2, 2, 2, 2]}, '2x1 6x3': {'class_prob': 6.008148193359375e-05, 'count': 28, 'possible_values': {'2x1 6x3'}, 'world_ex': [1, 1, 3, 3, 3, 3, 3, 3]}, '2x2 6x3': {'class_prob': 1.6689300537109375e-06, 'count': 28, 'possible_values': {'2x2 6x3'}, 'world_ex': [2, 2, 3, 3, 3, 3, 3, 3]}, '3x1 1x2 4x3': {'class_prob': 0.003604888916015625, 'count': 280, 'possible_values': {'3x1 1x2 4x3'}, 'world_ex': [1, 1, 1, 2, 3, 3, 3, 3]}, '3x1 2x2 3x3': {'class_prob': 0.00720977783203125, 'count': 560, 'possible_values': {'3x1 2x2 3x3'}, 'world_ex': [1, 1, 1, 2, 2, 3, 3, 3]}, '3x1 3x2 2x3': {'class_prob': 0.00720977783203125, 'count': 560, 'possible_values': {'3x1 3x2 2x3'}, 'world_ex': [1, 1, 1, 2, 2, 2, 3, 3]}, '3x1 4x2 1x3': {'class_prob': 0.003604888916015625, 'count': 280, 'possible_values': {'3x1 4x2 1x3'}, 'world_ex': [1, 1, 1, 2, 2, 2, 2, 3]}, '3x1 5x2': {'class_prob': 0.000720977783203125, 'count': 56, 'possible_values': {'3x1 5x2'}, 'world_ex': [1, 1, 1, 2, 2, 2, 2, 2]}, '3x1 5x3': {'class_prob': 0.000720977783203125, 'count': 56, 'possible_values': {'3x1 5x3'}, 'world_ex': [1, 1, 1, 3, 3, 3, 3, 3]}, '3x2 5x3': {'class_prob': 3.337860107421875e-06, 'count': 56, 'possible_values': {'3x2 5x3'}, 'world_ex': [2, 2, 2, 3, 3, 3, 3, 3]}, '4x1 1x2 3x3': {'class_prob': 0.02162933349609375, 'count': 280, 'possible_values': {'4x1 1x2 3x3'}, 'world_ex': [1, 1, 1, 1, 2, 3, 3, 3]}, '4x1 2x2 2x3': {'class_prob': 0.032444000244140625, 'count': 420, 'possible_values': {'4x1 2x2 2x3'}, 'world_ex': [1, 1, 1, 1, 2, 2, 3, 3]}, '4x1 3x2 1x3': {'class_prob': 0.02162933349609375, 'count': 280, 'possible_values': {'4x1 3x2 1x3'}, 'world_ex': [1, 1, 1, 1, 2, 2, 2, 3]}, '4x1 4x2': {'class_prob': 0.0054073333740234375, 'count': 70, 'possible_values': {'4x1 4x2'}, 'world_ex': [1, 1, 1, 1, 2, 2, 2, 2]}, '4x1 4x3': {'class_prob': 0.0054073333740234375, 'count': 70, 'possible_values': {'4x1 4x3'}, 'world_ex': [1, 1, 1, 1, 3, 3, 3, 3]}, '4x2 4x3': {'class_prob': 4.172325134277344e-06, 'count': 70, 'possible_values': {'4x2 4x3'}, 'world_ex': [2, 2, 2, 2, 3, 3, 3, 3]}, '5x1 1x2 2x3': {'class_prob': 0.0778656005859375, 'count': 168, 'possible_values': {'5x1 1x2 2x3'}, 'world_ex': [1, 1, 1, 1, 1, 2, 3, 3]}, '5x1 2x2 1x3': {'class_prob': 0.0778656005859375, 'count': 168, 'possible_values': {'5x1 2x2 1x3'}, 'world_ex': [1, 1, 1, 1, 1, 2, 2, 3]}, '5x1 3x2': {'class_prob': 0.0259552001953125, 'count': 56, 'possible_values': {'5x1 3x2'}, 'world_ex': [1, 1, 1, 1, 1, 2, 2, 2]}, '5x1 3x3': {'class_prob': 0.0259552001953125, 'count': 56, 'possible_values': {'5x1 3x3'}, 'world_ex': [1, 1, 1, 1, 1, 3, 3, 3]}, '5x2 3x3': {'class_prob': 3.337860107421875e-06, 'count': 56, 'possible_values': {'5x2 3x3'}, 'world_ex': [2, 2, 2, 2, 2, 3, 3, 3]}, '6x1 1x2 1x3': {'class_prob': 0.155731201171875, 'count': 56, 'possible_values': {'6x1 1x2 1x3'}, 'world_ex': [1, 1, 1, 1, 1, 1, 2, 3]}, '6x1 2x2': {'class_prob': 0.0778656005859375, 'count': 28, 'possible_values': {'6x1 2x2'}, 'world_ex': [1, 1, 1, 1, 1, 1, 2, 2]}, '6x1 2x3': {'class_prob': 0.0778656005859375, 'count': 28, 'possible_values': {'6x1 2x3'}, 'world_ex': [1, 1, 1, 1, 1, 1, 3, 3]}, '6x2 2x3': {'class_prob': 1.6689300537109375e-06, 'count': 28, 'possible_values': {'6x2 2x3'}, 'world_ex': [2, 2, 2, 2, 2, 2, 3, 3]}, '7x1 1x2': {'class_prob': 0.13348388671875, 'count': 8, 'possible_values': {'7x1 1x2'}, 'world_ex': [1, 1, 1, 1, 1, 1, 1, 2]}, '7x1 1x3': {'class_prob': 0.13348388671875, 'count': 8, 'possible_values': {'7x1 1x3'}, 'world_ex': [1, 1, 1, 1, 1, 1, 1, 3]}, '7x2 1x3': {'class_prob': 4.76837158203125e-07, 'count': 8, 'possible_values': {'7x2 1x3'}, 'world_ex': [2, 2, 2, 2, 2, 2, 2, 3]}, '8x1': {'class_prob': 0.1001129150390625, 'count': 1, 'possible_values': {'8x1'}, 'world_ex': [1, 1, 1, 1, 1, 1, 1, 1]}, '8x2': {'class_prob': 5.960464477539063e-08, 'count': 1, 'possible_values': {'8x2'}, 'world_ex': [2, 2, 2, 2, 2, 2, 2, 2]}, '8x3': {'class_prob': 5.960464477539063e-08, 'count': 1, 'possible_values': {'8x3'}, 'world_ex': [3, 3, 3, 3, 3, 3, 3, 3]}}
# the classes of possible worlds where a class contains the world having the same sum of values pprint.pprint(world_classes(dist, n, sum))
{8: {'class_prob': 0.1001129150390625, 'count': 1, 'possible_values': {'8x1'}, 'world_ex': [1, 1, 1, 1, 1, 1, 1, 1]}, 9: {'class_prob': 0.13348388671875, 'count': 8, 'possible_values': {'7x1 1x2'}, 'world_ex': [1, 1, 1, 1, 1, 1, 1, 2]}, 10: {'class_prob': 0.2113494873046875, 'count': 36, 'possible_values': {'7x1 1x3', '6x1 2x2'}, 'world_ex': [1, 1, 1, 1, 1, 1, 1, 3]}, 11: {'class_prob': 0.1816864013671875, 'count': 112, 'possible_values': {'5x1 3x2', '6x1 1x2 1x3'}, 'world_ex': [1, 1, 1, 1, 1, 1, 2, 3]}, 12: {'class_prob': 0.16113853454589844, 'count': 266, 'possible_values': {'4x1 4x2', '5x1 2x2 1x3', '6x1 2x3'}, 'world_ex': [1, 1, 1, 1, 1, 1, 3, 3]}, 13: {'class_prob': 0.10021591186523438, 'count': 504, 'possible_values': {'5x1 1x2 2x3', '4x1 3x2 1x3', '3x1 5x2'}, 'world_ex': [1, 1, 1, 1, 1, 2, 3, 3]}, 14: {'class_prob': 0.062064170837402344, 'count': 784, 'possible_values': {'4x1 2x2 2x3', '2x1 6x2', '3x1 4x2 1x3', '5x1 3x3'}, 'world_ex': [1, 1, 1, 1, 1, 3, 3, 3]}, 15: {'class_prob': 0.02920246124267578, 'count': 1016, 'possible_values': {'1x1 7x2', '2x1 5x2 1x3', '3x1 3x2 2x3', '4x1 1x2 3x3'}, 'world_ex': [1, 1, 1, 1, 2, 3, 3, 3]}, 16: {'class_prob': 0.0135384202003479, 'count': 1107, 'possible_values': {'1x1 6x2 1x3', '2x1 4x2 2x3', '3x1 2x2 3x3', '4x1 4x3', '8x2'}, 'world_ex': [1, 1, 1, 1, 3, 3, 3, 3]}, 17: {'class_prob': 0.004867076873779297, 'count': 1016, 'possible_values': {'1x1 5x2 2x3', '2x1 3x2 3x3', '3x1 1x2 4x3', '7x2 1x3'}, 'world_ex': [1, 1, 1, 2, 3, 3, 3, 3]}, 18: {'class_prob': 0.0017240047454833984, 'count': 784, 'possible_values': {'3x1 5x3', '6x2 2x3', '1x1 4x2 3x3', '2x1 2x2 4x3'}, 'world_ex': [1, 1, 1, 3, 3, 3, 3, 3]}, 19: {'class_prob': 0.0004639625549316406, 'count': 504, 'possible_values': {'5x2 3x3', '2x1 1x2 5x3', '1x1 3x2 4x3'}, 'world_ex': [1, 1, 2, 3, 3, 3, 3, 3]}, 20: {'class_prob': 0.00012433528900146484, 'count': 266, 'possible_values': {'2x1 6x3', '1x1 2x2 5x3', '4x2 4x3'}, 'world_ex': [1, 1, 3, 3, 3, 3, 3, 3]}, 21: {'class_prob': 2.3365020751953125e-05, 'count': 112, 'possible_values': {'3x2 5x3', '1x1 1x2 6x3'}, 'world_ex': [1, 2, 3, 3, 3, 3, 3, 3]}, 22: {'class_prob': 4.5299530029296875e-06, 'count': 36, 'possible_values': {'2x2 6x3', '1x1 7x3'}, 'world_ex': [1, 3, 3, 3, 3, 3, 3, 3]}, 23: {'class_prob': 4.76837158203125e-07, 'count': 8, 'possible_values': {'1x2 7x3'}, 'world_ex': [2, 3, 3, 3, 3, 3, 3, 3]}, 24: {'class_prob': 5.960464477539063e-08, 'count': 1, 'possible_values': {'8x3'}, 'world_ex': [3, 3, 3, 3, 3, 3, 3, 3]}}
# returns the class with the highest probability def most_probable_classes(classes): keys = [] prob = 0 for k in classes: if prob == classes[k]["class_prob"]: keys.append(k) if prob < classes[k]["class_prob"]: keys = [k] prob = classes[k]["class_prob"] return { "keys": keys, "prob": prob } # compute the most probable answers for a given aggregate function def most_probable_ans(dist, n, agg): classes = world_classes(dist, n, agg) answers = [] mc = most_probable_classes(classes) for k in mc["keys"]: answers.append({ "ans": k, "prob": mc["prob"], "possible_values": classes[k]["possible_values"] }) return answers print(most_probable_ans(dist, n, sum))
[{'ans': 10, 'prob': 0.2113494873046875, 'possible_values': {'7x1 1x3', '6x1 2x2'}}]
# compute the answer the most correct answer for a given aggregate function def most_correct_ans(dist, n, agg): classes = world_classes(dist, n) answers = [] mc = most_probable_classes(classes) for k in mc["keys"]: world_of_most_probable_class = classes[k]["world_ex"] answers.append({ "ans": agg(world_of_most_probable_class), "prob": mc["prob"], "possible_values": k }) return answers print(most_correct_ans(dist, n, sum))
[{'ans': 11, 'prob': 0.155731201171875, 'possible_values': '6x1 1x2 1x3'}]
The case with another distribution. It was almost a good example, but there are two most probable answers :
dist = { 0 : 0.5, 1 : 0.25, 2 : 0.25 } n = 4 print(most_correct_ans(dist, n, sum)) print(most_probable_ans(dist, n, sum))
[{'ans': 3, 'prob': 0.1875, 'possible_values': '2x0 1x1 1x2'}] [{'ans': 2, 'prob': 0.21875, 'possible_values': {'2x0 2x1', '3x0 1x2'}}, {'ans': 3, 'prob': 0.21875, 'possible_values': {'1x0 3x1', '2x0 1x1 1x2'}}]
import statistics dist = { 1 : 0.75, 2 : 0.125, 3 : 0.125 } n = 8 print(most_correct_ans(dist, n, statistics.median))
[{'ans': 1.0, 'prob': 0.8861846923828125, 'possible_values': {'6x1 1x2 1x3', '5x1 3x3', '8x1', '5x1 3x2', '5x1 1x2 2x3', '7x1 1x2', '5x1 2x2 1x3', '6x1 2x2', '7x1 1x3', '6x1 2x3'}}]