from math import log
import codecs

"""
Result:
    This class is used to store data about a given instance of an ambiguous word.
    guessDicts is a dictionary of dictionaries, keyed by classifier:
        guessDicts[cfier][guess] = a float, the confidence in that guess

Parameters:
    answers: A list of strings of the correct senses for this ambiguous word.
"""
class Result:
    """Guesses collected for one instance of an ambiguous word.

    guessDicts is a dictionary of dictionaries, keyed by classifier:
        guessDicts[cfier][guess] = a float, the confidence in that guess
    It starts out with empty entries for 'bayes', 'cos' and 'dlist'.

    Parameters:
        answers: A list of strings of the correct senses for this word.
    """

    def __init__(self, answers):
        self.answers = answers
        # Placeholder value until setAnswers() supplies the word.
        self.word = "_none_"
        self.guessDicts = {'bayes': {}, 'cos': {}, 'dlist': {}}

    def __getitem__(self, key):
        """Return the {guess: confidence} dictionary stored for classifier key.

        Raises:
            KeyError: if guessDicts has no entry for key.
        """
        return self.guessDicts[key]

    def setAnswers(self, answers, word):
        """Replace the stored correct senses and the ambiguous word.

        Parameters:
            answers: the new list of correct senses
            word:    the ambiguous word
        """
        self.answers = answers
        self.word = word

    def addGuess(self, cfier, guess, conf):
        """Record the confidence of one guess made by one classifier.

        A classifier name that has not been seen before gets a fresh
        dictionary; an existing guess is overwritten.

        Parameters:
            cfier: the classifier name used as key into guessDicts
            guess: the guessed sense
            conf:  the confidence in that guess
        """
        # setdefault replaces dict.has_key(), which Python 3 removed.
        self.guessDicts.setdefault(cfier, {})[guess] = conf
        
"""
Entry:
    This class is used to represent a given word in the context of an ambiguous word.
        
Parameters:
    word: a string, the text of this word (the ambiguous word itself only for a head Entry)
    tags: a dictionary of all the features of the word
        tags[head] = True if this is the ambiguous word
        tags['senses'] is a list of strings of the correct senses
"""
class Entry:
    """A single word together with its dictionary of feature tags."""

    def __init__(self, word, tags):
        self.tags = tags
        self.word = word

    def __str__(self):
        """Build the '(%s, (%s))' text from the UTF-8 encoded word and tags.

        The tags are rendered as 'key=value' pairs joined by ', ' before
        being encoded.
        """
        encoded_word = codecs.encode(self.word, 'utf8')
        pairs = []
        for name, value in self.tags.items():
            pairs.append('%s=%s' % (name, value))
        encoded_tags = codecs.encode(', '.join(pairs), 'utf8')
        return '(%s, (%s))' % (encoded_word, encoded_tags)

"""
Instance:
    This class is used to represent an instance of an ambiguous word.
        
Parameters:
    context: a list of Entries representing the context of the ambiguous word
    offset:  the index of the ambiguous word in the context
    head:    the Entry of the ambiguous word
"""
class Instance:
    """One occurrence of an ambiguous word: its context, offset and head."""

    def __init__(self, context, offset, head):
        self.head = head
        self.offset = offset
        self.context = context

    def __str__(self):
        """Build the '(%s, %d, (%s))' text for this instance.

        The first field is the UTF-8 encoded head word, the second the
        offset, and the third every context item formatted with '%s' and
        joined by ', '.
        """
        head_text = codecs.encode(self.head.word, 'utf8')
        offset = self.offset
        context_text = ', '.join(['%s' % item for item in self.context])
        return '(%s, %d, (%s))' % (head_text, offset, context_text)

"""
Rule:
    This class is used to represent a rule for a decision list algorithm
        
Parameters:
    feature: a string, the feature that is being "decided" on
    sense:   the sense that will be chosen if this rule applies
    weight:  the likelihood that the sense is correct, given the feature
"""
class Rule:
    """A decision-list rule: a feature, the sense it selects, and a weight."""

    def __init__(self, feature, sense, weight):
        # Plain value holder; the three fields are stored unchanged.
        self.feature, self.sense, self.weight = feature, sense, weight
        
"""
ConfDict:
    This class is used to store information used to convert classifier
    scores into comparable confidences.  It takes classifier scores that
    have been normalized to be between 0 and 1, and it puts them into ten
    "buckets".  We then calculate the accuracy of the guesses in each "bucket",
    and use that accuracy for a confidence of the scores in each "bucket".
        
"""
class ConfDict:
    """Buckets of normalized classifier scores and their observed accuracy.

    cDict maps a bucket index to a dictionary with the keys 'correct',
    'total' and 'prob'.  Buckets 0..9 receive the counts; the entries -1 and
    10 are created as well, with initial probabilities 0.0 and 1.0.
    """

    def __init__(self):
        numBuckets = 10
        self.cDict = {}
        # Width of one bucket on the 0..1 confidence scale.
        self.fraction = 1.0 / float(numBuckets)
        for i in range(numBuckets):
            self.cDict[i] = {'correct': 0.0, 'total': 0.0, 'prob': 0.0}
        # Entries just outside the bucket range; addConf never counts into
        # them because it clamps the index to 0..9.
        self.cDict[-1] = {'correct': 0.0, 'total': 0.0, 'prob': 0.0}
        self.cDict[10] = {'correct': 0.0, 'total': 0.0, 'prob': 1.0}

    def __getitem__(self, index):
        """Return the 'prob' value stored for bucket index.

        Raises:
            KeyError: if index is not a key of cDict.
        """
        return self.cDict[index]['prob']

    def addConf(self, confidence, correct):
        """Count one guess in the bucket that matches its confidence.

        Increments the bucket's total and, when the guess was correct, its
        number of correct guesses.

        Parameters:
            confidence: the normalized (between 0 and 1) confidence
            correct:    a boolean, whether the guess is correct
        """
        # Any value of 1.0 or more lands in the last bucket, so cap it first:
        # the loop below would otherwise run confidence / fraction times and
        # never terminate for float('inf').
        confidence = min(confidence, 1.0)
        index = 0.0
        # Selects the bucket by stepping down one bucket width at a time.
        while confidence > self.fraction:
            confidence -= self.fraction
            index += self.fraction
        # Presumably 10.1 (not 10) keeps float error in the running sum, e.g.
        # 0.7999999999999999, from truncating to the bucket below.
        index = max(0, min(int(10.1 * index), 9))
        self.cDict[index]['total'] += 1.0
        if correct:
            self.cDict[index]['correct'] += 1.0

    def countToProb(self):
        """Convert the counts of every entry into a probability.

        Sets each entry's 'prob' to correct / total, or to 0.0 when the
        entry has no guesses.
        """
        # NOTE(review): this also resets the 'prob' of entry 10 from its
        # initial 1.0 to 0.0, because that entry never receives counts --
        # confirm this is intended.
        for bucket in self.cDict.values():
            if bucket['total'] > 0.0:
                bucket['prob'] = bucket['correct'] / bucket['total']
            else:
                bucket['prob'] = 0.0

    def writeToFile(self, filename):
        """Write one tab-separated line per entry: index, prob and total.

        Parameters:
            filename: path of the file to (over)write
        """
        fileArray = []
        for key in self.cDict:
            entry = self.cDict[key]
            fileArray.append("%f\t%f\t%f\t\n" % (key, entry['prob'],
                                                 entry['total']))
        # The with-block closes the file even when writing fails.
        with open(filename, "w") as fp:
            fp.writelines(fileArray)

"""
Word:
    This class is used to store data about a given lexical element.
    It has a dictionary of Senses (sDict), a list of words that have been seen
    in any context of this word (cList), and other data structures.
        
Parameters:
    alpha: the alpha value used for alpha-smoothing in Bayes
    word:  a string, the ambiguous word
"""
class Word:
    """Data about one lexical element (an ambiguous word).

    Holds a dictionary of Senses (sDict), the words seen in any context of
    this word (cDict / cList), the IDF vector that parallels cList, the
    feature frequencies (fDict) and the decision-list rules
    (ruleDict / ruleList).

    Parameters:
        alpha: the alpha value used for alpha-smoothing
        word:  a string, the ambiguous word
    """

    def __init__(self, alpha, word = "__NONE__"):
        self.word = word
        self.cDict = {}
        self.cList = []
        self.sDict = {}
        self.idfVector= []
        self.ruleDict = {}
        self.ruleList = []
        self.fDict = {}
        self.alpha = alpha

    def incFeature(self, feature, count):
        """Add count to the frequency of feature in fDict.

        Parameters:
            feature: the feature in question
            count:   the number of times that feature was encountered
        """
        if feature in self.fDict:
            self.fDict[feature] += count
        else:
            self.fDict[feature] = count

    def addRule(self, feature, sense, weight):
        """Add a rule to ruleDict, keeping the heaviest rule per feature.

        The weight is alpha-smoothed against the feature's frequency and the
        number of senses before it is compared with any existing rule.

        Parameters:
            feature: a string, the feature in question
            sense:   a string, the sense chosen if this rule applies
            weight:  a float, the likelihood that the sense is correct,
                     given the feature

        Raises:
            KeyError: if feature is not in fDict.
        """
        alpha = self.alpha
        weight = (weight + alpha) / (self.fDict[feature] + len(self.sDict)*alpha)
        if (feature not in self.ruleDict
                or self.ruleDict[feature].weight < weight):
            self.ruleDict[feature] = Rule(feature, sense, weight)

    def ruleDictToList(self):
        """Rebuild ruleList from ruleDict, sorted by descending weight.

        The list is rebuilt rather than appended to, so calling this more
        than once does not leave duplicate rules behind.
        """
        # key/reverse replaces the cmp-function sort that Python 3 removed;
        # the sort stays stable, so equal weights keep their ruleDict order.
        self.ruleList = sorted(self.ruleDict.values(),
                               key=lambda rule: rule.weight, reverse=True)

    def getDLGuess(self, sense, featCount):
        """Return the first rule for sense whose feature is in this context.

        Parameters:
            sense:     a string, the sense being guessed
            featCount: a dictionary of the features seen in a given context

        Returns:
            rule: the first matching Rule in ruleList; when none matches, a
                  fallback Rule with weight 1 / (number of senses)

        Raises:
            ZeroDivisionError: if no rule matches and sDict is empty.
        """
        for rule in self.ruleList:
            if rule.feature in featCount and rule.sense == sense:
                return rule
        return Rule("blaha", sense, 1. / len(self.sDict))

    def addToContext(self, wordList):
        """Add the words that have been seen to cDict.

        Parameters:
            wordList: a list of words seen in the context of this word
        """
        for word in wordList:
            if word not in self.cDict:
                self.cDict[word] = word

    def contextDictToList(self):
        """Convert cDict to the list cList and build the matching IDF vector.

        idfVector is rebuilt together with cList so that entry i of one
        always belongs to entry i of the other.
        """
        # list() keeps cList indexable on Python 3, where keys() is a view.
        self.cList = list(self.cDict)
        self.idfVector = []
        numSenses = float(len(self.sDict))
        for word in self.cList:
            self.incIDF(word, numSenses)

    def incIDF(self, word, numSenses):
        """Append the IDF value of word to idfVector.

        The value is log(numSenses / number of senses whose featCount
        contains word).  A word found in no sense is reported on stdout and
        gets a 0.0 placeholder.

        Parameters:
            word:      a string, the word to add to the IDF vector
            numSenses: the total number of senses of the ambiguous word
        """
        value = sum((1.0 for sense in self.sDict.values()
                     if word in sense.featCount), 0.0)
        if value == 0.0:
            print("word: %s numSenses: %d" % (word, numSenses))
            # Keep idfVector aligned with cList.  The placeholder is never
            # used by createWordVectors, because no sense contains the word.
            self.idfVector.append(0.0)
        else:
            self.idfVector.append(log(numSenses / value))

    def createWordVectors(self):
        """Create the representative vector (wordVector) of each Sense.

        Entry i of a sense's vector is its count of cList[i] multiplied by
        idfVector[i], or 0.0 when the sense never saw that word.
        """
        for sense in self.sDict.values():
            sense.wordVector = []
            for i, word in enumerate(self.cList):
                if word in sense.featCount:
                    value = float(sense.featCount[word]) * self.idfVector[i]
                else:
                    value = 0.0
                sense.wordVector.append(value)
 
"""
Sense:
    This class is used to represent a sense of an ambiguous word
        
Parameters:
    label: a string, the sense
"""               
class Sense:
    """One sense of an ambiguous word, with its counts and Bayesian score.

    Parameters:
        label: a string, the sense
    """

    def __init__(self, label = "__NONE__"):
        self.label = label
        self.prob = 0
        self.score = 0
        self.occurrences = 0.0
        self.featCount = {}
        self.binRuleDict = {}
        self.binRuleList = []
        self.wordVector = []
        self.nWords = 0.0
        self.alpha = .0001

    def setAlpha(self, alpha):
        """Set the alpha value used by smooth()."""
        self.alpha = alpha

    def setProb(self, prob):
        """Store the probability of this sense and reset its occurrence count.

        Parameters:
            prob: the probability to store
        """
        # The original assigned a misspelled attribute here ("occurances"),
        # which left the real counter untouched.
        self.occurrences = 0.0
        self.prob = prob

    def resetScore(self):
        """Reset the score to log(prob).

        Raises:
            ValueError: if prob is not positive (math.log domain error).
        """
        self.score = log(self.prob)

    def incOccurances(self):
        """Count one more occurrence of this sense."""
        self.occurrences += 1.0

    def incScore(self, features):
        """Add the log of the smoothed feature count to the score.

        Sums the featCount values of every feature that this sense has seen,
        smooths the sum with smooth(), and adds its log to score.

        Parameters:
            features: a list of features seen in the context
        """
        seen = sum((self.featCount[key] for key in features
                    if key in self.featCount), 0.0)
        self.score += log(self.smooth(seen))

    def smooth(self, score):
        """Return the alpha-smoothed value of score.

        Computes (score + alpha) / (nWords + len(featCount) * alpha).

        Raises:
            ZeroDivisionError: if the denominator is zero, e.g. before any
                words were added.
        """
        return (score + self.alpha) / (self.nWords + len(self.featCount)*self.alpha)

    def addWords(self, features):
        """Count every feature in featCount and in the running total nWords.

        Parameters:
            features: a list of features seen in the context
        """
        for key in features:
            self.nWords += 1.0
            if key in self.featCount:
                self.featCount[key] += 1.0
            else:
                self.featCount[key] = 1.0
  
"""
Weight:
    This class is used to represent a weight for a classifier score
"""  
class Weight:
    """A weight for a classifier score, derived from its observed accuracy."""

    def __init__(self):
        self.correct = 0.0
        self.total = 0.0
        self.prob = 0.0

    def addWord(self, isCorr):
        """Score a guess: count it, and count it as correct when it was.

        Parameters:
            isCorr: a boolean, whether the classifier was correct on this
                    word or not
        """
        if isCorr:
            self.correct += 1.0
        self.total += 1.0

    def calcProb(self):
        """Set prob to correct / total, or to 0.0 when nothing was scored."""
        # Guard the empty case the same way ConfDict.countToProb does,
        # instead of raising ZeroDivisionError.
        if self.total > 0.0:
            self.prob = self.correct / self.total
        else:
            self.prob = 0.0

