import application.db2 as db
from flask import g
import nltk as nltk
from nltk.corpus import stopwords
import time
import heapq
from collections import Counter
import background.migrate


def search2(inputArr):
    indx = {}
    dbSession = db.Session()
    for inpu in inputArr:
        x = dbSession.query(db.Trunk.name, db.Recipe.recipe_id).filter(db.Trunk.name == inpu).join(db.IngredTrunk).join(
            db.Ingredient).join(db.RecIngred).join(db.Recipe).all()

        indx[inpu] = [str(y[1]) for y in x]

    return(indx)


def stemInput(inputArr):
    inputArr2 = []

    snowball = nltk.SnowballStemmer(language='german')
    stopset = set(stopwords.words('german'))
    for word in inputArr:
        if word in stopset:
            continue
        inputArr2.append(snowball.stem(word))
    return inputArr2
#


def getRecDict2(indx, inputArr):
    dbSession = db.Session()

    outDict = {}
    # 2d to 1d
    indx = sum(indx.values(), [])
    k = Counter(indx)
    indx = k.most_common(1000)
    indx = dict(indx)

    ingred = [x for x in dbSession.query(db.Recipe.recipe_id, db.IngredTrunk.trunk_name, db.IngredTrunk.ingredient_name).filter(
        db.Recipe.recipe_id.in_(indx.keys())).join(db.RecIngred).join(db.Ingredient).join(db.IngredTrunk).all()]
    ingredDict = {}
    # RezeptID, stemmed Ingred, full ingred Name
    # Dict spiegelt DB wieder, key, full ingred, stemmed
    for k, v, i in ingred:
        if k not in ingredDict:
            ingredDict[k] = {}
        if i not in ingredDict[k]:
            ingredDict[k][i] = []
        ingredDict[k][i].append(v)
    inputArr += defaultArr

    # checks overlay per recipeID 
    # itareate over ingreds and checks per stemmed ingred
    # returns accurate percentage of overlay
    # since overlay scare is the key of dict it is reduced by insignificant number to preserve all values  
    for key, value in ingredDict.items():
        overlay, missing = calcOverlay2(inputArr, value)
        while overlay in outDict.keys():
            overlay -= 0.0001
        outDict[overlay] = (int(key), missing)
    
    # return Dict with 20 highest value keys
    outDict2 = {}
    for key in heapq.nlargest(20, outDict.keys()):
        key2 = outDict[key][0]
        missing = outDict[key][1]
        rec = dbSession.query(db.Recipe).filter(
            db.Recipe.recipe_id == key2).first()
        outDict2[key] = (key2, rec.name, rec.url,  [r[0] + ": " + r[1] for r in dbSession.query(db.Ingredient.name,
                                                                                                db.RecIngred.ingredient_amount).join(db.RecIngred).join(db.Recipe).filter(db.Recipe.recipe_id == key2).all()], missing)
    return outDict2


def stem(l1):
    snowball = nltk.SnowballStemmer(language='german')
    stopset = set(stopwords.words('german'))
    stopset |= set("(),")
    l1 = [snowball.stem(l) for l in l1]
    return l1


def calcOverlay2(l1, l2):
    '''Calculates overlay and returns missing ingredients, [score (float), missing([])]'''
    counter = 0
    notIn = []
    for key, ll in l2.items():
        missing = True
        for l in ll:
            if l in l1:
                counter += 1
                missing = False
                break
        if missing:
            notIn.append(key)

    counter = counter / len(l2)
    return counter, notIn


# it is assumed that everyone has this
defaultArr = ["Wasser", "salz", "pfeffer"]