
// It's easier for me to read the data grouped by category like this; it gets flattened
// into a structure that is easier to sort and search.
/**
 * Known search terms grouped by category key.
 *
 * Each category key carries a single-letter prefix to make sorting easier.
 * Several entries list common misspellings alongside the correct spelling so
 * that either form matches.
 */
const data = {

    a_cleaning: [
        'dishwasher soap', 'dish pellets', 'cascade',
        'laundry detergent',
        'tide',
        'toilet paper',
        'paper towel'
    ],
    b_grooming: [
        'soap',
        'shampoo',
        'conditioner',
        'deodorant',
        'toothbrush',
        'toothpaste',
        'floss'
    ],
    c_snacks: [
        'soda',
        'pepsi',
        'coke',
        'distilled water',
        'potato chips',
        'la croix',
        'chips', // 'blue mountain' // chips
        'tortillas chips',
        'cheezits',
        'goldfish'
    ],
    e_cereals: [
        'cheerios',
        'kix',
        'oatmeal',
        'corn flakes',
        'granola',
        'peanut butter',
    ],
    i_cans: [
        'cans',
        'soup',
        'black beans',
        'kidney beans',
        'garbanzo beans',
        'daddy beans',
    ],
    m_baking: [
        'flour',
        'brown sugar',
        'condensed milk',
        'vanilla extract',
        'cocoa powder',

        // list of spices
        "cumin",
    ],
    p_produce: [
        'alfalfa sprouts',
        'apple',
        'apricot',
        'asparagus', 'asparagas',
        'banana',
        'bell pepper',
        'broccoli', 'brocolli', 'broccolli',
        // 'brussel sprouts' is not a substring of the correct spelling, so list both.
        'brussels sprouts', 'brussel sprouts',
        'cabbage',
        'carrots',
        'celery',
        'corn',
        'garlic',
        'green beans',
        'mushrooms',
        'onions', 'onion', 'red onion',
        'orange',
        'potato', 'potatoes',
        'spinach',
        'tomato',
        'zucchinni', 'zuchinni', 'zucchini',
        'watermelon',
        'tofu'
    ],
    s_dairy: [
        'milk',
        'yogurt',
        'cheese',
        'butter',
        'sour cream',
        'cream',
        'rediwhip', 'redi-whip', 'reddi-wip',
    ],
    t_meats: [
        // 'beaf' is kept as a misspelling variant; the correct spelling was missing.
        'beef', 'beaf',
        'chicken breast',
        'chicken',
        'bacon',
        'salmon',
        'sausage'
    ],
    v_frozen: [
        'frozen',
        'ice cream'
    ]
};

/**
 * Inverts a category -> terms table into a single term -> category lookup object.
 *
 * When the same term appears under more than one category, the category that
 * comes later in the table's key order wins.
 *
 * @param {Object} rawData - Object whose values are lists of terms, keyed by category.
 * @returns {Object} Plain object mapping each term to its category key.
 */
function createFlatList(rawData) {
    const termToCategory = {};

    for (const [category, terms] of Object.entries(rawData)) {
        terms.forEach((term) => {
            termToCategory[term] = category;
        });
    }

    return termToCategory;
}

/**
 * Returns the keys of the given lookup object, ordered by the default
 * Array.prototype.sort comparison.
 *
 * @param {Object} flatlist - Lookup object whose keys are the terms.
 * @returns {string[]} New array of the object's own enumerable keys, sorted.
 */
function createSortedKeys(flatlist) {
    const terms = Object.keys(flatlist);
    terms.sort();
    return terms;
}

// Built once when the module is evaluated: a term -> category lookup object, and
// the list of its terms sorted with the default Array.prototype.sort ordering.
const dataFlat = createFlatList(data);
const dataSortedKeys = createSortedKeys(dataFlat);

/**
 * Looks up the category key for an item name.
 *
 * The name is trimmed and lower-cased, then compared against every term in
 * `dataSortedKeys`:
 *  - an exact match returns that term's category immediately;
 *  - otherwise, if one or more terms occur as substrings of the name, the
 *    category of the last such term (in `dataSortedKeys` order) is returned.
 *
 * @param {string} itemName - Free-form item name, e.g. "Whole Milk".
 * @returns {string|null} The category key from `dataFlat`, or null when the
 *     input is not a string or no term matches.
 */
export function findCategory(itemName) {
    // Non-string input (null, undefined, numbers, ...) cannot be matched; report
    // "no category" instead of throwing on toLowerCase().
    if (typeof itemName !== 'string') {
        return null;
    }

    // Trim so that stray surrounding whitespace does not defeat the exact-match check.
    const name = itemName.trim().toLowerCase();

    let phraseMatch = null;
    for (const term of dataSortedKeys) {
        // An exact match is always the best answer; stop looking.
        if (term === name) {
            return dataFlat[term];
        }

        // Several terms may occur inside one name; the last one seen wins.
        // TODO: there is no heuristic for choosing between multiple phrase matches
        // yet. Some form of weighting is probably needed, e.g. 'can' should weigh
        // heavier than 'cream' in 'cream of chicken soup'.
        if (name.includes(term)) {
            phraseMatch = term;
        }
    }

    // No exact match: return the best phrase match found, if any.
    return phraseMatch ? dataFlat[phraseMatch] : null;
}
