Source code for pyfdc.pyfdc

# Client for the FoodData Central food search and food details endpoints
import collections
from typing import Sequence
import requests
import json
from pandas import DataFrame, json_normalize
from itertools import chain
from utils import key_signup
import os
from warnings import warn
import re 


class FoodDataCentral(object):
    """
    Client for the Food Data Central (FDC) food search and food details end points.

    For more details, please see: https://fdc.nal.usda.gov/api-guide.html
    This uses version one of the API access point.

    Instances expose:

    * ``api_key``: the key sent with every request.
    * ``base_url``: the food search URL with the key embedded as a query parameter.
    * ``available_targets``: mapping of the snake_case field names accepted by this
      class to the camelCase keys used in the API response.
    """

    def __init__(self, api_key=None):
        """
        :param api_key: str An FDC API key. When None, the key is read from the
            ``pyfdc_key`` environment variable, falling back to ``key_signup()``
            when that variable is not set. Passing a key explicitly emits a warning.
        """
        if api_key is None:
            if "pyfdc_key" in os.environ:
                self.api_key = os.environ.get("pyfdc_key")
            else:
                # NOTE(review): key_signup is imported from utils; presumably it walks the
                # user through obtaining a key and returns it -- confirm against utils.
                self.api_key = key_signup()
        else:
            warn("Providing an api_key is discouraged, please consider using set_api_key.")
            self.api_key = api_key
        self.base_url = f"https://api.nal.usda.gov/fdc/v1/foods/search?api_key={self.api_key}"
        # Alias the API's camelCase keys with snake_case names.
        # Kept as a public attribute so users can see which keys are available.
        self.available_targets = {"fdc_id": 'fdcId',
                                  "description": 'description',
                                  "scientific_name": 'scientificName',
                                  "common_names": 'commonNames',
                                  "additional_descriptions": 'additionalDescriptions',
                                  "gtin_upc": 'gtinUpc',
                                  "ndb_number": 'ndbNumber',
                                  "published_date": 'publicationDate',
                                  "brand_owner": 'brandOwner',
                                  "ingredients": 'ingredients',
                                  "score": 'score'}

    @staticmethod
    def _check_page_number(page_number):
        """Raise AssertionError unless page_number is an int (None is rejected too)."""
        # Raised explicitly instead of via `assert` so the check survives `python -O`,
        # while callers still see the same exception type and message as before.
        if page_number is None or not isinstance(page_number, int):
            raise AssertionError(f"page_number should be an int not {type(page_number).__name__} ")

    @staticmethod
    def _align_columns(foods, api_names):
        """Return one list per API key holding that key's value for every food, None when absent."""
        # Using None as a placeholder keeps every column the same length, so values
        # belonging to one food always end up on the same row.
        return [[food.get(api_name) for food in foods] for api_name in api_names]

    @staticmethod
    def _nutrients_query(nutrients):
        """Return nutrients as the comma separated string used in the request URL."""
        # A lone number or string is treated as a single nutrient rather than being
        # iterated character by character; numbers are converted so join() accepts them.
        if isinstance(nutrients, (str, int)):
            nutrients = [nutrients]
        return ",".join(str(number) for number in nutrients)

    def _search_foods(self, search_phrase, ingredients, brand_owner, page_number, page_size,
                      sort_field, sort_direction):
        """Run one food search request and return the "foods" list of the JSON response."""
        search_query = {'query': search_phrase,
                        'ingredients': ingredients,
                        'pageSize': page_size,
                        'pageNumber': page_number,
                        'sortBy': sort_field,
                        'sortOrder': sort_direction,
                        'brandOwner': brand_owner}
        # docs
        # https://fdc.nal.usda.gov/api-spec/fdc_api.html#/FDC/postFoodsSearch
        url_response = requests.get(self.base_url, params=search_query,
                                    headers={"User-Agent": "Mozilla-5.0"})
        # Any HTTP error status propagates to the caller as requests.exceptions.HTTPError.
        url_response.raise_for_status()
        return json.loads(url_response.content)["foods"]

    def get_food_info_internal(self, search_phrase=None, ingredients=None, brand_owner=None, target=None,
                               page_number=None, page_size=50, sort_field=None, sort_direction='asc'):
        """
        Search for foods and lazily yield a single field of every result.

        Nothing is validated or requested until the generator is first advanced.

        :param search_phrase: str A search phrase eg "chicken"
        :param ingredients: str to limit the search to certain ingredients
        :param brand_owner: str Defaults to None
        :param target: str One of the keys of ``available_targets``.
        :param page_number: int Page number. Required, there is no usable default.
        :param page_size: Number of results returned
        :param sort_field: A string specifying which field to use to sort the returned results.
        :param sort_direction: One of "asc" or "desc" to indicate an ascending or descending sort respectively.
        :return: A generator yielding one list per food: ``[value]`` when the food has
            the requested field and ``[]`` when it does not.
        :raises AssertionError: if page_number is None or not an int.
        :raises KeyError: if target is not a key of ``available_targets``.
        :raises requests.exceptions.HTTPError: if the API answers with an error status.
        """
        self._check_page_number(page_number)
        # Resolve the API key name before the request so a bad target fails without a network call.
        api_name = self.available_targets[target]
        foods = self._search_foods(search_phrase=search_phrase, ingredients=ingredients,
                                   brand_owner=brand_owner, page_number=page_number,
                                   page_size=page_size, sort_field=sort_field,
                                   sort_direction=sort_direction)
        for food in foods:
            yield [food[api_name]] if api_name in food else []

    def get_food_info(self, search_phrase=None, target_fields=None, ingredients=None, brand_owner=None,
                      page_number=1, page_size=50, sort_field=None, sort_direction='asc'):
        """
        Search for foods and return the requested fields as a table.

        A single request is made regardless of how many fields are requested. Foods
        that lack a requested field get None in that column, so each row always
        describes exactly one food.

        :param search_phrase: str A search phrase eg "chicken"
        :param target_fields: A list or tuple of targets eg ['fdc_id','description'].
            Defaults to fdc_id, ingredients and description (with a warning).
        :param ingredients: str to limit the search to certain ingredients
        :param brand_owner: str Defaults to None
        :param page_number: int Page number. Defaults to 1.
        :param page_size: Number of results returned
        :param sort_field: A string specifying which field to use to sort the returned results.
        :param sort_direction: One of "asc" or "desc" to indicate an ascending or descending sort respectively.
        :return: A pandas DataFrame with one column per target field and one row per food.
        :raises TypeError: if target_fields is neither a list nor a tuple.
        :raises KeyError: if a target field is not a key of ``available_targets``.
        :raises AssertionError: if page_number is None or not an int.
        :raises requests.exceptions.HTTPError: if the API answers with an error status.
        """
        if target_fields is None:
            warn("No target_fields were provided, returning fdc_id, ingredients, and description.")
            target_fields = ["fdc_id", "ingredients", "description"]
        if not isinstance(target_fields, (list, tuple)):
            raise TypeError(f"target should be a list or tuple not {type(target_fields).__name__}")
        # Validate every requested field before touching the network.
        for target_key in target_fields:
            if target_key not in self.available_targets.keys():
                raise KeyError(f"target_key should be one of {self.available_targets.keys()} not {target_key}")
        self._check_page_number(page_number)

        foods = self._search_foods(search_phrase=search_phrase, ingredients=ingredients,
                                   brand_owner=brand_owner, page_number=page_number,
                                   page_size=page_size, sort_field=sort_field,
                                   sort_direction=sort_direction)
        api_names = [self.available_targets[target_key] for target_key in target_fields]
        # Build one row per field, then transpose so the fields become the columns.
        return DataFrame(self._align_columns(foods, api_names), index=target_fields).transpose()

    def get_food_details(self, fdc_id=None, target_field=None, result_format="full", nutrients=None):
        """
        Accesses the FoodDetails EndPoint

        :param fdc_id: int A FoodDataCentral Food ID
        :param target_field: A string indicating which field to return e.g nutrients.
            "nutrients" returns the normalized "foodNutrients" entry, "label_nutrients"
            the normalized "labelNutrients" entry, any other string is used as a key
            into the raw response. If none is provided, a low level result will be returned.
        :param result_format: str Sent as the ``format`` query parameter. Defaults to "full".
        :param nutrients: Optional nutrient number(s) sent as the ``nutrients`` query
            parameter: a list/tuple of ints or strings, or a single int or string.
        :return: A DataFrame for the low level, "nutrients" and "label_nutrients"
            results; otherwise the raw value stored under target_field.
        :raises AssertionError: if fdc_id is None or not an int.
        :raises KeyError: if label nutrients are requested but absent, or target_field
            is not a key of the response.
        :raises requests.exceptions.HTTPError: if the API answers with an error status.
        """
        # Raised explicitly instead of via `assert` so the checks survive `python -O`.
        if fdc_id is None:
            raise AssertionError("fdc_id should not be None")
        if not isinstance(fdc_id, int):
            raise AssertionError(f"fdc_id should be an int not {type(fdc_id).__name__}")

        # Replace in base url so we have only for a specific FDC ID.
        base_url = self.base_url.replace("foods/search", f"food/{fdc_id}")
        base_url = base_url + "&format=" + result_format
        if nutrients:
            base_url = base_url + "&nutrients=" + self._nutrients_query(nutrients)
        url_response = requests.get(base_url, headers={"User-Agent": "Mozilla-5.0"})
        url_response.raise_for_status()
        result = url_response.json()

        if target_field is None:
            warn("No target_field was provided, returning low level results.")
            # Return a low level result that contains everything if it is not empty
            return DataFrame([(key, value) for key, value in result.items() if value])
        if target_field == "nutrients":
            return json_normalize(result["foodNutrients"])
        if target_field == "label_nutrients":
            if "labelNutrients" not in result:
                raise KeyError(f"FDC ID {fdc_id} has no label nutrients.")
            label_nutrients_df = json_normalize(result["labelNutrients"])
            # Drop only a literal trailing ".value" (added by json_normalize for nested
            # {"value": ...} entries); the dot is escaped so it cannot match any character.
            label_nutrients_df.columns = [re.sub(r"\.value$", "", x) for x in label_nutrients_df]
            return label_nutrients_df
        return result[target_field]