Source code for fetch_data.catalog

"""
Catalog
-------
Contains functions relating to reading in catalog files (YAML)
and ensuring that the entries are complete with metadata
"""


def read_catalog(catalog_name):
    """
    Load a YAML catalog of download entries and return it as a YAMLdict.

    The file is parsed with ``EnvYAML`` so placeholders for ENV names can
    be used inside the YAML; the env file is located with
    ``dotenv.find_dotenv()``. See the dotenv documentation for more info.

    Only top-level keys that contain no ``"."`` and whose value is a
    mapping are kept. An entry is structured as shown below

    .. code-block:: yaml

        entry_name:
            url: remote path to file/s. Can contain *
            dest: path where the file/s will be stored (supports ~)
            meta:  # meta will be written to README.txt
                doi: url to the data source
                description: info about the data
                citation: how to cite this dataset

    Args:
        catalog_name (str): the path to the catalog

    Returns:
        YAMLdict: a dictionary with catalog entries that is displayed
            as a YAML file. Can be viewed as a plain dictionary through
            its :code:`dict` attribute.
    """
    from dotenv import find_dotenv
    from envyaml import EnvYAML

    parsed = EnvYAML(
        yaml_file=catalog_name,
        env_file=find_dotenv(),
        strict=True,
    )
    exported = parsed.export()

    # Keep real catalog entries only: dotted keys are skipped (presumably
    # EnvYAML's flattened aliases of nested values -- confirm against the
    # envyaml docs) and so is anything that is not a mapping.
    entries = {
        name: exported[name]
        for name in exported
        if "." not in name and isinstance(exported[name], dict)
    }
    return YAMLdict(entries)
class YAMLdict(dict):
    """
    A dictionary that is displayed in YAML format.

    The object is still a dictionary, it is just the rich (HTML)
    representation that is rendered as a YAML document. This makes it
    useful to create your own catalogs.

    Use the :code:`dict` property to get the object as a plain
    dictionary.
    """

    def _strip_strings(self, d):
        """
        Return a copy of ``d`` with whitespace stripped from string values.

        Nested dictionaries are processed recursively. The input is never
        modified, and every mapping in the result is a plain ``dict`` so
        that it can be dumped as ordinary YAML.

        Args:
            d (dict): the (possibly nested) dictionary to clean

        Returns:
            dict: a new dictionary with the same keys, in the same order.
                String values have leading/trailing whitespace removed;
                values that are neither ``str`` nor ``dict`` (including
                strings inside lists) are passed through untouched.
        """
        cleaned = {}
        for key, value in d.items():
            if isinstance(value, dict):
                cleaned[key] = self._strip_strings(value)
            elif isinstance(value, str):
                cleaned[key] = value.strip()
            else:
                cleaned[key] = value
        return cleaned

    @property
    def dict(self):
        """Return a plain ``dict`` (shallow copy) of this object."""
        return dict(self)

    def _repr_html_(self):
        """
        Return syntax-highlighted HTML showing the dictionary as YAML.

        Displaying the object does not modify it: strings are stripped
        on a copy, not on the stored entries.

        Returns:
            str: a ``<style>`` element with the pygments CSS followed by
                the highlighted YAML markup.
        """
        import yaml
        from pygments import highlight
        from pygments.lexers import get_lexer_by_name
        from pygments.formatters import HtmlFormatter
        import IPython.display

        # Dump a stripped plain-dict copy. Dumping the subclass itself
        # makes PyYAML emit a python-object tag and an extra nesting
        # level that would then have to be cut out of the text by hand.
        cleaned = self._strip_strings(self)
        text = yaml.dump(cleaned, default_flow_style=False, sort_keys=False)

        lexer = get_lexer_by_name("yaml")
        formatter = HtmlFormatter()
        markup = '<style type="text/css">{style}</style>{content}'.format(
            style=formatter.get_style_defs(".highlight"),
            content=highlight(text, lexer, formatter),
        )
        # The markup is passed through IPython's HTML display object and
        # its ``data`` attribute is what gets returned.
        return IPython.display.HTML(markup).data