Source code for pysatSpaceWeather.instruments.methods.lisird

#!/usr/bin/env python
# -*- coding: utf-8 -*-.
# Full license can be found in License.md
# Full author list can be found in .zenodo.json file
# DOI:10.5281/zenodo.3986138
#
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# ----------------------------------------------------------------------------
"""Provides support functions for the LASP LISIRD data base."""

import datetime as dt
import json
import numpy as np
import os
import pandas as pds
import requests

import pysat

ackn = "".join(["LASP Interactive Solar Irradiance Data Center provides ",
                "access to many solar datasets generated by researchers at ",
                "LASP and other institutions."])


[docs] def references(platform, name, tag, inst_id): """Provide references for different Instrument data products. Parameters ---------- platform : str Instrument platform name : str Instrument name tag : str Instrument tag inst_id : str Instrument ID Returns ------- refs : str String of references """ refs = {'sw': {'mgii': { 'composite': { '': ''.join(["Viereck, R. A., Floyd, L. E., Crane, P. C., Woods, ", "T. N., Knapp, B. G., Rottman, G., Weber, M., Puga,", " L. C., and DeLand, M. T. (2004), A composite Mg ", "II index spanning from 1978 to 2003, Space Weather", ", 2, S10005, doi:10.1029/2004SW000084."])}, 'sorce': { '': "\n".join([ "".join(["Snow, M, William E. McClintock, Thomas N. Woods, ", "Oran R. White, Jerald W. Harder, and Gary Rottman ", "(2005). The Mg II Index from SORCE, Solar Phys., ", "230, 1, 325-344."]), "".join(["Heath, D. and Schlesinger, B. (1986). The Mg 280-nm ", "doublet as a monitor of changes in solar ", "ultraviolet irradiance, JGR, 91, 8672-8682."])])}}}} return refs[platform][name][tag][inst_id]
[docs] def build_lisird_url(lisird_data_name, start, stop): """Build a LASP LISIRD direct download URL. Parameters ---------- lisird_data_name : str Name of the data set on the LISARD server start : dt.datetime Start time stop : dt.datetime Stop time Returns ------- url : str URL that will download the desired data """ # Define the formatting for the start and stop times tfmt = "%Y-%m-%dT%H:%M:%S.000Z" url = "".join(["https://lasp.colorado.edu/lisird/latis/dap/", lisird_data_name, ".json?&time>=", start.strftime(tfmt), '&time<=', stop.strftime(tfmt), "&format_time(yyyy-MM-dd'T'HH:mm:ss.SSS)"]) return url
[docs] def download(date_array, data_path, local_file_prefix, local_date_fmt, lisird_data_name, freq, update_files=False, fill_vals=None, mock_download_dir=None): """Download routine for LISIRD data. Parameters ---------- date_array : array-like Sequence of dates for which files will be downloaded. data_path : str Path to data directory. local_file_prefix : str Prefix for local files, e.g., 'tag_' or 'tag_monthly_' local_date_fmt : str String format for the local filename, e.g., '%Y-%m-%d' or '%Y-%m' lisird_data_name : str Name of the data set on the LISARD server freq : pds.DateOffset or dt.timedelta Offset to add to the start date to ensure all data is downloaded (inclusive) update_files : bool Re-download data for files that already exist if True (default=False) fill_vals : dict or NoneType Dict of fill values to replace with NaN by variable name or None to leave alone (default=None) mock_download_dir : str or NoneType Local directory with downloaded files or None. If not None, will process any files with the correct name and date (following the local file prefix and date format) as if they were downloaded (default=None) Raises ------ IOError If there is a gateway timeout when downloading data or an unknown mock download directory is supplied. KeyError If the `fill_vals` input does not match the downloaded data. """ # If a mock download directory was supplied, test to see it exists if mock_download_dir is not None: if not os.path.isdir(mock_download_dir): raise IOError('file location is not a directory: {:}'.format( mock_download_dir)) # Initialize the fill_vals dict, if necessary if fill_vals is None: fill_vals = {} # Cycle through all the dates for dl_date in date_array: # Build the local filename fname = ''.join([local_file_prefix, dl_date.strftime(local_date_fmt), '.txt']) local_file = os.path.join(data_path, fname) # Determine if the download should occur if update_files or not os.path.isfile(local_file): if mock_download_dir is None: # Get the URL for the desired data url = build_lisird_url(lisird_data_name, dl_date, dl_date + freq) # The data is returned as a JSON file req = requests.get(url) # Process the JSON file if req.text.find('Gateway Timeout') >= 0: raise IOError(''.join(['Gateway timeout when requesting ', 'file using command: ', url])) # Load the dict if text was retrieved json_dict = json.loads(req.text) if req.ok else {'': {}} else: # Get the local repository filename url = os.path.join(mock_download_dir, fname) if os.path.isfile(url): with open(url, 'r') as fpin: raw_txt = fpin.read() json_dict = json.loads(raw_txt) else: json_dict = {'': {}} if lisird_data_name in json_dict.keys(): raw_dict = json_dict[lisird_data_name] data = pds.DataFrame.from_dict(raw_dict['samples']) if data.empty: pysat.logger.warning("no data for {:}".format(dl_date)) else: # The URL specifies the time format, so break it down frac_sec = [int(tval.split('.')[-1]) for tval in data['time']] times = [dt.datetime.strptime(tval.split('.')[0], '%Y-%m-%dT%H:%M:%S') + dt.timedelta(microseconds=frac_sec[i] * 6) for i, tval in enumerate(data.pop('time'))] data.index = times # Replace fill value with NaNs for var in fill_vals.keys(): if var in data.columns: idx, = np.where(data[var] == fill_vals[var]) data.iloc[idx, :] = np.nan else: raise KeyError(''.join(['unknown fill value ', 'variable name supplied: ', var])) # Create a local CSV file data.to_csv(local_file, header=True) else: if len(json_dict.keys()) == 1 and '' in json_dict.keys(): pysat.logger.info("".join(["Data not downloaded for ", dl_date.strftime("%d %b %Y"), ", date may be out of range ", "for the database or data may ", "have been saved to an ", "unexpected filename: ", url])) else: raise IOError(''.join(['Returned unexpectedly formatted ', 'data using command: ', url])) return