Source code for pysatSpaceWeather.instruments.methods.f107

#!/usr/bin/env python
# -*- coding: utf-8 -*-.
# Full license can be found in License.md
# Full author list can be found in .zenodo.json file
# DOI:10.5281/zenodo.3986138
#
# Review Status for Classified or Controlled Information by NRL
# -------------------------------------------------------------
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# ----------------------------------------------------------------------------

"""Routines for the F10.7 solar index."""

import datetime as dt
import numpy as np

import pandas as pds
import pysat

import pysatSpaceWeather as pysat_sw
from pysatSpaceWeather.instruments.methods.general import is_fill_val



[docs]
def acknowledgements(tag):
    """Define the acknowledgements for the F10.7 data.

    Parameters
    ----------
    tag : str
        Tag of the space weather index

    Returns
    -------
    ackn : str
        Acknowledgements string associated with the appropriate F10.7 tag.

    """
    lisird = 'NOAA radio flux obtained through LISIRD'
    swpc = ''.join(['Prepared by the U.S. Dept. of Commerce, NOAA, Space ',
                    'Weather Prediction Center'])

    ackn = {'historic': lisird, 'prelim': swpc, 'daily': swpc,
            'forecast': swpc, '45day': swpc,
            'now': pysat_sw.instruments.methods.gfz.ackn}

    return ackn[tag]




[docs]
def references(tag):
    """Define the references for the F10.7 data.

    Parameters
    ----------
    tag : str
        Instrument tag for the F10.7 data.

    Returns
    -------
    refs : str
        Reference string associated with the appropriate F10.7 tag.

    """
    noaa_desc = ''.join(['Dataset description: ',
                         'https://www.ngdc.noaa.gov/stp/space-weather/',
                         'solar-data/solar-features/solar-radio/noontime-flux',
                         '/penticton/documentation/dataset-description',
                         '_penticton.pdf, accessed Dec 2020'])
    orig_ref = ''.join(["Covington, A.E. (1948), Solar noise observations on",
                        " 10.7 centimetersSolar noise observations on 10.7 ",
                        "centimeters, Proceedings of the IRE, 36(44), ",
                        "p 454-457."])
    swpc_desc = ''.join(['Dataset description: https://www.swpc.noaa.gov/',
                         'sites/default/files/images/u2/Usr_guide.pdf'])
    gfz_desc = ''.join(['Dataset description: https://kp.gfz-potsdam.de/app/',
                        'format/Kp_ap_Ap_SN_F107_format.txt'])

    refs = {'historic': "\n".join([noaa_desc, orig_ref]),
            'prelim': "\n".join([swpc_desc, orig_ref]),
            'now': "\n".join([gfz_desc, orig_ref]),
            'daily': "\n".join([swpc_desc, orig_ref]),
            'forecast': "\n".join([swpc_desc, orig_ref]),
            '45day': "\n".join([swpc_desc, orig_ref])}

    return refs[tag]




[docs]
def combine_f107(standard_inst, forecast_inst, start=None, stop=None):
    """Combine the output from the measured and forecasted F10.7 sources.

    Parameters
    ----------
    standard_inst : pysat.Instrument or NoneType
        Instrument object containing data for the 'sw' platform, 'f107' name,
        and 'historic', 'prelim', 'now', or 'daily' tag
    forecast_inst : pysat.Instrument or NoneType
        Instrument object containing data for the 'sw' platform, 'f107' name,
        and 'now', 'prelim', '45day' or 'forecast' tag
    start : dt.datetime or NoneType
        Starting time for combining data, or None to use earliest loaded
        date from the pysat Instruments (default=None)
    stop : dt.datetime or NoneType
        Ending time for combining data, or None to use the latest loaded date
        from the pysat Instruments (default=None)

    Returns
    -------
    f107_inst : pysat.Instrument
        Instrument object containing F10.7 observations for the desired period
        of time, merging the standard, 45day, and forecasted values based on
        their reliability

    Raises
    ------
    ValueError
        If appropriate time data is not supplied, or if the date range is badly
        formed.

    Notes
    -----
    Merging prioritizes the standard data, then the 45day data, and finally
    the forecast data.

    Will not attempt to download any missing data, but will load data.

    If no data is present, but dates are provided, supplies a series of fill
    values.

    """

    # Initialize metadata and flags
    notes = "Combines data from"
    stag = standard_inst.tag if len(standard_inst.tag) > 0 else 'default'
    tag = 'combined_{:s}_{:s}'.format(stag, forecast_inst.tag)
    inst_flag = 'standard'

    # If the start or stop times are not defined, get them from the Instruments
    if start is None:
        stimes = [inst.index.min() for inst in [standard_inst, forecast_inst]
                  if len(inst.index) > 0]
        start = min(stimes) if len(stimes) > 0 else None

    if stop is None:
        stimes = [inst.index.max() for inst in [standard_inst, forecast_inst]
                  if len(inst.index) > 0]
        stop = max(stimes) + pds.DateOffset(days=1) if len(stimes) > 0 else None

    if start is None or stop is None:
        raise ValueError(' '.join(("must either load in Instrument objects or",
                                   "provide starting and ending times")))

    if start >= stop:
        raise ValueError("date range is zero or negative")

    # Initialize the output instrument
    f107_inst = pysat.Instrument()
    f107_inst.inst_module = pysat_sw.instruments.sw_f107
    f107_inst.tag = tag
    f107_inst.date = start
    f107_inst.doy = np.int64(start.strftime("%j"))
    fill_val = None

    f107_times = list()
    f107_values = list()

    # Cycle through the desired time range
    itime = dt.datetime(start.year, start.month, start.day)
    while itime < stop and inst_flag is not None:
        # Load and save the standard data for as many times as possible
        if inst_flag == 'standard':
            # Test to see if data loading is needed
            if not np.any(standard_inst.index == itime):
                # Set the load kwargs, which vary by pysat version and tag
                load_kwargs = {'date': itime}

                if standard_inst.tag == 'daily':
                    # Add 30 days
                    load_kwargs['date'] += dt.timedelta(days=30)

                standard_inst.load(**load_kwargs)

            if standard_inst.empty:
                good_times = [False]
            else:
                good_times = ((standard_inst.index >= itime)
                              & (standard_inst.index < stop))

            if notes.find("standard") < 0:
                notes += " the {:} source ({:} to ".format(inst_flag,
                                                           itime.date())

            if np.any(good_times):
                if fill_val is None:
                    f107_inst.meta = standard_inst.meta
                    fill_val = f107_inst.meta['f107'][
                        f107_inst.meta.labels.fill_val]

                good_vals = np.array([not is_fill_val(val, fill_val) for val
                                      in standard_inst['f107'][good_times]])
                new_times = list(standard_inst.index[good_times][good_vals])
            else:
                new_times = []

            if len(new_times) > 0:
                f107_times.extend(new_times)
                new_vals = list(standard_inst['f107'][good_times][good_vals])
                f107_values.extend(new_vals)
                itime = f107_times[-1] + pds.DateOffset(days=1)
            else:
                inst_flag = 'forecast'
                notes += "{:})".format(itime.date())

        # Load and save the forecast data for as many times as possible
        if inst_flag == "forecast":
            # Determine which files should be loaded
            if len(forecast_inst.index) == 0:
                if len(forecast_inst.files.files) > 0:
                    files = np.unique(forecast_inst.files.files[itime:stop])
                else:
                    files = [None]  # No load, because no files are available
            else:
                files = [None]  # No load needed, if already initialized

            # Cycle through all possible files of interest, saving relevant
            # data
            for filename in files:
                if filename is not None:
                    forecast_inst.load(fname=filename)

                if notes.find("forecast") < 0:
                    notes += " the {:} source ({:} to ".format(inst_flag,
                                                               itime.date())

                # Determine which times to save
                if forecast_inst.empty:
                    good_vals = []
                else:
                    # Check in case there was a problem with the standard data
                    if fill_val is None:
                        f107_inst.meta = forecast_inst.meta
                        fill_val = f107_inst.meta['f107'][
                            f107_inst.meta.labels.fill_val]

                    # Get the good times and values
                    good_times = ((forecast_inst.index >= itime)
                                  & (forecast_inst.index < stop))
                    good_vals = np.array([
                        not is_fill_val(val, fill_val) for val
                        in forecast_inst['f107'][good_times]])

                # Save desired data and cycle time
                if len(good_vals) > 0:
                    new_times = list(forecast_inst.index[good_times][
                        good_vals])
                    f107_times.extend(new_times)
                    new_vals = list(
                        forecast_inst['f107'][good_times][good_vals])
                    f107_values.extend(new_vals)
                    itime = f107_times[-1] + pds.DateOffset(days=1)

            notes += "{:})".format(itime.date())

            inst_flag = None

    if inst_flag is not None:
        notes += "{:})".format(itime.date())

    # Determine if the beginning or end of the time series needs to be padded
    if len(f107_times) >= 2:
        freq = pysat.utils.time.calc_freq(f107_times)
    else:
        freq = None
    end_date = stop - pds.DateOffset(days=1)
    date_range = pds.date_range(start=start, end=end_date, freq=freq)

    if len(f107_times) == 0:
        f107_times = date_range

    if date_range[0] < f107_times[0]:
        # Extend the time and value arrays from their beginning with fill
        # values
        itime = abs(date_range - f107_times[0]).argmin()
        f107_times.reverse()
        f107_values.reverse()
        extend_times = list(date_range[:itime])
        extend_times.reverse()
        f107_times.extend(extend_times)
        f107_values.extend([fill_val for kk in extend_times])
        f107_times.reverse()
        f107_values.reverse()

    if date_range[-1] > f107_times[-1]:
        # Extend the time and value arrays from their end with fill values
        itime = abs(date_range - f107_times[-1]).argmin() + 1
        extend_times = list(date_range[itime:])
        f107_times.extend(extend_times)
        f107_values.extend([fill_val for kk in extend_times])

    # Save output data
    f107_inst.data = pds.DataFrame(f107_values, columns=['f107'],
                                   index=f107_times)

    # Resample the output data, filling missing values
    if (date_range.shape != f107_inst.index.shape
            or abs(date_range - f107_inst.index).max().total_seconds() > 0.0):
        f107_inst.data = f107_inst.data.resample(freq).asfreq()
        if np.isfinite(fill_val):
            f107_inst.data[np.isnan(f107_inst.data)] = fill_val

    # Update the metadata notes for this procedure
    notes += ", in that order"
    f107_inst.meta['f107'] = {f107_inst.meta.labels.notes: notes}

    return f107_inst




[docs]
def calc_f107a(f107_inst, f107_name='f107', f107a_name='f107a', min_pnts=41):
    """Calculate the 81 day mean F10.7.

    Parameters
    ----------
    f107_inst : pysat.Instrument
        pysat Instrument holding the F10.7 data
    f107_name : str
        Data column name for the F10.7 data (default='f107')
    f107a_name : str
        Data column name for the F10.7a data (default='f107a')
    min_pnts : int
        Minimum number of points required to calculate an average (default=41)

    Note
    ----
    Will not pad data on its own

    """

    # Test to see that the input data is present
    if f107_name not in f107_inst.data.columns:
        raise ValueError("unknown input data column: " + f107_name)

    # Test to see that the output data does not already exist
    if f107a_name in f107_inst.data.columns:
        raise ValueError("output data column already exists: " + f107a_name)

    if f107_name in f107_inst.meta:
        fill_val = f107_inst.meta[f107_name][f107_inst.meta.labels.fill_val]
    else:
        fill_val = np.nan

    # Calculate the rolling mean.  Since these values are centered but rolling
    # function doesn't allow temporal windows to be calculated this way, create
    # a hack for this.
    #
    # Ensure the data are evenly sampled at a daily frequency, since this is
    # how often F10.7 is calculated.
    f107_fill = f107_inst.data.resample('1D').asfreq()

    # Replace the time index with an ordinal
    time_ind = f107_fill.index
    f107_fill['ord'] = pds.Series([tt.toordinal() for tt in time_ind],
                                  index=time_ind)
    f107_fill.set_index('ord', inplace=True)

    # Calculate the mean
    f107_fill[f107a_name] = f107_fill[f107_name].rolling(window=81,
                                                         min_periods=min_pnts,
                                                         center=True).mean()

    # Replace the ordinal index with the time
    f107_fill['time'] = pds.Series(time_ind, index=f107_fill.index)
    f107_fill.set_index('time', inplace=True)

    # Resample to the original frequency, if it is not equal to 1 day
    freq = pysat.utils.time.calc_freq(f107_inst.index)
    if freq != "86400s":
        # Resample to the desired frequency
        f107_fill = f107_fill.resample(freq).ffill()

        # Save the output in a list
        f107a = list(f107_fill[f107a_name])

        # Fill any dates that fall just outside of the range
        time_ind = pds.date_range(f107_fill.index[0], f107_inst.index[-1],
                                  freq=freq)
        for itime in time_ind[f107_fill.index.shape[0]:]:
            if (itime - f107_fill.index[-1]).total_seconds() < 86400.0:
                f107a.append(f107a[-1])
            else:
                f107a.append(fill_val)

        # Redefine the Series
        f107_fill = pds.DataFrame({f107a_name: f107a}, index=time_ind)

    # There may be missing days in the output data, remove these
    if f107_inst.index.shape < f107_fill.index.shape:
        f107_fill = f107_fill.loc[f107_inst.index]

    # Save the data
    f107_inst[f107a_name] = f107_fill[f107a_name]

    # Update the metadata
    meta_dict = {f107_inst.meta.labels.units: 'SFU',
                 f107_inst.meta.labels.name: 'F10.7a',
                 f107_inst.meta.labels.desc: "81-day centered average of F10.7",
                 f107_inst.meta.labels.min_val: 0.0,
                 f107_inst.meta.labels.max_val: np.nan,
                 f107_inst.meta.labels.fill_val: fill_val,
                 f107_inst.meta.labels.notes:
                 ' '.join(('Calculated using data between',
                           '{:} and {:}'.format(f107_inst.index[0],
                                                f107_inst.index[-1])))}

    f107_inst.meta[f107a_name] = meta_dict

    return