Source code for pysatSpaceWeather.instruments.sw_f107

#!/usr/bin/env python
# -*- coding: utf-8 -*-.
# Full license can be found in License.md
# Full author list can be found in .zenodo.json file
# DOI:10.5281/zenodo.3986138
#
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# ----------------------------------------------------------------------------
"""Supports F10.7 index values.

Properties
----------
platform
    'sw'
name
    'f107'
tag
    - 'historic' LASP F10.7 data (downloads by month, loads by day)
    - 'prelim' Preliminary SWPC daily solar indices
    - 'now' A mix of nowcast and definitive values from GFZ
    - 'daily' Daily SWPC solar indices (contains last 30 days)
    - 'forecast' Grab forecast data from SWPC (next 3 days)
    - '45day' 45-Day Forecast data from the Air Force
inst_id
    - '' No distinction, may include observed, adjusted, or both
    - 'obs' Observed F10.7
    - 'adj' Adjusted F10.7

Examples
--------
Download and load all of the historic F10.7 data.  Note that it will not
stop on the current date, but a point in the past when post-processing has
been successfully completed.
::

    f107 = pysat.Instrument('sw', 'f107', tag='historic')
    f107.download(start=f107.lasp_stime, stop=f107.today())
    f107.load(date=f107.lasp_stime, end_date=f107.today())


Note
----
The forecast data is stored by generation date, where each file contains the
forecast for the next three days. Forecast data downloads are only supported
for the current day. When loading forecast data, the date specified with the
load command is the date the forecast was generated. The data loaded will span
three days. To always ensure you are loading the most recent data, load
the data with tomorrow's date.
::

    f107 = pysat.Instrument('sw', 'f107', tag='forecast')
    f107.download()
    f107.load(date=f107.tomorrow())


Warnings
--------
The 'forecast' F10.7 data loads three days at a time. Loading multiple files,
loading multiple days, the data padding feature, and multi_file_day feature
available from the pysat.Instrument object are not appropriate for 'forecast'
data.

Like 'forecast', the '45day' forecast loads a specific period of time (45 days)
and subsequent files contain overlapping data.  Thus, loading multiple files,
loading multiple days, the data padding feature, and multi_file_day feature
available from the pysat.Instrument object are not appropriate for '45day'
data.

"""

import datetime as dt
import numpy as np
import os
import pandas as pds

import pysat

from pysatSpaceWeather.instruments import methods

# ----------------------------------------------------------------------------
# Instrument attributes

platform = 'sw'
name = 'f107'

# Dict keyed by tag that supplies a short description of each data set
tags = {'historic': 'Daily LASP value of F10.7',
        'prelim': 'Preliminary SWPC daily solar indices',
        'now': 'Nowcast and definitive data from GFZ',
        'daily': 'Daily SWPC solar indices (contains last 30 days)',
        'forecast': 'SWPC Forecast F107 data next (3 days)',
        '45day': 'Air Force 45-day Forecast'}

# Dict keyed by inst_id that lists supported tags for each inst_id. The 'now'
# tag is only paired with the 'obs' and 'adj' IDs; every other tag uses ''.
inst_ids = {'': [tag for tag in tags if tag != 'now'], 'obs': ['now'],
            'adj': ['now']}

# Generate today's date to support loading forecast data. `datetime.utcnow`
# is deprecated as of Python 3.12, so request a timezone-aware UTC time and
# strip the timezone to keep the same timezone-naive UTC value as before.
now = dt.datetime.now(tz=dt.timezone.utc).replace(tzinfo=None)
today = dt.datetime(now.year, now.month, now.day)
tomorrow = today + dt.timedelta(days=1)

# The LASP archive start day is also important
lasp_stime = dt.datetime(1947, 2, 14)

# ----------------------------------------------------------------------------
# Instrument test attributes

# Dict keyed by inst_id and then tag that supplies a good day of test data
_test_dates = {'': {'historic': dt.datetime(2009, 1, 1),
                    'prelim': dt.datetime(2009, 1, 1),
                    'daily': today,
                    'forecast': tomorrow,
                    '45day': today},
               'obs': {'now': dt.datetime(2009, 1, 1)},
               'adj': {'now': dt.datetime(2009, 1, 1)}}

# Other tags assumed to be True
_test_download_ci = {'': {'prelim': False}}

# ----------------------------------------------------------------------------
# Instrument methods

# Expose the `preprocess` routine defined in `methods.general` under this
# module's `preprocess` name, rather than defining a new one here
preprocess = methods.general.preprocess



[docs]
def init(self):
    """Attach the tag-specific acknowledgements and references.

    The acknowledgements are also written to the pysat logger at the info
    level.  For the 'historic' tag, the module-level LASP archive start time
    is additionally stored on the object as `lasp_stime`.

    """

    f107_methods = methods.f107

    # Assign the required Instrument attributes and log the acknowledgements
    self.acknowledgements = f107_methods.acknowledgements(self.tag)
    self.references = f107_methods.references(self.tag)
    pysat.logger.info(self.acknowledgements)

    # Only the historic data set carries the F10.7 archive starting time
    if self.tag != 'historic':
        return

    self.lasp_stime = lasp_stime
    return




[docs]
def clean(self):
    """Leave the F10.7 data untouched, as no cleaning is necessary."""

    # Intentionally a no-op
    return None



# ----------------------------------------------------------------------------
# Instrument functions



[docs]
def load(fnames, tag='', inst_id=''):
    """Load F10.7 index files and build the matching metadata.

    Parameters
    ----------
    fnames : pandas.Series
        Series of filenames. For the 'historic', 'prelim', and 'now' tags,
        each name is expected to end with an underscore followed by a
        '%Y-%m-%d' date, which is stripped before the file is read.
    tag : str
        Instrument tag. (default='')
    inst_id : str
        Instrument ID, used to identify the variable to rename for the 'now'
        tag and to prefix the `f107` metadata description. (default='')

    Returns
    -------
    data : pandas.DataFrame
        Object containing the loaded data.
    meta : pysat.Meta
        Object containing metadata such as column names and units.

    See Also
    --------
    pysat.instruments.methods.general.load_csv_data

    Note
    ----
    Called by pysat. Not intended for direct use by user.

    """

    # Separate the appended dates from the daily indexed file names, keeping
    # each underlying file once in order of first appearance
    file_dates = []
    if tag in ('historic', 'prelim', 'now'):
        base_names = {}
        for dated_name in fnames:
            file_dates.append(
                dt.datetime.strptime(dated_name[-10:], '%Y-%m-%d'))
            base_names.setdefault(dated_name[:-11])
        fnames = list(base_names)

    # Read in the CSV data files
    csv_kwargs = {"index_col": 0, "parse_dates": True}
    data = pysat.instruments.methods.general.load_csv_data(
        fnames, read_csv_kwargs=csv_kwargs)

    # Give the GFZ variable the same name used by the other data sets
    if tag == 'now':
        gfz_name = 'F{:s}'.format(inst_id)
        data = data.rename(columns={gfz_name: 'f107'})

    # Keep only the requested days when dates were attached to the file names
    if file_dates:
        first_day = min(file_dates)
        end_limit = max(file_dates) + dt.timedelta(days=1)
        in_window = (data.index >= first_day) & (data.index < end_limit)
        data = data.iloc[np.where(in_window)[0], :]

    # Build the metadata for the primary data product
    meta = pysat.Meta()
    if inst_id == '':
        desc_prefix = ''
    else:
        desc_prefix = '{:s} '.format(inst_id.capitalize())
    f107_desc = '{:s}F10.7 cm radio flux in Solar Flux Units (SFU)'.format(
        desc_prefix)
    meta['f107'] = {meta.labels.units: 'SFU',
                    meta.labels.name: 'F10.7 cm solar index',
                    meta.labels.notes: '',
                    meta.labels.desc: f107_desc,
                    meta.labels.fill_val: np.nan,
                    meta.labels.min_val: 0,
                    meta.labels.max_val: np.inf}

    # Only historic files in the updated LASP format need further handling
    if tag != 'historic' or 'f107_adjusted' not in data.columns:
        return data, meta

    # LASP updated file format in June, 2022. Minimize impact downstream by
    # continuing use of `f107` as primary data product.
    if 'f107' not in data.columns:
        data['f107'] = data['f107_adjusted']
    else:
        # A mix of old and new data formats is present. Fill the NaN values
        # of each column from the other, so both data sets are consistent.
        missing_f107 = np.isnan(data['f107'])
        data.loc[missing_f107, 'f107'] = data.loc[missing_f107,
                                                  'f107_adjusted']

        missing_adj = np.isnan(data['f107_adjusted'])
        data.loc[missing_adj, 'f107_adjusted'] = data.loc[missing_adj, 'f107']

    # Add metadata for the observed and adjusted values, starting from the
    # `f107` metadata and replacing the description
    meta['f107_observed'] = meta['f107']
    meta['f107_observed'] = {
        meta.labels.desc: 'Raw F10.7 cm radio flux in Solar Flux Units (SFU)'}

    meta['f107_adjusted'] = meta['f107_observed']
    norm_str = ('F10.7 cm radio flux in Solar Flux Units (SFU)'
                ' normalized to 1-AU')
    meta['f107_adjusted'] = {meta.labels.desc: norm_str}

    # The primary product shares the adjusted description
    meta['f107'] = {
        meta.labels.desc: meta['f107_adjusted', meta.labels.desc]}

    return data, meta




[docs]
def list_files(tag='', inst_id='', data_path='', format_str=None):
    """List local F10.7 data files.

    Parameters
    ----------
    tag : str
        Instrument tag, accepts any value from `tags`. (default='')
    inst_id : str
        Instrument ID, used to build the default file format for the 'now'
        tag and passed on to the SWPC file listing routine. (default='')
    data_path : str
        Path to data directory. (default='')
    format_str : str or NoneType
        User specified file format.  If None is specified, the default
        formats associated with the supplied tags are used. (default=None)

    Returns
    -------
    out_files : pandas.Series
        File names indexed by time. For the 'historic', 'prelim', and 'now'
        tags the index has a daily cadence and each file name has an
        underscore and the '%Y-%m-%d' date appended. For the remaining tags
        the output of `methods.swpc.list_files` is returned unchanged.

    Raises
    ------
    ValueError
        If `tag` is not one of the supported tags.

    Note
    ----
    Called by pysat. Not intended for direct use by user.

    """

    if tag in ['historic', 'now']:
        # Files are by month, going to add date to monthly filename for
        # each day of the month. The load routine will load a month of
        # data and use the appended date to select out appropriate data.
        if format_str is None:
            if tag == 'historic':
                format_str = 'f107_monthly_{year:04d}-{month:02d}.txt'
            else:
                format_str = 'F{:s}_{{year:04d}}-{{month:02d}}.txt'.format(
                    inst_id)
        out_files = pysat.Files.from_os(data_path=data_path,
                                        format_str=format_str)
        if not out_files.empty:
            # Add an entry for the last day of the final month, so that the
            # daily padding below covers the whole of that month
            out_files.loc[out_files.index[-1] + pds.DateOffset(months=1)
                          - pds.DateOffset(days=1)] = out_files.iloc[-1]
            out_files = out_files.asfreq('D', 'pad')
            out_files = out_files + '_' + out_files.index.strftime(
                '%Y-%m-%d')

    elif tag == 'prelim':
        # Files are by year (and quarter)
        if format_str is None:
            format_str = ''.join(['f107_prelim_{year:04d}_{month:02d}',
                                  '_v{version:01d}.txt'])
        out_files = pysat.Files.from_os(data_path=data_path,
                                        format_str=format_str)

        if not out_files.empty:
            # Set each file's valid length at a 1-day resolution
            orig_files = out_files.sort_index().copy()
            new_files = list()

            for orig in orig_files.items():
                # Version determines each file's valid length
                version = np.int64(orig[1].split("_v")[1][0])
                doff = pds.DateOffset(years=1) if version == 2 \
                    else pds.DateOffset(months=3)
                istart = orig[0]
                iend = istart + doff - pds.DateOffset(days=1)

                # Ensure the end time does not extend past the number of
                # possible days included based on the file's download time.
                # `datetime.utcfromtimestamp` is deprecated as of Python
                # 3.12; the aware UTC time with the timezone stripped is the
                # same timezone-naive UTC value.
                fname = os.path.join(data_path, orig[1])
                dend = dt.datetime.fromtimestamp(
                    os.path.getctime(fname),
                    tz=dt.timezone.utc).replace(tzinfo=None)
                dend = dend - pds.DateOffset(days=1)
                if dend < iend:
                    iend = dend

                # Pad the original file index
                out_files.loc[iend] = orig[1]
                out_files = out_files.sort_index()

                # Save the files at a daily cadence over the desired period
                new_files.append(out_files.loc[istart:
                                               iend].asfreq('D', 'pad'))
            # Add the newly indexed files to the file output
            out_files = pds.concat(new_files, sort=True)
            out_files = out_files.dropna()
            out_files = out_files.sort_index()
            out_files = out_files + '_' + out_files.index.strftime('%Y-%m-%d')

    elif tag in ['daily', 'forecast', '45day']:
        out_files = methods.swpc.list_files(name, tag, inst_id, data_path,
                                            format_str=format_str)

    else:
        # Fail with a clear message instead of an UnboundLocalError on return
        raise ValueError('unsupported F10.7 tag: {!r}'.format(tag))

    return out_files




[docs]
def download(date_array, tag, inst_id, data_path, update_files=False,
             mock_download_dir=None):
    """Download F107 index data from the appropriate repository.

    Parameters
    ----------
    date_array : array-like
        Sequence of dates for which files will be downloaded. For the
        'historic' tag this must provide a `freq` attribute and support
        indexing, and is replaced by a month-start sequence if needed.
    tag : str
        Denotes type of file to load.
    inst_id : str
        Instrument ID, used to build the data name and local file prefix for
        the 'now' tag and to list the local files for the 'prelim' tag.
    data_path : str
        Path to data directory.
    update_files : bool
        Re-download data for files that already exist if True (default=False)
    mock_download_dir : str or NoneType
        Local directory with downloaded files or None. If not None, will
        process any files with the correct name and date as if they were
        downloaded (default=None)

    Raises
    ------
    IOError
        If a problem is encountered connecting to the gateway or retrieving
        data from the remote or local repository.

    Warnings
    --------
    Only able to download current forecast data, not archived forecasts.

    Note
    ----
    Called by pysat. Not intended for direct use by user.

    """
    # Download standard F107 data
    if tag == 'historic':
        # Test the date array, updating it if necessary
        if date_array.freq != 'MS':
            date_array = pysat.utils.time.create_date_range(
                dt.datetime(date_array[0].year, date_array[0].month, 1),
                date_array[-1], freq='MS')

        # Download from LASP, by month
        freq = pds.DateOffset(months=1, seconds=-1)
        methods.lisird.download(date_array, data_path, 'f107_monthly_',
                                '%Y-%m', 'noaa_radio_flux', freq, update_files,
                                {'f107_adjusted': -99999.0,
                                 'f107_observed': -99999.0},
                                mock_download_dir=mock_download_dir)

    elif tag == 'prelim':
        # Get the local files, to ensure that the version 1 files are
        # downloaded again if more data has been added
        local_files = list_files(tag, inst_id, data_path)

        # Cut the date from the end of the local files. The listing is
        # indexed by date, so update by position explicitly; integer keys in
        # plain `[]` assignment are deprecated as positions by pandas.
        for i, lfile in enumerate(local_files):
            local_files.iloc[i] = lfile[:-11]

        methods.swpc.old_indices_dsd_download(
            name, date_array, data_path, local_files, today,
            mock_download_dir=mock_download_dir)
    elif tag == 'now':
        # Set the download input options
        gfz_data_name = 'F{:s}'.format(inst_id)
        local_file_prefix = '{:s}_'.format(gfz_data_name)

        # Call the download routine
        methods.gfz.json_downloads(date_array, data_path, local_file_prefix,
                                   "%Y-%m", gfz_data_name,
                                   pds.DateOffset(months=1, seconds=-1),
                                   update_files=update_files,
                                   mock_download_dir=mock_download_dir)

    elif tag == 'daily':
        # This download uses the module-level `today`, not `date_array`
        methods.swpc.daily_dsd_download(name, today, data_path,
                                        mock_download_dir=mock_download_dir)

    elif tag == 'forecast':
        methods.swpc.solar_geomag_predictions_download(
            name, date_array, data_path, mock_download_dir=mock_download_dir)

    elif tag == '45day':
        methods.swpc.recent_ap_f107_download(
            name, date_array, data_path, mock_download_dir=mock_download_dir)

    return