#!/usr/bin/env python
# -*- coding: utf-8 -*-.
# Full license can be found in License.md
# Full author list can be found in .zenodo.json file
# DOI:10.5281/zenodo.3986138
#
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# ----------------------------------------------------------------------------
"""Supports F10.7 index values.
Properties
----------
platform
'sw'
name
'f107'
tag
- 'historic' LASP F10.7 data (downloads by month, loads by day)
- 'prelim' Preliminary SWPC daily solar indices
- 'now' A mix of nowcast and definitive values from GFZ
- 'daily' Daily SWPC solar indices (contains last 30 days)
- 'forecast' Grab forecast data from SWPC (next 3 days)
- '45day' 45-Day Forecast data from the Air Force
inst_id
- '' No distinction, may include observed, adjusted, or both
- 'obs' Observed F10.7
- 'adj' Adjusted F10.7
Examples
--------
Download and load all of the historic F10.7 data. Note that it will not
stop on the current date, but a point in the past when post-processing has
been successfully completed.
::
f107 = pysat.Instrument('sw', 'f107', tag='historic')
f107.download(start=f107.lasp_stime, stop=f107.today())
f107.load(date=f107.lasp_stime, end_date=f107.today())
Note
----
The forecast data is stored by generation date, where each file contains the
forecast for the next three days. Forecast data downloads are only supported
for the current day. When loading forecast data, the date specified with the
load command is the date the forecast was generated. The data loaded will span
three days. To always ensure you are loading the most recent data, load
the data with tomorrow's date.
::
f107 = pysat.Instrument('sw', 'f107', tag='forecast')
f107.download()
f107.load(date=f107.tomorrow())
Warnings
--------
The 'forecast' F10.7 data loads three days at a time. Loading multiple files,
loading multiple days, the data padding feature, and multi_file_day feature
available from the pysat.Instrument object are not appropriate for 'forecast'
data.
Like 'forecast', the '45day' forecast loads a specific period of time (45 days)
and subsequent files contain overlapping data. Thus, loading multiple files,
loading multiple days, the data padding feature, and multi_file_day feature
available from the pysat.Instrument object are not appropriate for '45day'
data.
"""
import datetime as dt
import numpy as np
import os
import pandas as pds
import pysat
from pysatSpaceWeather.instruments import methods
# ----------------------------------------------------------------------------
# Instrument attributes
platform = 'sw'
name = 'f107'

# Short description of the data set provided by each supported tag
tags = {
    'historic': 'Daily LASP value of F10.7',
    'prelim': 'Preliminary SWPC daily solar indices',
    'now': 'Nowcast and definitive data from GFZ',
    'daily': 'Daily SWPC solar indices (contains last 30 days)',
    'forecast': 'SWPC Forecast F107 data next (3 days)',
    '45day': 'Air Force 45-day Forecast',
}

# Tags supported by each inst_id: the 'now' tag is only listed for the 'obs'
# and 'adj' inst_ids, every other tag is listed under the empty inst_id
inst_ids = {
    '': [tag_name for tag_name in tags if tag_name != 'now'],
    'obs': ['now'],
    'adj': ['now'],
}
# Generate today's date (in UTC) to support loading forecast data; `today` and
# `tomorrow` also serve as the test days for the near-real-time tags.
#
# `datetime.utcnow()` is deprecated as of Python 3.12, so the current time is
# requested as an aware UTC time and the timezone information is then removed.
# This keeps `now`, `today`, and `tomorrow` timezone-naive, as they were
# before, so they may still be compared against other naive datetimes.
now = dt.datetime.now(tz=dt.timezone.utc).replace(tzinfo=None)
today = dt.datetime(now.year, now.month, now.day)
tomorrow = today + dt.timedelta(days=1)

# The LASP archive start day is also important
lasp_stime = dt.datetime(1947, 2, 14)
# ----------------------------------------------------------------------------
# Instrument test attributes
# Dict keyed by inst_id that lists supported tags and a good day of test data.
# Archived data sets use a fixed day, the remaining data sets use the days
# generated relative to the current date.
_test_dates = {
    '': {
        'historic': dt.datetime(2009, 1, 1),
        'prelim': dt.datetime(2009, 1, 1),
        'daily': today,
        'forecast': tomorrow,
        '45day': today,
    },
    **{gfz_id: {'now': dt.datetime(2009, 1, 1)} for gfz_id in ('obs', 'adj')},
}

# Other tags assumed to be True
_test_download_ci = {'': dict(prelim=False)}

# ----------------------------------------------------------------------------
# Instrument methods

# Re-use the general space weather preprocessing routine
preprocess = methods.general.preprocess
[docs]
def init(self):
    """Set the instrument-specific attributes of the Instrument object.

    Note
    ----
    Assigns the acknowledgements and references that belong to the
    Instrument's tag, logs the acknowledgements at the INFO level, and, for
    the 'historic' tag only, attaches the LASP archive start time as the
    `lasp_stime` attribute.

    """
    inst_tag = self.tag

    # Assign the required Instrument attributes
    self.acknowledgements = methods.f107.acknowledgements(inst_tag)
    self.references = methods.f107.references(inst_tag)
    pysat.logger.info(self.acknowledgements)

    # Only the historic data set is given the archive starting time
    if inst_tag == 'historic':
        self.lasp_stime = lasp_stime

    return
[docs]
def clean(self):
    """Leave the F10.7 data untouched, no cleaning is necessary."""
    return None
# ----------------------------------------------------------------------------
# Instrument functions
[docs]
def load(fnames, tag='', inst_id=''):
    """Load F10.7 index files.

    Parameters
    ----------
    fnames : pandas.Series
        Series of filenames. For the 'historic', 'prelim', and 'now' tags each
        name is expected to end with '_YYYY-MM-DD', the day to be loaded from
        that file.
    tag : str
        Instrument tag. (default='')
    inst_id : str
        Instrument ID, used to find the GFZ data column for the 'now' tag and
        to prefix the `f107` metadata description. (default='')

    Returns
    -------
    data : pandas.DataFrame
        Object containing satellite data.
    meta : pysat.Meta
        Object containing metadata such as column names and units.

    See Also
    --------
    pysat.instruments.methods.general.load_csv_data

    Note
    ----
    Called by pysat. Not intended for direct use by user.

    """
    # Separate the appended days from the names of the files on disk. The
    # dict keys keep each file name once, in order of first appearance.
    file_dates = []
    if tag in ('historic', 'prelim', 'now'):
        file_dates = [dt.datetime.strptime(dated_name[-10:], '%Y-%m-%d')
                      for dated_name in fnames]
        fnames = list(dict.fromkeys(dated_name[0:-11]
                                    for dated_name in fnames))

    # Load the CSV data files
    csv_kwargs = {"index_col": 0, "parse_dates": True}
    data = pysat.instruments.methods.general.load_csv_data(
        fnames, read_csv_kwargs=csv_kwargs)

    # Rename the GFZ variable name to be consistent with the other data sets
    if tag == 'now':
        gfz_name = 'F{:s}'.format(inst_id)
        data = data.rename(columns={gfz_name: 'f107'})

    # Keep only the requested days, from the first day up to (but excluding)
    # the day after the last one
    if file_dates:
        first_day = min(file_dates)
        end_bound = max(file_dates) + dt.timedelta(days=1)
        in_range = (data.index >= first_day) & (data.index < end_bound)
        data = data.iloc[np.where(in_range)[0], :]

    # Initialize the metadata
    meta = pysat.Meta()
    if inst_id == '':
        desc_prefix = ''
    else:
        desc_prefix = '{:s} '.format(inst_id.capitalize())

    f107_desc = '{:s}F10.7 cm radio flux in Solar Flux Units (SFU)'.format(
        desc_prefix)
    meta['f107'] = {meta.labels.units: 'SFU',
                    meta.labels.name: 'F10.7 cm solar index',
                    meta.labels.notes: '',
                    meta.labels.desc: f107_desc,
                    meta.labels.fill_val: np.nan,
                    meta.labels.min_val: 0,
                    meta.labels.max_val: np.inf}

    # LASP updated file format in June, 2022. Minimize impact downstream by
    # continuing use of `f107` as primary data product.
    if tag == 'historic' and 'f107_adjusted' in data.columns:
        if 'f107' in data.columns:
            # A mix of old and new data formats was loaded. Only fill NaN in
            # the `f107` and `f107_adjusted` columns, using the other column,
            # for consistency across both data sets.
            missing_f107 = np.isnan(data['f107'])
            data.loc[missing_f107, 'f107'] = data.loc[missing_f107,
                                                      'f107_adjusted']

            missing_adj = np.isnan(data['f107_adjusted'])
            data.loc[missing_adj, 'f107_adjusted'] = data.loc[missing_adj,
                                                              'f107']
        else:
            data['f107'] = data['f107_adjusted']

        # Add metadata for the new variables, starting from the `f107` values
        # and replacing only the description
        meta['f107_observed'] = meta['f107']
        raw_str = 'Raw F10.7 cm radio flux in Solar Flux Units (SFU)'
        meta['f107_observed'] = {meta.labels.desc: raw_str}

        meta['f107_adjusted'] = meta['f107_observed']
        norm_str = ''.join(['F10.7 cm radio flux in Solar Flux Units (SFU)',
                            ' normalized to 1-AU'])
        meta['f107_adjusted'] = {meta.labels.desc: norm_str}

        # `f107` shares the description of the adjusted data
        meta['f107'] = {
            meta.labels.desc: meta['f107_adjusted', meta.labels.desc]}

    return data, meta
[docs]
def list_files(tag='', inst_id='', data_path='', format_str=None):
    """List local F10.7 data files.

    Parameters
    ----------
    tag : str
        Instrument tag, accepts any value from `tags`. (default='')
    inst_id : str
        Instrument ID, used to build the default file format for the 'now'
        tag and passed on to the SWPC file listing routine. (default='')
    data_path : str
        Path to data directory. (default='')
    format_str : str or NoneType
        User specified file format. If None is specified, the default
        formats associated with the supplied tags are used. (default=None)

    Returns
    -------
    out_files : pandas.Series
        Available file names indexed by time. NOTE(review): assumes
        `pysat.Files.from_os` and `methods.swpc.list_files` both return a
        pandas Series; confirm against those routines.

    Note
    ----
    Called by pysat. Not intended for direct use by user.

    The 'historic', 'prelim', and 'now' files each hold more than one day of
    data. Their listing is expanded to a daily cadence and the day is
    appended to every file name as '_YYYY-MM-DD', which the load routine uses
    to select the appropriate data.

    """
    if tag in ['historic', 'now']:
        # Files are by month, going to add date to monthly filename for
        # each day of the month. The load routine will load a month of
        # data and use the appended date to select out appropriate data.
        if format_str is None:
            if tag == 'historic':
                format_str = 'f107_monthly_{year:04d}-{month:02d}.txt'
            else:
                format_str = 'F{:s}_{{year:04d}}-{{month:02d}}.txt'.format(
                    inst_id)

        out_files = pysat.Files.from_os(data_path=data_path,
                                        format_str=format_str)
        if not out_files.empty:
            # Add an entry for the last day of the final month, so that the
            # daily padding below covers that whole month
            out_files.loc[out_files.index[-1] + pds.DateOffset(months=1)
                          - pds.DateOffset(days=1)] = out_files.iloc[-1]
            out_files = out_files.asfreq('D', 'pad')
            out_files = out_files + '_' + out_files.index.strftime(
                '%Y-%m-%d')

    elif tag == 'prelim':
        # Files are by year (and quarter)
        if format_str is None:
            format_str = ''.join(['f107_prelim_{year:04d}_{month:02d}',
                                  '_v{version:01d}.txt'])

        out_files = pysat.Files.from_os(data_path=data_path,
                                        format_str=format_str)

        if not out_files.empty:
            # Set each file's valid length at a 1-day resolution
            orig_files = out_files.sort_index().copy()
            new_files = list()

            for istart, orig_name in orig_files.items():
                # Version determines each file's valid length
                version = np.int64(orig_name.split("_v")[1][0])
                doff = (pds.DateOffset(years=1) if version == 2
                        else pds.DateOffset(months=3))
                iend = istart + doff - pds.DateOffset(days=1)

                # Ensure the end time does not extend past the number of
                # possible days included based on the file's download time.
                # `datetime.utcfromtimestamp` is deprecated as of Python 3.12;
                # an aware UTC time is built instead and made naive again so
                # it can be compared with the naive file index.
                fname = os.path.join(data_path, orig_name)
                file_time = dt.datetime.fromtimestamp(
                    os.path.getctime(fname), tz=dt.timezone.utc)
                dend = file_time.replace(tzinfo=None) - pds.DateOffset(days=1)
                if dend < iend:
                    iend = dend

                # Pad the original file index
                out_files.loc[iend] = orig_name
                out_files = out_files.sort_index()

                # Save the files at a daily cadence over the desired period
                new_files.append(
                    out_files.loc[istart:iend].asfreq('D', 'pad'))

            # Add the newly indexed files to the file output
            out_files = pds.concat(new_files, sort=True)
            out_files = out_files.dropna()
            out_files = out_files.sort_index()
            out_files = out_files + '_' + out_files.index.strftime('%Y-%m-%d')

    elif tag in ['daily', 'forecast', '45day']:
        out_files = methods.swpc.list_files(name, tag, inst_id, data_path,
                                            format_str=format_str)

    return out_files
[docs]
def download(date_array, tag, inst_id, data_path, update_files=False,
             mock_download_dir=None):
    """Download F107 index data from the appropriate repository.

    Parameters
    ----------
    date_array : array-like
        Sequence of dates for which files will be downloaded. Not used by the
        'daily' tag, which always requests the files for the current day.
    tag : str
        Denotes type of file to load.
    inst_id : str
        Specifies the satellite ID for a constellation.
    data_path : str
        Path to data directory.
    update_files : bool
        Re-download data for files that already exist if True. Only passed on
        to the 'historic' and 'now' download routines. (default=False)
    mock_download_dir : str or NoneType
        Local directory with downloaded files or None. If not None, will
        process any files with the correct name and date as if they were
        downloaded (default=None)

    Raises
    ------
    IOError
        If a problem is encountered connecting to the gateway or retrieving
        data from the remote or local repository. NOTE(review): raised by the
        download routines called here, not by this function itself; confirm.

    Warnings
    --------
    Only able to download current forecast data, not archived forecasts.

    Note
    ----
    Called by pysat. Not intended for direct use by user.

    A `tag` that is not supported results in no download and no error.

    """
    # Download standard F107 data
    if tag == 'historic':
        # Test the date array, updating it to a month-start cadence if
        # necessary. Array-like input without a `freq` attribute (e.g., a
        # list of datetimes) is always converted.
        if getattr(date_array, 'freq', None) != 'MS':
            date_array = pysat.utils.time.create_date_range(
                dt.datetime(date_array[0].year, date_array[0].month, 1),
                date_array[-1], freq='MS')

        # Download from LASP, by month
        freq = pds.DateOffset(months=1, seconds=-1)
        methods.lisird.download(date_array, data_path, 'f107_monthly_',
                                '%Y-%m', 'noaa_radio_flux', freq, update_files,
                                {'f107_adjusted': -99999.0,
                                 'f107_observed': -99999.0},
                                mock_download_dir=mock_download_dir)
    elif tag == 'prelim':
        # Get the local files, to ensure that the version 1 files are
        # downloaded again if more data has been added
        local_files = list_files(tag, inst_id, data_path)

        # Cut the date from the end of the local files. The listing is indexed
        # by date, so the position must be set explicitly through `iloc`;
        # integer keys passed directly to a Series with a non-integer index
        # are deprecated as positional keys.
        for i, lfile in enumerate(local_files):
            local_files.iloc[i] = lfile[:-11]

        methods.swpc.old_indices_dsd_download(
            name, date_array, data_path, local_files, today,
            mock_download_dir=mock_download_dir)
    elif tag == 'now':
        # Set the download input options
        gfz_data_name = 'F{:s}'.format(inst_id)
        local_file_prefix = '{:s}_'.format(gfz_data_name)

        # Call the download routine
        methods.gfz.json_downloads(date_array, data_path, local_file_prefix,
                                   "%Y-%m", gfz_data_name,
                                   pds.DateOffset(months=1, seconds=-1),
                                   update_files=update_files,
                                   mock_download_dir=mock_download_dir)
    elif tag == 'daily':
        # The daily file is requested for the day this module was imported
        methods.swpc.daily_dsd_download(name, today, data_path,
                                        mock_download_dir=mock_download_dir)
    elif tag == 'forecast':
        methods.swpc.solar_geomag_predictions_download(
            name, date_array, data_path, mock_download_dir=mock_download_dir)
    elif tag == '45day':
        methods.swpc.recent_ap_f107_download(
            name, date_array, data_path, mock_download_dir=mock_download_dir)

    return