Source code for pysatSpaceWeather.instruments.methods.f107

#!/usr/bin/env python
# -*- coding: utf-8 -*-.
# Full license can be found in License.md
# Full author list can be found in .zenodo.json file
# DOI:10.5281/zenodo.3986138
#
# Review Status for Classified or Controlled Information by NRL
# -------------------------------------------------------------
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# ----------------------------------------------------------------------------

"""Routines for the F10.7 solar index."""

import datetime as dt
import numpy as np

import pandas as pds
import pysat

import pysatSpaceWeather as pysat_sw
from pysatSpaceWeather.instruments.methods.general import is_fill_val


[docs] def acknowledgements(tag): """Define the acknowledgements for the F10.7 data. Parameters ---------- tag : str Tag of the space weather index Returns ------- ackn : str Acknowledgements string associated with the appropriate F10.7 tag. """ lisird = 'NOAA radio flux obtained through LISIRD' swpc = ''.join(['Prepared by the U.S. Dept. of Commerce, NOAA, Space ', 'Weather Prediction Center']) ackn = {'historic': lisird, 'prelim': swpc, 'daily': swpc, 'forecast': swpc, '45day': swpc, 'now': pysat_sw.instruments.methods.gfz.ackn} return ackn[tag]
[docs] def references(tag): """Define the references for the F10.7 data. Parameters ---------- tag : str Instrument tag for the F10.7 data. Returns ------- refs : str Reference string associated with the appropriate F10.7 tag. """ noaa_desc = ''.join(['Dataset description: ', 'https://www.ngdc.noaa.gov/stp/space-weather/', 'solar-data/solar-features/solar-radio/noontime-flux', '/penticton/documentation/dataset-description', '_penticton.pdf, accessed Dec 2020']) orig_ref = ''.join(["Covington, A.E. (1948), Solar noise observations on", " 10.7 centimetersSolar noise observations on 10.7 ", "centimeters, Proceedings of the IRE, 36(44), ", "p 454-457."]) swpc_desc = ''.join(['Dataset description: https://www.swpc.noaa.gov/', 'sites/default/files/images/u2/Usr_guide.pdf']) gfz_desc = ''.join(['Dataset description: https://kp.gfz-potsdam.de/app/', 'format/Kp_ap_Ap_SN_F107_format.txt']) refs = {'historic': "\n".join([noaa_desc, orig_ref]), 'prelim': "\n".join([swpc_desc, orig_ref]), 'now': "\n".join([gfz_desc, orig_ref]), 'daily': "\n".join([swpc_desc, orig_ref]), 'forecast': "\n".join([swpc_desc, orig_ref]), '45day': "\n".join([swpc_desc, orig_ref])} return refs[tag]
[docs] def combine_f107(standard_inst, forecast_inst, start=None, stop=None): """Combine the output from the measured and forecasted F10.7 sources. Parameters ---------- standard_inst : pysat.Instrument or NoneType Instrument object containing data for the 'sw' platform, 'f107' name, and 'historic', 'prelim', 'now', or 'daily' tag forecast_inst : pysat.Instrument or NoneType Instrument object containing data for the 'sw' platform, 'f107' name, and 'now', 'prelim', '45day' or 'forecast' tag start : dt.datetime or NoneType Starting time for combining data, or None to use earliest loaded date from the pysat Instruments (default=None) stop : dt.datetime or NoneType Ending time for combining data, or None to use the latest loaded date from the pysat Instruments (default=None) Returns ------- f107_inst : pysat.Instrument Instrument object containing F10.7 observations for the desired period of time, merging the standard, 45day, and forecasted values based on their reliability Raises ------ ValueError If appropriate time data is not supplied, or if the date range is badly formed. Notes ----- Merging prioritizes the standard data, then the 45day data, and finally the forecast data. Will not attempt to download any missing data, but will load data. If no data is present, but dates are provided, supplies a series of fill values. """ # Initialize metadata and flags notes = "Combines data from" stag = standard_inst.tag if len(standard_inst.tag) > 0 else 'default' tag = 'combined_{:s}_{:s}'.format(stag, forecast_inst.tag) inst_flag = 'standard' # If the start or stop times are not defined, get them from the Instruments if start is None: stimes = [inst.index.min() for inst in [standard_inst, forecast_inst] if len(inst.index) > 0] start = min(stimes) if len(stimes) > 0 else None if stop is None: stimes = [inst.index.max() for inst in [standard_inst, forecast_inst] if len(inst.index) > 0] stop = max(stimes) + pds.DateOffset(days=1) if len(stimes) > 0 else None if start is None or stop is None: raise ValueError(' '.join(("must either load in Instrument objects or", "provide starting and ending times"))) if start >= stop: raise ValueError("date range is zero or negative") # Initialize the output instrument f107_inst = pysat.Instrument() f107_inst.inst_module = pysat_sw.instruments.sw_f107 f107_inst.tag = tag f107_inst.date = start f107_inst.doy = np.int64(start.strftime("%j")) fill_val = None f107_times = list() f107_values = list() # Cycle through the desired time range itime = dt.datetime(start.year, start.month, start.day) while itime < stop and inst_flag is not None: # Load and save the standard data for as many times as possible if inst_flag == 'standard': # Test to see if data loading is needed if not np.any(standard_inst.index == itime): # Set the load kwargs, which vary by pysat version and tag load_kwargs = {'date': itime} if standard_inst.tag == 'daily': # Add 30 days load_kwargs['date'] += dt.timedelta(days=30) standard_inst.load(**load_kwargs) if standard_inst.empty: good_times = [False] else: good_times = ((standard_inst.index >= itime) & (standard_inst.index < stop)) if notes.find("standard") < 0: notes += " the {:} source ({:} to ".format(inst_flag, itime.date()) if np.any(good_times): if fill_val is None: f107_inst.meta = standard_inst.meta fill_val = f107_inst.meta['f107'][ f107_inst.meta.labels.fill_val] good_vals = np.array([not is_fill_val(val, fill_val) for val in standard_inst['f107'][good_times]]) new_times = list(standard_inst.index[good_times][good_vals]) else: new_times = [] if len(new_times) > 0: f107_times.extend(new_times) new_vals = list(standard_inst['f107'][good_times][good_vals]) f107_values.extend(new_vals) itime = f107_times[-1] + pds.DateOffset(days=1) else: inst_flag = 'forecast' notes += "{:})".format(itime.date()) # Load and save the forecast data for as many times as possible if inst_flag == "forecast": # Determine which files should be loaded if len(forecast_inst.index) == 0: if len(forecast_inst.files.files) > 0: files = np.unique(forecast_inst.files.files[itime:stop]) else: files = [None] # No load, because no files are available else: files = [None] # No load needed, if already initialized # Cycle through all possible files of interest, saving relevant # data for filename in files: if filename is not None: forecast_inst.load(fname=filename) if notes.find("forecast") < 0: notes += " the {:} source ({:} to ".format(inst_flag, itime.date()) # Determine which times to save if forecast_inst.empty: good_vals = [] else: # Check in case there was a problem with the standard data if fill_val is None: f107_inst.meta = forecast_inst.meta fill_val = f107_inst.meta['f107'][ f107_inst.meta.labels.fill_val] # Get the good times and values good_times = ((forecast_inst.index >= itime) & (forecast_inst.index < stop)) good_vals = np.array([ not is_fill_val(val, fill_val) for val in forecast_inst['f107'][good_times]]) # Save desired data and cycle time if len(good_vals) > 0: new_times = list(forecast_inst.index[good_times][ good_vals]) f107_times.extend(new_times) new_vals = list( forecast_inst['f107'][good_times][good_vals]) f107_values.extend(new_vals) itime = f107_times[-1] + pds.DateOffset(days=1) notes += "{:})".format(itime.date()) inst_flag = None if inst_flag is not None: notes += "{:})".format(itime.date()) # Determine if the beginning or end of the time series needs to be padded if len(f107_times) >= 2: freq = pysat.utils.time.calc_freq(f107_times) else: freq = None end_date = stop - pds.DateOffset(days=1) date_range = pds.date_range(start=start, end=end_date, freq=freq) if len(f107_times) == 0: f107_times = date_range if date_range[0] < f107_times[0]: # Extend the time and value arrays from their beginning with fill # values itime = abs(date_range - f107_times[0]).argmin() f107_times.reverse() f107_values.reverse() extend_times = list(date_range[:itime]) extend_times.reverse() f107_times.extend(extend_times) f107_values.extend([fill_val for kk in extend_times]) f107_times.reverse() f107_values.reverse() if date_range[-1] > f107_times[-1]: # Extend the time and value arrays from their end with fill values itime = abs(date_range - f107_times[-1]).argmin() + 1 extend_times = list(date_range[itime:]) f107_times.extend(extend_times) f107_values.extend([fill_val for kk in extend_times]) # Save output data f107_inst.data = pds.DataFrame(f107_values, columns=['f107'], index=f107_times) # Resample the output data, filling missing values if (date_range.shape != f107_inst.index.shape or abs(date_range - f107_inst.index).max().total_seconds() > 0.0): f107_inst.data = f107_inst.data.resample(freq).asfreq() if np.isfinite(fill_val): f107_inst.data[np.isnan(f107_inst.data)] = fill_val # Update the metadata notes for this procedure notes += ", in that order" f107_inst.meta['f107'] = {f107_inst.meta.labels.notes: notes} return f107_inst
[docs] def calc_f107a(f107_inst, f107_name='f107', f107a_name='f107a', min_pnts=41): """Calculate the 81 day mean F10.7. Parameters ---------- f107_inst : pysat.Instrument pysat Instrument holding the F10.7 data f107_name : str Data column name for the F10.7 data (default='f107') f107a_name : str Data column name for the F10.7a data (default='f107a') min_pnts : int Minimum number of points required to calculate an average (default=41) Note ---- Will not pad data on its own """ # Test to see that the input data is present if f107_name not in f107_inst.data.columns: raise ValueError("unknown input data column: " + f107_name) # Test to see that the output data does not already exist if f107a_name in f107_inst.data.columns: raise ValueError("output data column already exists: " + f107a_name) if f107_name in f107_inst.meta: fill_val = f107_inst.meta[f107_name][f107_inst.meta.labels.fill_val] else: fill_val = np.nan # Calculate the rolling mean. Since these values are centered but rolling # function doesn't allow temporal windows to be calculated this way, create # a hack for this. # # Ensure the data are evenly sampled at a daily frequency, since this is # how often F10.7 is calculated. f107_fill = f107_inst.data.resample('1D').asfreq() # Replace the time index with an ordinal time_ind = f107_fill.index f107_fill['ord'] = pds.Series([tt.toordinal() for tt in time_ind], index=time_ind) f107_fill.set_index('ord', inplace=True) # Calculate the mean f107_fill[f107a_name] = f107_fill[f107_name].rolling(window=81, min_periods=min_pnts, center=True).mean() # Replace the ordinal index with the time f107_fill['time'] = pds.Series(time_ind, index=f107_fill.index) f107_fill.set_index('time', inplace=True) # Resample to the original frequency, if it is not equal to 1 day freq = pysat.utils.time.calc_freq(f107_inst.index) if freq != "86400s": # Resample to the desired frequency f107_fill = f107_fill.resample(freq).ffill() # Save the output in a list f107a = list(f107_fill[f107a_name]) # Fill any dates that fall just outside of the range time_ind = pds.date_range(f107_fill.index[0], f107_inst.index[-1], freq=freq) for itime in time_ind[f107_fill.index.shape[0]:]: if (itime - f107_fill.index[-1]).total_seconds() < 86400.0: f107a.append(f107a[-1]) else: f107a.append(fill_val) # Redefine the Series f107_fill = pds.DataFrame({f107a_name: f107a}, index=time_ind) # There may be missing days in the output data, remove these if f107_inst.index.shape < f107_fill.index.shape: f107_fill = f107_fill.loc[f107_inst.index] # Save the data f107_inst[f107a_name] = f107_fill[f107a_name] # Update the metadata meta_dict = {f107_inst.meta.labels.units: 'SFU', f107_inst.meta.labels.name: 'F10.7a', f107_inst.meta.labels.desc: "81-day centered average of F10.7", f107_inst.meta.labels.min_val: 0.0, f107_inst.meta.labels.max_val: np.nan, f107_inst.meta.labels.fill_val: fill_val, f107_inst.meta.labels.notes: ' '.join(('Calculated using data between', '{:} and {:}'.format(f107_inst.index[0], f107_inst.index[-1])))} f107_inst.meta[f107a_name] = meta_dict return