Source code for pysatSpaceWeather.instruments.methods.f107

#!/usr/bin/env python
# -*- coding: utf-8 -*-.
# Full license can be found in License.md
# Full author list can be found in .zenodo.json file
# DOI:10.5281/zenodo.3986138
#
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# ----------------------------------------------------------------------------

"""Routines for the F10.7 solar index."""

import datetime as dt
import numpy as np
from packaging.version import Version

import pandas as pds
import pysat

import pysatSpaceWeather as pysat_sw


[docs] def acknowledgements(tag): """Define the acknowledgements for the F10.7 data. Parameters ---------- tag : str Tag of the space weather index Returns ------- ackn : str Acknowledgements string associated with the appropriate F10.7 tag. """ lisird = 'NOAA radio flux obtained through LISIRD' swpc = ''.join(['Prepared by the U.S. Dept. of Commerce, NOAA, Space ', 'Weather Prediction Center']) ackn = {'historic': lisird, 'prelim': swpc, 'daily': swpc, 'forecast': swpc, '45day': swpc, 'now': pysat_sw.instruments.methods.gfz.ackn} return ackn[tag]
[docs] def references(tag): """Define the references for the F10.7 data. Parameters ---------- tag : str Instrument tag for the F10.7 data. Returns ------- refs : str Reference string associated with the appropriate F10.7 tag. """ noaa_desc = ''.join(['Dataset description: ', 'https://www.ngdc.noaa.gov/stp/space-weather/', 'solar-data/solar-features/solar-radio/noontime-flux', '/penticton/documentation/dataset-description', '_penticton.pdf, accessed Dec 2020']) orig_ref = ''.join(["Covington, A.E. (1948), Solar noise observations on", " 10.7 centimetersSolar noise observations on 10.7 ", "centimeters, Proceedings of the IRE, 36(44), ", "p 454-457."]) swpc_desc = ''.join(['Dataset description: https://www.swpc.noaa.gov/', 'sites/default/files/images/u2/Usr_guide.pdf']) gfz_desc = ''.join(['Dataset description: https://kp.gfz-potsdam.de/app/', 'format/Kp_ap_Ap_SN_F107_format.txt']) refs = {'historic': "\n".join([noaa_desc, orig_ref]), 'prelim': "\n".join([swpc_desc, orig_ref]), 'now': "\n".join([gfz_desc, orig_ref]), 'daily': "\n".join([swpc_desc, orig_ref]), 'forecast': "\n".join([swpc_desc, orig_ref]), '45day': "\n".join([swpc_desc, orig_ref])} return refs[tag]
[docs] def combine_f107(standard_inst, forecast_inst, start=None, stop=None): """Combine the output from the measured and forecasted F10.7 sources. Parameters ---------- standard_inst : pysat.Instrument or NoneType Instrument object containing data for the 'sw' platform, 'f107' name, and 'historic', 'prelim', 'now', or 'daily' tag forecast_inst : pysat.Instrument or NoneType Instrument object containing data for the 'sw' platform, 'f107' name, and 'now', 'prelim', '45day' or 'forecast' tag start : dt.datetime or NoneType Starting time for combining data, or None to use earliest loaded date from the pysat Instruments (default=None) stop : dt.datetime or NoneType Ending time for combining data, or None to use the latest loaded date from the pysat Instruments (default=None) Returns ------- f107_inst : pysat.Instrument Instrument object containing F10.7 observations for the desired period of time, merging the standard, 45day, and forecasted values based on their reliability Raises ------ ValueError If appropriate time data is not supplied, or if the date range is badly formed. Notes ----- Merging prioritizes the standard data, then the 45day data, and finally the forecast data Will not attempt to download any missing data, but will load data """ # Initialize metadata and flags notes = "Combines data from" stag = standard_inst.tag if len(standard_inst.tag) > 0 else 'default' tag = 'combined_{:s}_{:s}'.format(stag, forecast_inst.tag) inst_flag = 'standard' # If the start or stop times are not defined, get them from the Instruments if start is None: stimes = [inst.index.min() for inst in [standard_inst, forecast_inst] if len(inst.index) > 0] start = min(stimes) if len(stimes) > 0 else None if stop is None: stimes = [inst.index.max() for inst in [standard_inst, forecast_inst] if len(inst.index) > 0] stop = max(stimes) + pds.DateOffset(days=1) if len(stimes) > 0 else None if start is None or stop is None: raise ValueError(' '.join(("must either load in Instrument objects or", "provide starting and ending times"))) if start >= stop: raise ValueError("date range is zero or negative") # Initialize the output instrument f107_inst = pysat.Instrument() f107_inst.inst_module = pysat_sw.instruments.sw_f107 f107_inst.tag = tag f107_inst.date = start f107_inst.doy = np.int64(start.strftime("%j")) fill_val = None f107_times = list() f107_values = list() # Cycle through the desired time range itime = dt.datetime(start.year, start.month, start.day) while itime < stop and inst_flag is not None: # Load and save the standard data for as many times as possible if inst_flag == 'standard': # Test to see if data loading is needed if not np.any(standard_inst.index == itime): # Set the load kwargs, which vary by pysat version and tag load_kwargs = {'date': itime} # TODO(#131): Remove version check after minimum version # supported is 3.2.0 if all([Version(pysat.__version__) > Version('3.0.1'), Version(pysat.__version__) < Version('3.2.0')]): load_kwargs['use_header'] = True if standard_inst.tag == 'daily': # Add 30 days load_kwargs['date'] += dt.timedelta(days=30) standard_inst.load(**load_kwargs) if standard_inst.empty: good_times = [False] else: good_times = ((standard_inst.index >= itime) & (standard_inst.index < stop)) if notes.find("standard") < 0: notes += " the {:} source ({:} to ".format(inst_flag, itime.date()) if np.any(good_times): if fill_val is None: f107_inst.meta = standard_inst.meta fill_val = f107_inst.meta['f107'][ f107_inst.meta.labels.fill_val] good_vals = standard_inst['f107'][good_times] != fill_val new_times = list(standard_inst.index[good_times][good_vals]) f107_times.extend(new_times) new_vals = list(standard_inst['f107'][good_times][good_vals]) f107_values.extend(new_vals) itime = f107_times[-1] + pds.DateOffset(days=1) else: inst_flag = 'forecast' notes += "{:})".format(itime.date()) # Load and save the forecast data for as many times as possible if inst_flag == "forecast": # Determine which files should be loaded if len(forecast_inst.index) == 0: files = np.unique(forecast_inst.files.files[itime:stop]) else: files = [None] # No load needed, if already initialized # Cycle through all possible files of interest, saving relevant # data for filename in files: if filename is not None: load_kwargs = {'fname': filename} # TODO(#131): Remove version check after minimum version # supported is 3.2.0 if all([Version(pysat.__version__) > Version('3.0.1'), Version(pysat.__version__) < Version('3.2.0')]): load_kwargs['use_header'] = True forecast_inst.load(**load_kwargs) if notes.find("forecast") < 0: notes += " the {:} source ({:} to ".format(inst_flag, itime.date()) # Check in case there was a problem with the standard data if fill_val is None: f107_inst.meta = forecast_inst.meta fill_val = f107_inst.meta['f107'][ f107_inst.meta.labels.fill_val] # Determine which times to save if forecast_inst.empty: good_vals = [] else: good_times = ((forecast_inst.index >= itime) & (forecast_inst.index < stop)) good_vals = forecast_inst['f107'][good_times] != fill_val # Save desired data and cycle time if len(good_vals) > 0: new_times = list( forecast_inst.index[good_times][good_vals]) f107_times.extend(new_times) new_vals = list( forecast_inst['f107'][good_times][good_vals]) f107_values.extend(new_vals) itime = f107_times[-1] + pds.DateOffset(days=1) notes += "{:})".format(itime.date()) inst_flag = None if inst_flag is not None: notes += "{:})".format(itime.date()) # Determine if the beginning or end of the time series needs to be padded if len(f107_times) >= 2: freq = pysat.utils.time.calc_freq(f107_times) else: freq = None end_date = stop - pds.DateOffset(days=1) date_range = pds.date_range(start=start, end=end_date, freq=freq) if len(f107_times) == 0: f107_times = date_range date_range = pds.date_range(start=start, end=end_date, freq=freq) if date_range[0] < f107_times[0]: # Extend the time and value arrays from their beginning with fill # values itime = abs(date_range - f107_times[0]).argmin() f107_times.reverse() f107_values.reverse() extend_times = list(date_range[:itime]) extend_times.reverse() f107_times.extend(extend_times) f107_values.extend([fill_val for kk in extend_times]) f107_times.reverse() f107_values.reverse() if date_range[-1] > f107_times[-1]: # Extend the time and value arrays from their end with fill values itime = abs(date_range - f107_times[-1]).argmin() + 1 extend_times = list(date_range[itime:]) f107_times.extend(extend_times) f107_values.extend([fill_val for kk in extend_times]) # Save output data f107_inst.data = pds.DataFrame(f107_values, columns=['f107'], index=f107_times) # Resample the output data, filling missing values if (date_range.shape != f107_inst.index.shape or abs(date_range - f107_inst.index).max().total_seconds() > 0.0): f107_inst.data = f107_inst.data.resample(freq).fillna(method=None) if np.isfinite(fill_val): f107_inst.data[np.isnan(f107_inst.data)] = fill_val # Update the metadata notes for this procedure notes += ", in that order" f107_inst.meta['f107'] = {f107_inst.meta.labels.notes: notes} return f107_inst
[docs] def calc_f107a(f107_inst, f107_name='f107', f107a_name='f107a', min_pnts=41): """Calculate the 81 day mean F10.7. Parameters ---------- f107_inst : pysat.Instrument pysat Instrument holding the F10.7 data f107_name : str Data column name for the F10.7 data (default='f107') f107a_name : str Data column name for the F10.7a data (default='f107a') min_pnts : int Minimum number of points required to calculate an average (default=41) Note ---- Will not pad data on its own """ # Test to see that the input data is present if f107_name not in f107_inst.data.columns: raise ValueError("unknown input data column: " + f107_name) # Test to see that the output data does not already exist if f107a_name in f107_inst.data.columns: raise ValueError("output data column already exists: " + f107a_name) if f107_name in f107_inst.meta: fill_val = f107_inst.meta[f107_name][f107_inst.meta.labels.fill_val] else: fill_val = np.nan # Calculate the rolling mean. Since these values are centered but rolling # function doesn't allow temporal windows to be calculated this way, create # a hack for this. # # Ensure the data are evenly sampled at a daily frequency, since this is # how often F10.7 is calculated. f107_fill = f107_inst.data.resample('1D').fillna(method=None) # Replace the time index with an ordinal time_ind = f107_fill.index f107_fill['ord'] = pds.Series([tt.toordinal() for tt in time_ind], index=time_ind) f107_fill.set_index('ord', inplace=True) # Calculate the mean f107_fill[f107a_name] = f107_fill[f107_name].rolling(window=81, min_periods=min_pnts, center=True).mean() # Replace the ordinal index with the time f107_fill['time'] = pds.Series(time_ind, index=f107_fill.index) f107_fill.set_index('time', inplace=True) # Resample to the original frequency, if it is not equal to 1 day freq = pysat.utils.time.calc_freq(f107_inst.index) if freq != "86400S": # Resample to the desired frequency f107_fill = f107_fill.resample(freq).ffill() # Save the output in a list f107a = list(f107_fill[f107a_name]) # Fill any dates that fall time_ind = pds.date_range(f107_fill.index[0], f107_inst.index[-1], freq=freq) for itime in time_ind[f107_fill.index.shape[0]:]: if (itime - f107_fill.index[-1]).total_seconds() < 86400.0: f107a.append(f107a[-1]) else: f107a.append(fill_val) # Redefine the Series f107_fill = pds.DataFrame({f107a_name: f107a}, index=time_ind) # There may be missing days in the output data, remove these if f107_inst.index.shape < f107_fill.index.shape: f107_fill = f107_fill.loc[f107_inst.index] # Save the data f107_inst[f107a_name] = f107_fill[f107a_name] # Update the metadata meta_dict = {f107_inst.meta.labels.units: 'SFU', f107_inst.meta.labels.name: 'F10.7a', f107_inst.meta.labels.desc: "81-day centered average of F10.7", f107_inst.meta.labels.min_val: 0.0, f107_inst.meta.labels.max_val: np.nan, f107_inst.meta.labels.fill_val: fill_val, f107_inst.meta.labels.notes: ' '.join(('Calculated using data between', '{:} and {:}'.format(f107_inst.index[0], f107_inst.index[-1])))} f107_inst.meta[f107a_name] = meta_dict return