#!/usr/bin/env python
# -*- coding: utf-8 -*-.
# Full license can be found in License.md
# Full author list can be found in .zenodo.json file
# DOI:10.5281/zenodo.3986138
#
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# ----------------------------------------------------------------------------
"""Provides routines that support SWPC space weather instruments."""
import datetime as dt
import ftplib
import numpy as np
import os
import pandas as pds
import sys
import pysat
from pysatSpaceWeather.instruments.methods import general
# ----------------------------------------------------------------------------
# Define the module variables
# Acknowledgement text for SWPC data products
ackn = ('Prepared by the U.S. Dept. of Commerce, NOAA, Space '
        'Weather Prediction Center')

# Message logged by the routines that can only retrieve the current file
forecast_warning = ('This routine can only download the current '
                    'forecast, not archived forecasts')
# ----------------------------------------------------------------------------
# Define the module functions
[docs]
def daily_dsd_download(name, today, data_path, mock_download_dir=None):
    """Retrieve the current NOAA Daily Solar Data file and split it by type.

    Parameters
    ----------
    name : str
        Instrument name, expects one of 'f107', 'flare', 'ssn', or 'sbfield'.
    today : dt.datetime
        Datetime for current day; supplies the date in the output file names
        and the year handed to the rewriting routine.
    data_path : str
        Path to data directory for the Instrument specified by `name`.
    mock_download_dir : str or NoneType
        Local directory with downloaded files or None. Passed through to
        `general.get_local_or_remote_text` (default=None)

    Raises
    ------
    IOError
        Per the original documentation, if an unknown mock download directory
        is supplied or the desired file is missing (presumably raised by
        `general.get_local_or_remote_text`; verify against that helper).

    Note
    ----
    Output files are written for all four Instruments. Those that differ
    from `name` use the path returned by `general.get_instrument_data_path`
    with the 'daily' tag.

    """
    pysat.logger.info('This routine only downloads the latest 30 day file')

    # Retrieve the file contents as text
    raw_txt = general.get_local_or_remote_text(
        'https://services.swpc.noaa.gov/text/', mock_download_dir,
        'daily-solar-indices.txt')

    if raw_txt is None:
        # Nothing to process, report and leave
        pysat.logger.info("Data not downloaded for "
                          "daily-solar-indices.txt, data may have "
                          "been saved to an unexpected filename.")
        return

    # Determine the output directory for each Instrument
    file_paths = dict()
    for inst_name in ['f107', 'flare', 'ssn', 'sbfield']:
        if name == inst_name:
            file_paths[inst_name] = data_path
        else:
            file_paths[inst_name] = general.get_instrument_data_path(
                'sw_{:s}'.format(inst_name), tag='daily')

    # Build the output file name for each Instrument
    outfiles = dict()
    for inst_name, inst_path in file_paths.items():
        base_name = '_'.join([inst_name, 'daily', '{:s}.txt'.format(
            today.strftime('%Y-%m-%d'))])
        outfiles[inst_name] = os.path.join(inst_path, base_name)

    # Ensure every output directory is present
    for inst_path in file_paths.values():
        pysat.utils.files.check_and_make_path(inst_path)

    # Parse the text and write one file per Instrument
    rewrite_daily_solar_data_file(today.year, outfiles, raw_txt)

    return
[docs]
def old_indices_dsd_download(name, date_array, data_path, local_files, today,
                             mock_download_dir=None):
    """Download the old NOAA Daily Solar Data indices.

    Parameters
    ----------
    name : str
        Instrument name, expects one of 'f107', 'flare', 'ssn', or 'sbfield'.
    date_array : array-like or pandas.DatetimeIndex
        Array-like or index of datetimes to be downloaded. Only the first and
        last entries are used to bound the download period.
    data_path : str
        Path to data directory. Also used as the temporary location for files
        retrieved from the FTP server.
    local_files : pds.Series
        A Series containing the local filenames indexed by time.
    today : dt.datetime
        Datetime for current day
    mock_download_dir : str or NoneType
        Local directory with downloaded files or None. If not None, will
        process any files with the correct name and date as if they were
        downloaded (default=None)

    Raises
    ------
    IOError
        If `mock_download_dir` is supplied but is not a directory, or if the
        FTP server reports a permanent error other than a 550 reply.

    Note
    ----
    Note that the download path for the complementary Instrument will use
    the standard pysat data paths

    """
    # Get the file paths
    file_paths = {data_name: data_path if name == data_name else
                  general.get_instrument_data_path('sw_{:s}'.format(data_name),
                                                   tag='prelim')
                  for data_name in ['f107', 'flare', 'ssn', 'sbfield']}

    # Check that the directories exist. A separate loop variable is used so
    # the `data_path` argument, needed below for the temporary download
    # location, is not overwritten.
    for inst_path in file_paths.values():
        pysat.utils.files.check_and_make_path(inst_path)

    ftp = None
    if mock_download_dir is None:
        # Connect to the host, default port
        ftp = ftplib.FTP('ftp.swpc.noaa.gov')
        ftp.login()  # User anonymous, passwd anonymous
        ftp.cwd('/pub/indices/old_indices')
    elif not os.path.isdir(mock_download_dir):
        raise IOError('file location is not a directory: {:}'.format(
            mock_download_dir))

    bad_fname = list()

    try:
        # To avoid downloading multiple files, cycle dates based on the
        # period covered by each file
        dl_date = date_array[0]
        while dl_date <= date_array[-1]:
            # The file name changes, depending on how recent the requested
            # data is: either a yearly file or a quarterly file
            qnum = (dl_date.month - 1) // 3 + 1  # Integer floor division
            qmonth = (qnum - 1) * 3 + 1
            quar = 'Q{:d}_'.format(qnum)
            fnames = ['{:04d}{:s}DSD.txt'.format(dl_date.year, ss)
                      for ss in ['_', quar]]
            versions = ["01_v2", "{:02d}_v1".format(qmonth)]

            # Last day covered by the yearly and quarterly files
            vend = [dt.datetime(dl_date.year, 12, 31),
                    dt.datetime(dl_date.year, qmonth, 1)
                    + pds.DateOffset(months=3) - pds.DateOffset(days=1)]
            downloaded = False
            rewritten = False

            # Attempt the download(s)
            for iname, fname in enumerate(fnames):
                # Test to see if we already tried this filename
                if fname in bad_fname:
                    continue

                saved_fname = os.path.join(data_path, fname)
                outfiles = {
                    data_name: os.path.join(file_paths[data_name], '_'.join(
                        [data_name, 'prelim', '{:04d}'.format(dl_date.year),
                         '{:s}.txt'.format(versions[iname])]))
                    for data_name in file_paths.keys()}

                if os.path.isfile(outfiles[name]):
                    downloaded = True

                    # Check the date to see if this should be rewritten
                    checkfile = os.path.split(outfiles[name])[-1]
                    has_file = local_files == checkfile
                    if np.any(has_file):
                        last_time = has_file[has_file].index[-1]
                        if last_time < vend[iname]:
                            # This file will be updated again, but only
                            # attempt to do so if enough time has passed
                            # from the last time it was downloaded
                            yesterday = today - pds.DateOffset(days=1)
                            if last_time < yesterday:
                                rewritten = True
                else:
                    # The file does not exist, if it can be downloaded, it
                    # should be 'rewritten'
                    rewritten = True

                # Attempt to download if the file does not exist or if the
                # file has been updated
                if rewritten or not downloaded:
                    if mock_download_dir is None:
                        if _fetch_old_indices_file(ftp, fname, saved_fname):
                            downloaded = True
                            pysat.logger.info(' '.join((
                                'Downloaded file for ',
                                dl_date.strftime('%x'))))
                        else:
                            # Could not fetch, so cannot rewrite. Save the
                            # name so we don't try again; there are two
                            # possible filenames for each time, so it's ok
                            # if one isn't there.
                            rewritten = False
                            bad_fname.append(fname)
                    else:
                        # Set the saved filename
                        saved_fname = os.path.join(mock_download_dir, fname)
                        downloaded = True

                        if os.path.isfile(saved_fname):
                            rewritten = True
                        else:
                            pysat.logger.info("".join([
                                saved_fname, "is missing, ",
                                "data may have been saved ",
                                "to an unexpected ", "filename."]))
                            rewritten = False

                # If the first file worked, don't try again
                if downloaded:
                    break

            if not downloaded:
                pysat.logger.info(' '.join(('File not available for',
                                            dl_date.strftime('%x'))))
            elif rewritten:
                with open(saved_fname, 'r') as fprelim:
                    lines = fprelim.read()

                rewrite_daily_solar_data_file(dl_date.year, outfiles, lines)
                if mock_download_dir is None:
                    # Only remove the file if it wasn't obtained from a
                    # local dir
                    os.remove(saved_fname)

            # Cycle to the first date after the period of the last file tried
            dl_date = vend[iname] + pds.DateOffset(days=1)
    finally:
        # Close the connection, even if the download loop raised an error
        if ftp is not None:
            ftp.close()

    return


def _fetch_old_indices_file(ftp, fname, saved_fname):
    """Retrieve one file over an open FTP connection.

    Parameters
    ----------
    ftp : ftplib.FTP
        Open connection, already in the directory containing `fname`.
    fname : str
        Remote file name.
    saved_fname : str
        Local file name to which the remote contents are written.

    Returns
    -------
    bool
        True if the file was retrieved, False if the server sent a 550 reply.
        In the latter case the local file is removed.

    Raises
    ------
    IOError
        If the server reports a permanent error other than a 550 reply.

    """
    try:
        sys.stdout.flush()

        # The context manager guarantees the handle is closed before the
        # file is read or removed
        with open(saved_fname, 'wb') as fout:
            ftp.retrbinary('RETR ' + fname, fout.write)
    except ftplib.error_perm as exception:
        # Test for an error
        if str(exception.args[0]).split(" ", 1)[0] != '550':
            raise IOError(exception)

        # File isn't actually there. The extra wrapping is for Windows,
        # which can encounter permission errors when handling files.
        attempt = 0
        while attempt < 100:
            try:
                os.remove(saved_fname)
                attempt = 100
            except PermissionError:
                attempt += 1

        return False

    return True
[docs]
def rewrite_daily_solar_data_file(year, outfiles, lines):
    """Split SWPC Daily Solar Data text into one CSV file per Instrument.

    Parameters
    ----------
    year : int
        Year of data file (format changes based on date)
    outfiles : dict
        Output filenames for all relevant Instruments, keyed by 'f107',
        'flare', 'ssn', and 'sbfield'
    lines : str
        String containing all output data (result of 'read')

    """
    # Strip the header to reach the solar index data
    if year > 2000:
        data_txt = lines.split('#---------------------------------')[-1]
        raw_data = data_txt.split('\n')[1:-1]
        optical = True
    else:
        header_and_data = lines.split('# ')[-1].split('\n')

        # NOTE(review): `str.find` returns -1 (truthy) when the text is
        # absent and 0 (falsy) when it starts the line, so `optical` is only
        # True here when the line begins with 'Not Available' and the year
        # is not 1994. Confirm this is the intended test before changing it.
        optical = not (header_and_data[0].find('Not Available')
                       or year == 1994)

        # More header lines precede the data in files before 2000
        first_data = 1 if year >= 2000 else 7
        raw_data = header_and_data[first_data:-1]

    # Parse the data
    solar_times, data_dict = parse_daily_solar_data(raw_data, year, optical)

    # Variables belonging to each Instrument
    data_cols = {'f107': ('f107',),
                 'flare': ('goes_bgd_flux', 'c_flare', 'm_flare', 'x_flare',
                           'o1_flare', 'o2_flare', 'o3_flare'),
                 'ssn': ('ssn', 'ss_area', 'new_reg'),
                 'sbfield': ('smf',)}

    for inst_name, inst_cols in data_cols.items():
        # Select this Instrument's data, keeping the parsed variable order
        inst_data = dict()
        for var_name in data_dict.keys():
            if var_name in inst_cols:
                inst_data[var_name] = data_dict[var_name]

        # Collect into a DataFrame and write it out as a file
        frame = pds.DataFrame(inst_data, index=solar_times)
        frame.to_csv(outfiles[inst_name], header=True)

    return
[docs]
def parse_daily_solar_data(data_lines, year, optical):
    """Extract dates and values from SWPC daily solar index data lines.

    Parameters
    ----------
    data_lines : list
        List of lines containing data
    year : int
        Year of file, which sets the date format and the available columns
    optical : boolean
        Flag denoting whether or not optical data is available

    Returns
    -------
    dates : list
        List of dates, one for each data line
    values : dict
        Dict of lists of values, where each key is the value name. Values
        are integers, apart from 'goes_bgd_flux' which is kept as a string.
        Missing values are -999 for the first five variables and -1 for the
        flare counts.

    """
    val_keys = ['f107', 'ssn', 'ss_area', 'new_reg', 'smf', 'goes_bgd_flux',
                'c_flare', 'm_flare', 'x_flare', 'o1_flare', 'o2_flare',
                'o3_flare']
    xray_keys = ('c_flare', 'm_flare', 'x_flare')
    optical_keys = ('o1_flare', 'o2_flare', 'o3_flare')

    # Initialize the output
    dates = []
    values = {key: [] for key in val_keys}

    # The date format and the 1994 column layout are fixed for a file
    date_fmt = "%d %b %y" if year <= 1996 else "%Y %m %d"
    is_1994 = year == 1994

    for line in data_lines:
        tokens = line.split()

        # The first three tokens hold the date
        dates.append(dt.datetime.strptime(" ".join(tokens[0:3]), date_fmt))

        # The remaining tokens are consumed in order, but only for the
        # variables present in this file
        column = 3
        for ikey, key in enumerate(val_keys):
            if is_1994 and key == 'new_reg':
                # New regions only in files after 1994
                val = -999
            elif (is_1994 and key in xray_keys) or (
                    not optical and key in optical_keys):
                # X-ray flares in files after 1994, optical flares come later
                val = -1
            else:
                val = tokens[column]
                column += 1

                # The GOES background flux stays a string
                if key != 'goes_bgd_flux':
                    if val == "*":
                        val = -1 if ikey >= 5 else -999
                    else:
                        val = int(val)

            values[key].append(val)

    return dates, values
[docs]
def solar_geomag_predictions_download(name, date_array, data_path,
                                      mock_download_dir=None):
    """Download the 3-day solar-geomagnetic predictions from SWPC.

    Parameters
    ----------
    name : str
        Instrument name, expects one of 'kp', 'ap', 'stormprob', 'f107',
        'flare', or 'polarcap'.
    date_array : array-like or pandas.DatetimeIndex
        Array-like or index of datetimes to be downloaded. Not used, as only
        the current prediction file is retrieved.
    data_path : str
        Path to data directory.
    mock_download_dir : str or NoneType
        Local directory with downloaded files or None. If not None, will
        process any files with the correct name and date as if they were
        downloaded (default=None)

    Raises
    ------
    IOError
        Per the original documentation, if an unknown mock download directory
        is supplied or the desired file is missing (presumably raised by
        `general.get_local_or_remote_text`; verify against that helper).

    Note
    ----
    Note that the download path for the complementary Instrument will use
    the standard pysat data paths

    """
    pysat.logger.info(forecast_warning)

    # Get the file paths
    file_paths = {data_name: data_path if name == data_name else
                  general.get_instrument_data_path(
                      'sw_{:s}'.format(data_name),
                      tag='forecast' if data_name == 'f107' else 'prediction')
                  for data_name in ['kp', 'ap', 'stormprob', 'f107', 'flare',
                                    'polarcap']}

    # Check that the directories exist, without overwriting `data_path`
    for inst_path in file_paths.values():
        pysat.utils.files.check_and_make_path(inst_path)

    # Get the file information
    raw_txt = general.get_local_or_remote_text(
        'https://services.swpc.noaa.gov/text/', mock_download_dir,
        '3-day-solar-geomag-predictions.txt')

    if raw_txt is None:
        pysat.logger.info("".join(["Data not downloaded for ",
                                   "3-day-solar-geomag-predictions.txt, data ",
                                   "may have been saved to an unexpected ",
                                   "filename."]))
        return

    # Parse text to get the date the prediction was generated
    date_str = raw_txt.split(':Issued: ')[-1].split(' UTC')[0]
    dl_date = dt.datetime.strptime(date_str, '%Y %b %d %H%M')

    # Parse the data to get the prediction dates. Each date is made of three
    # whitespace-separated tokens (year, month name, day), so the tokens are
    # grouped in threes. This does not depend on the amount of whitespace
    # used between the dates.
    date_tokens = raw_txt.split(':Prediction_dates:')[-1].split('\n')[0]
    date_tokens = date_tokens.split()
    pred_times = [
        dt.datetime.strptime(' '.join(date_tokens[i:i + 3]), '%Y %b %d')
        for i in range(0, len(date_tokens), 3)]

    # Separate out the data by chunks
    ap_raw = raw_txt.split(':Geomagnetic_A_indices:')[-1]
    kp_raw = raw_txt.split(':Pred_Mid_k:')[-1]
    storm_raw = raw_txt.split(':Prob_Mid:')[-1]
    pc_raw = raw_txt.split(':Polar_cap:')[-1]
    f107_raw = raw_txt.split(':10cm_flux:')[-1]
    flare_raw = raw_txt.split(':Whole_Disk_Flare_Prob:')[-1]

    # Initalize the data for each data type
    data_vals = {data_name: dict() for data_name in file_paths.keys()}
    data_times = {data_name: pred_times for data_name in file_paths.keys()}

    # Process the ap data, stopping at the start of the next section
    for line in ap_raw.split('\n'):
        if line.find(":") == 0:
            break
        elif line.find("A_") == 0:
            split_line = line.split()
            if split_line[0] == "A_Planetary":
                dkey = "daily_Ap"
            else:
                dkey = split_line[0]

            data_vals['ap'][dkey] = [int(val) for val in split_line[1:]]

    # Process the Kp data, which has eight 3-hour values on each of 3 days
    hr_strs = ['00-03UT', '03-06UT', '06-09UT', '09-12UT', '12-15UT',
               '15-18UT', '18-21UT', '21-00UT']

    # A timedelta is used for the cadence, as the '3H' string alias is
    # deprecated in recent pandas versions
    data_times['kp'] = pds.date_range(pred_times[0], periods=24,
                                      freq=dt.timedelta(hours=3))

    for line in kp_raw.split('\n'):
        if line.find("Prob_Mid") >= 0:
            break
        elif line.find("UT") > 0:
            split_line = line.split()
            reg, hr = split_line[0].split('/')
            dkey = '{:s}_lat_Kp'.format(reg)

            # Initalize the Kp data for this region
            if dkey not in data_vals['kp'].keys():
                data_vals['kp'][dkey] = np.full(shape=(24,),
                                                fill_value=np.nan)

            # Save the Kp data into the correct day and hour index
            hr_index = hr_strs.index(hr)
            data_vals['kp'][dkey][hr_index] = float(split_line[1])
            data_vals['kp'][dkey][hr_index + 8] = float(split_line[2])
            data_vals['kp'][dkey][hr_index + 16] = float(split_line[3])

    # Process the storm probabilities
    for line in storm_raw.split('\n'):
        if line.find("Polar_cap") >= 0:
            break
        elif len(line) > 0:
            split_line = line.split()
            if split_line[0].find('/') > 0:
                dkey = split_line[0].replace('/', '-Lat_')
                data_vals['stormprob'][dkey] = [
                    int(val) for val in split_line[1:]]

    # Process the polar cap prediction, found on the line after the label
    data_vals['polarcap']['absorption_forecast'] = list(
        pc_raw.split('\n')[1].split())

    # Only keep as many times as there are polar cap values
    num_pc = len(data_vals['polarcap']['absorption_forecast'])
    data_times['polarcap'] = [
        ptime for i, ptime in enumerate(pred_times) if i < num_pc]

    # Process the F10.7 data, found on the line after the label
    data_vals['f107']['f107'] = [
        int(val) for val in f107_raw.split('\n')[1].split()]

    # Process the flare data
    region_suffixes = ['Region', 'Class_C', 'Class_M', 'Class_X', 'Class_P']
    dkey_root = 'Whole_Disk_Flare_Prob'
    for line in flare_raw.split('\n'):
        if len(line) > 0 and line.find("#") < 0:
            if line.find(":") == 0:
                # A new section label, use it for the following data names
                dkey_root = line.split(":")[1]
            else:
                split_line = line.split()

                if len(split_line) == 4:
                    # A label followed by one value for each day
                    dkey = "_".join([dkey_root, split_line[0]])
                    data_vals['flare'][dkey] = [
                        int(val) for val in split_line[1:]]
                else:
                    # One value per variable; the remaining two entries are
                    # filled with -1
                    for ival, suffix in enumerate(region_suffixes):
                        dkey = '{:s}_{:s}'.format(dkey_root, suffix)
                        data_vals['flare'][dkey] = [
                            int(split_line[ival]), -1, -1]

    # Save the data by type into files
    for data_name in data_vals.keys():
        # Put the data values into a nicer DataFrame
        data = pds.DataFrame(data_vals[data_name],
                             index=data_times[data_name])

        # Save the data as a CSV file
        data_tag = 'forecast' if data_name == 'f107' else 'prediction'
        data_file = '_'.join([data_name, data_tag,
                              '{:s}.txt'.format(dl_date.strftime(
                                  '%Y-%m-%d'))])
        data.to_csv(os.path.join(file_paths[data_name], data_file),
                    header=True)

    return
[docs]
def geomag_forecast_download(name, date_array, data_path,
                             mock_download_dir=None):
    """Download the 3-day geomagnetic Kp, ap, and storm data from SWPC.

    Parameters
    ----------
    name : str
        Instrument name, expects one of 'kp', 'ap', or 'stormprob'.
    date_array : array-like or pandas.DatetimeIndex
        Array-like or index of datetimes to be downloaded. Not used, as only
        the current forecast file is retrieved.
    data_path : str
        Path to data directory.
    mock_download_dir : str or NoneType
        Local directory with downloaded files or None. If not None, will
        process any files with the correct name and date as if they were
        downloaded (default=None)

    Raises
    ------
    IOError
        Per the original documentation, if an unknown mock download directory
        is supplied or the desired file is missing (presumably raised by
        `general.get_local_or_remote_text`; verify against that helper).

    Note
    ----
    Note that the download path for the complementary Instrument will use
    the standard pysat data paths

    """
    pysat.logger.info(forecast_warning)

    # Get the file paths
    file_paths = {data_name: data_path if name == data_name else
                  general.get_instrument_data_path(
                      'sw_{:s}'.format(data_name), tag='forecast')
                  for data_name in ['kp', 'ap', 'stormprob']}

    # Check that the directories exist, without overwriting `data_path`
    for inst_path in file_paths.values():
        pysat.utils.files.check_and_make_path(inst_path)

    # Get the file information
    raw_txt = general.get_local_or_remote_text(
        'https://services.swpc.noaa.gov/text/', mock_download_dir,
        '3-day-geomag-forecast.txt')

    if raw_txt is None:
        pysat.logger.info("".join(["Data not downloaded for ",
                                   "3-day-geomag-forecast.txt, data may have ",
                                   "been saved to an unexpected filename."]))
        return

    # Parse text to get the date the prediction was generated
    date_str = raw_txt.split(':Issued: ')[-1].split(' UTC')[0]
    dl_date = dt.datetime.strptime(date_str, '%Y %b %d %H%M')

    # Separate out the data by chunks
    ap_raw = raw_txt.split('NOAA Ap Index Forecast')[-1]
    kp_raw = raw_txt.split('NOAA Kp index forecast ')[-1]
    storm_raw = raw_txt.split('NOAA Geomagnetic Activity Probabilities')[-1]

    # Get dates of the forecasts. The file only supplies the day and month,
    # so the year is taken from the issue date.
    day_month = kp_raw[0:6]
    forecast_date = dt.datetime.strptime(
        ' '.join([day_month, str(dl_date.year)]), '%d %b %Y')

    # Across a year boundary the issue year is wrong for the forecast (e.g.,
    # issued 31 Dec for 01 Jan). A start date roughly half a year away from
    # the issue date can only result from this, so shift the year.
    half_year = dt.timedelta(days=180)
    if forecast_date < dl_date - half_year:
        forecast_date = dt.datetime.strptime(
            ' '.join([day_month, str(dl_date.year + 1)]), '%d %b %Y')
    elif forecast_date > dl_date + half_year:
        forecast_date = dt.datetime.strptime(
            ' '.join([day_month, str(dl_date.year - 1)]), '%d %b %Y')

    # Strings we will use to parse the downloaded text for Kp
    hr_strs = ['00-03UT', '03-06UT', '06-09UT', '09-12UT', '12-15UT',
               '15-18UT', '18-21UT', '21-00UT']

    # Storage for daily Kp forecasts. Get values for each day, then combine
    # them together. The last three columns of each line hold the three days.
    kp_days = [[], [], []]
    for hr_str in hr_strs:
        cols = kp_raw.split(hr_str)[-1].split('\n')[0].split()
        for iday, day_vals in enumerate(kp_days):
            day_vals.append(float(cols[iday - 3]))

    kp_day = kp_days[0] + kp_days[1] + kp_days[2]

    # A timedelta is used for the cadence, as the '3H' string alias is
    # deprecated in recent pandas versions
    kp_times = pds.date_range(forecast_date, periods=24,
                              freq=dt.timedelta(hours=3))

    # Put Kp data into nicer DataFrame
    data_frames = {'kp': pds.DataFrame(kp_day, index=kp_times,
                                       columns=['Kp'])}

    # Parse the Ap data: one observed, one estimated, and the predicted
    # values, on daily times starting the day before the issue time
    ap_times = pds.date_range(dl_date - dt.timedelta(days=1), periods=5,
                              freq='1D')
    obs_line = ap_raw.split('Observed Ap')[-1].split('\n')[0]
    est_line = ap_raw.split('Estimated Ap')[-1].split('\n')[0]
    pred_line = ap_raw.split('Predicted Ap')[-1].split('\n')[0]
    ap_vals = [int(obs_line[-3:]), int(est_line[-3:])]

    # The predicted values are joined by dashes in the last token
    ap_vals.extend(int(ap_val) for ap_val in pred_line.split()[-1].split('-'))

    # Put the Ap data into a nicer DataFrame
    data_frames['ap'] = pds.DataFrame(ap_vals, index=ap_times,
                                      columns=['daily_Ap'])

    # Parse the storm probabilities
    storm_dict = {}
    for storm_line in storm_raw.split('\n')[1:5]:
        storm_split = storm_line.split()

        # Build the storm data column name
        dkey = '_'.join(storm_split[:-1])

        # Assign the storm probabilities, which are joined by slashes
        storm_dict[dkey] = [int(sp) for sp in storm_split[-1].split('/')]

    # Put the storm probabilities into a nicer DataFrame
    storm_times = pds.date_range(forecast_date, periods=3, freq='1D')
    data_frames['stormprob'] = pds.DataFrame(storm_dict, index=storm_times)

    # Save the data files
    for data_name in data_frames.keys():
        filename = '{:s}_forecast_{:s}.txt'.format(
            data_name, dl_date.strftime('%Y-%m-%d'))
        data_frames[data_name].to_csv(os.path.join(
            file_paths[data_name], filename), header=True)

    return
[docs]
def kp_ap_recent_download(name, date_array, data_path, mock_download_dir=None):
    """Download recent Kp and ap data from SWPC.

    Parameters
    ----------
    name : str
        Instrument name, expects 'kp' or 'ap'.
    date_array : array-like or pandas.DatetimeIndex
        Array-like or index of datetimes to be downloaded. Not used, as only
        the current file is retrieved.
    data_path : str
        Path to data directory.
    mock_download_dir : str or NoneType
        Local directory with downloaded files or None. If not None, will
        process any files with the correct name and date as if they were
        downloaded (default=None)

    Raises
    ------
    IOError
        Per the original documentation, if an unknown mock download directory
        is supplied or the desired file is missing (presumably raised by
        `general.get_local_or_remote_text`; verify against that helper).
    ValueError
        If the header line that precedes the data cannot be found.

    Note
    ----
    Note that the download path for the complementary Instrument will use
    the standard pysat data paths

    """
    pysat.logger.info(forecast_warning)

    # Get the file paths
    file_paths = {data_name: data_path if name == data_name else
                  general.get_instrument_data_path(
                      'sw_{:s}'.format(data_name), tag='recent')
                  for data_name in ['kp', 'ap']}

    # Check that the directories exist, without overwriting `data_path`
    for inst_path in file_paths.values():
        pysat.utils.files.check_and_make_path(inst_path)

    # Get the file information
    raw_txt = general.get_local_or_remote_text(
        'https://services.swpc.noaa.gov/text/', mock_download_dir,
        'daily-geomagnetic-indices.txt')

    if raw_txt is None:
        pysat.logger.info("".join(["Data not downloaded for ",
                                   "daily-geomagnetic-indices.txt, data may ",
                                   "have been saved to an unexpected ",
                                   "filename."]))
        return

    # Parse text to get the date the file was generated
    date_str = raw_txt.split(':Issued: ')[-1].split('\n')[0]
    dl_date = dt.datetime.strptime(date_str, '%H%M UT %d %b %Y')

    # The data follows the last header line whose first word is 'Date'. The
    # header is identified by its words, so the amount of whitespace after
    # the comment character does not matter.
    all_lines = raw_txt.split('\n')
    header_inds = [i for i, line in enumerate(all_lines)
                   if line.startswith('#')
                   and line[1:].split()[:1] == ['Date']]

    if len(header_inds) == 0:
        raise ValueError(''.join(['unable to find the data header in ',
                                  'daily-geomagnetic-indices.txt']))

    # Keep only the middle bits that matter; the last entry is not data
    raw_data = all_lines[header_inds[-1] + 1:-1]

    # Hold times from the file
    times = []

    # Holds Kp and Ap values for each station
    sub_kps = [[], [], []]
    sub_aps = [[], [], []]

    # Iterate through file lines and parse out the info we want
    for line in raw_data:
        times.append(dt.datetime.strptime(line[0:10], '%Y %m %d'))

        # Pick out Kp values for each of the three columns. The columns
        # used to all have integer values, but now some have floats.
        kp_sub_lines = [line[17:33], line[40:56], line[63:]]
        ap_sub_lines = [line[10:17], line[33:40], line[56:63]]

        for i, sub_line in enumerate(kp_sub_lines):
            # Process the Kp data, which has 3-hour values
            if '.' in sub_line:
                # These are float values, separated by whitespace
                split_sub = sub_line.split()
                sub_kps[i].extend(np.float64(split_sub[ihr])
                                  for ihr in range(8))
            else:
                # These are integer values, in fields two characters wide
                sub_kps[i].extend(int(sub_line[(ihr * 2):((ihr + 1) * 2)])
                                  for ihr in range(8))

            # Process the Ap data, which has daily values
            sub_aps[i].append(np.int64(ap_sub_lines[i]))

    # Create times on 3 hour cadence, with eight times for every day that
    # was read from the file. A timedelta is used for the cadence, as the
    # '3H' string alias is deprecated in recent pandas versions.
    kp_times = pds.date_range(times[0], periods=(8 * len(times)),
                              freq=dt.timedelta(hours=3))

    # Put both data sets into DataFrames
    data = {'kp': pds.DataFrame({'mid_lat_Kp': sub_kps[0],
                                 'high_lat_Kp': sub_kps[1],
                                 'Kp': sub_kps[2]}, index=kp_times),
            'ap': pds.DataFrame({'mid_lat_Ap': sub_aps[0],
                                 'high_lat_Ap': sub_aps[1],
                                 'daily_Ap': sub_aps[2]}, index=times)}

    # Write out the data sets as files
    for dkey in data.keys():
        data_file = '{:s}_recent_{:s}.txt'.format(
            dkey, dl_date.strftime('%Y-%m-%d'))
        data[dkey].to_csv(os.path.join(file_paths[dkey], data_file),
                          header=True)

    return
[docs]
def recent_ap_f107_download(name, date_array, data_path,
                            mock_download_dir=None):
    """Retrieve the SWPC 45-day ap and F10.7 forecast and save both data sets.

    Parameters
    ----------
    name : str
        Instrument name, expects 'f107' or 'ap'.
    date_array : array-like or pandas.DatetimeIndex
        Array-like or index of datetimes to be downloaded. Not used, as only
        the current file is retrieved.
    data_path : str
        Path to data directory.
    mock_download_dir : str or NoneType
        Local directory with downloaded files or None. Passed through to
        `general.get_local_or_remote_text` (default=None)

    Raises
    ------
    IOError
        Per the original documentation, if an unknown mock download directory
        is supplied or the desired file is missing (presumably raised by
        `general.get_local_or_remote_text`; verify against that helper).

    Note
    ----
    Note that the download path for the complementary Instrument will use
    the standard pysat data paths

    """
    pysat.logger.info(forecast_warning)

    # Determine the output directory for each Instrument
    file_paths = dict()
    for inst_name in ['f107', 'ap']:
        if name == inst_name:
            file_paths[inst_name] = data_path
        else:
            file_paths[inst_name] = general.get_instrument_data_path(
                'sw_{:s}'.format(inst_name), tag='45day')

    # Ensure every output directory is present
    for inst_path in file_paths.values():
        pysat.utils.files.check_and_make_path(inst_path)

    # Retrieve the file contents as text
    raw_txt = general.get_local_or_remote_text(
        'https://services.swpc.noaa.gov/text/', mock_download_dir,
        '45-day-ap-forecast.txt')

    if raw_txt is None:
        # Nothing to process, report and leave
        pysat.logger.info("Data not downloaded for "
                          "45-day-ap-forecast.txt, data may have been"
                          " saved to an unexpected filename.")
        return

    # The issue date of the forecast is used in the output file names
    issued = raw_txt.split(':Issued: ')[-1].split(' UTC')[0]
    dl_date = dt.datetime.strptime(issued, '%Y %b %d %H%M')

    # The Ap block lies between the two section titles and the F10.7 block
    # follows the second title
    forecast_txt = raw_txt.split('45-DAY AP FORECAST')[-1]
    blocks = forecast_txt.split('45-DAY F10.7 CM FLUX FORECAST')
    ap_lines = blocks[0].split('\n')[1:-1]
    f107_lines = blocks[-1].split('\n')[1:-4]

    # Parse the data
    ap_times, ap_vals = parse_45day_block(ap_lines)
    f107_times, f107_vals = parse_45day_block(f107_lines)

    # Save the data in DataFrames
    data = dict()
    data['ap'] = pds.DataFrame(ap_vals, index=ap_times, columns=['daily_Ap'])
    data['f107'] = pds.DataFrame(f107_vals, index=f107_times,
                                 columns=['f107'])

    # Write out the data files
    date_label = dl_date.strftime('%Y-%m-%d')
    for inst_name, frame in data.items():
        file_name = '{:s}_45day_{:s}.txt'.format(inst_name, date_label)
        frame.to_csv(os.path.join(file_paths[inst_name], file_name),
                     header=True)

    return
[docs]
def parse_45day_block(block_lines):
    """Extract date/value pairs from a 45-day Ap or F10.7 forecast block.

    Parameters
    ----------
    block_lines : list
        List of lines containing data in this data block. Each line holds
        whitespace-separated tokens that alternate between a date, formatted
        as day, abbreviated month name, and two-digit year, and an integer.

    Returns
    -------
    dates : list
        List of dates for each date/data pair in this block
    values : list
        List of values for each date/data pair in this block

    """
    dates = []
    values = []

    for line in block_lines:
        # Tokens at even positions are dates, those at odd positions values
        for itoken, token in enumerate(line.split()):
            if itoken % 2 == 0:
                dates.append(dt.datetime.strptime(token, "%d%b%y"))
            else:
                values.append(int(token))

    return dates, values
[docs]
def list_files(name, tag, inst_id, data_path, format_str=None):
    """List the local SWPC files, padded to cover the following two days.

    Parameters
    ----------
    name : str
        Instrument name, used in the default file format.
    tag : str
        String specifying the data set, used in the default file format.
    inst_id : str
        Specifies the instrument identification, not used.
    data_path : str
        Path to data directory.
    format_str : str or NoneType
        User specified file format. If None is specified, the default
        format of `name`, `tag`, and a date is used. (default=None)

    Returns
    -------
    files : pysat._files.Files
        The available files, as returned by `pysat.Files.from_os`, with the
        most recent file repeated on each of the two days after its date.

    """
    if format_str is None:
        date_fmt = '{year:04d}-{month:02d}-{day:02d}.txt'
        format_str = '_'.join([name, tag, date_fmt])

    files = pysat.Files.from_os(data_path=data_path, format_str=format_str)

    if not files.empty:
        # Pad the list of files by assigning the most recent file to the next
        # two days. Each pass extends the index by one day, so the second
        # pass starts from the entry added by the first.
        one_day = dt.timedelta(days=1)
        for _ in range(2):
            files.loc[files.index[-1] + one_day] = files.values[-1]

    return files