Source code for corrections.ensemble

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
The ensemble photometry detrending class.

.. codeauthor:: Derek Buzasi
.. codeauthor:: Oliver J. Hall
.. codeauthor:: Lindsey Carboneau
.. codeauthor:: Filipe Pereira
.. codeauthor:: Rasmus Handberg <rasmush@phys.au.dk>
"""

import numpy as np
import os.path
from bottleneck import nanmedian, nanstd, nansum
from timeit import default_timer
import logging
from scipy.optimize import minimize # minimize_scalar
from sklearn.neighbors import NearestNeighbors
import copy
from .plots import plt, save_figure
from .quality import CorrectorQualityFlags
from . import BaseCorrector, STATUS

#--------------------------------------------------------------------------------------------------
[docs] class EnsembleCorrector(BaseCorrector): #----------------------------------------------------------------------------------------------
[docs] def __init__(self, *args, **kwargs): """ Initialize the correction object. Parameters: *args: Arguments for the BaseCorrector class **kwargs: Keyword Arguments for the BaseCorrector class """ super().__init__(*args, **kwargs) logger = logging.getLogger(__name__) self.debug = logger.isEnabledFor(logging.DEBUG) # Cache of NearestNeighbors objects that will be filled by the get_nearest_neighbors method: self._nearest_neighbors = {}
#----------------------------------------------------------------------------------------------
[docs] def get_nearest_neighbors(self, lc, n_neighbors): """ Find the nearest neighbors to the given target in pixel-space. Parameters: lc (:class:`lightkurve.TessLightCurve`): Lightcurve object for target obtained from :func:`load_lightcurve`. n_neighbors (int): Number of targets to return. Returns: list: List of `priority` identifiers of the `n_neighbors` nearest stars. These values can be passed directly to :func:`load_lightcurve` to load the lightcurves of the targets. .. codeauthor:: Rasmus Handberg <rasmush@phys.au.dk> """ sector = lc.sector camera = lc.camera ccd = lc.ccd cadence = lc.meta['task']['cadence'] key = (sector, cadence, camera, ccd) # Check if the NearestNeighbors object already exists for this camera and CCD. # If not create it, and store it for later use. if key not in self._nearest_neighbors: # StarID, pixel positions are retrieved from the database: select_params = ["todolist.priority", "pos_row", "pos_column"] search_params = [ 'status={:d}'.format(STATUS.OK.value), # Only including targets with status=OK from photometry "method_used='aperture'", # Only including aperature photometry targets f"camera={camera:d}", f"ccd={ccd:d}", f"sector={sector:d}", f"cadence={cadence:d}", "mean_flux>0" ] db_raw = self.search_database(select=select_params, search=search_params) priority = np.asarray([row['priority'] for row in db_raw], dtype='int64') pixel_coords = np.array([[row['pos_row'], row['pos_column']] for row in db_raw]) # Create the NearestNeighbors object: nn = NearestNeighbors(n_neighbors=n_neighbors+1, metric='euclidean') nn.fit(pixel_coords) # Save the NearestNeighbors object and the corresponding list of priorities # to the class variable, so it can be reused the next time a target is requested # on the same camera and CCD: self._nearest_neighbors[key] = {'nn': nn, 'priority': priority} # Get the NearestNeighbor object for the given camera and CCD: nn = self._nearest_neighbors[key]['nn'] priority = self._nearest_neighbors[key]['priority'] # Pixel coordinates of the target: X = np.array([[lc.meta['task']['pos_row'], lc.meta['task']['pos_column']]]) # Use the NearestNeighbor object to find the targets closest to the main target: distance_index = nn.kneighbors(X, n_neighbors=min(n_neighbors+1, len(priority)), return_distance=False) nearby_stars = priority[distance_index.flatten()] # Remove the main target from the list, if it is included: indx = (nearby_stars != lc.meta['task']['priority']) nearby_stars = nearby_stars[indx] # Return the list of nearby stars: logger = logging.getLogger(__name__) logger.debug(nearby_stars) return nearby_stars
#----------------------------------------------------------------------------------------------
[docs] def add_ensemble_member(self, lc, next_star_lc): """ Add a given target to the ensemble list Parameters: lc (:class:`lightkurve.TessLightCurve`): Lightcurve for target obtained from :func:`load_lightcurve`. next_star_lc (:class:`lightkurve.TessLightCurve`): Lightcurve for star to add to ensemble obtained from :func:`load_lightcurve`. Returns: tuple: - ndarray: Lightcurve (flux) to add to ensemble. - float: Fitted background correction (B_zeta). .. codeauthor:: Lindsey Carboneau .. codeauthor:: Derek Buzasi """ # Median subtracted flux of target and ensemble candidate target_flux_median = nanmedian(lc.flux) mtarget_flux = lc.flux - target_flux_median ens_flux = next_star_lc.flux ensemble_flux_median = nanmedian(ens_flux) mens_flux = ens_flux - ensemble_flux_median # 2 sigma ens2sig = 2 * nanstd(mens_flux) targ2sig = 2 * nanstd(mtarget_flux) # absolute balue abstarg = np.absolute(mtarget_flux) absens = np.absolute(mens_flux) # sigma clip the flux used to fit, but don't use that flux again with np.errstate(invalid='ignore'): clip_target_flux = np.where((abstarg < targ2sig) & (absens < ens2sig), mtarget_flux, np.NaN) clip_ens_flux = np.where((abstarg < targ2sig) & (absens < ens2sig), mens_flux, np.NaN) # Define function to be minimized to obtain background-offset correction: clip_ens_abs_flux = clip_ens_flux + ensemble_flux_median term1 = (clip_target_flux + target_flux_median) / nanmedian(clip_target_flux + target_flux_median) def func1(scaleK): temp = term1 - ((clip_ens_abs_flux + scaleK)/nanmedian(clip_ens_abs_flux + scaleK)) return nansum((temp - nanmedian(temp))**2) res = minimize(func1, 100, method='Powell') bzeta = float(res.x) # Sanity checks: if not res.success: raise RuntimeError('Bzeta: Minimization not successful: ' + res.message) ens_flux = ens_flux + bzeta return ens_flux/nanmedian(ens_flux), bzeta
#----------------------------------------------------------------------------------------------
[docs] def apply_ensemble(self, lc, lc_ensemble, lc_corr): """ Apply the ensemble correction method to the target light curve Parameters: lc (:class:`lightkurve.TessLightCurve`): Lightcurve object for target obtained from :func:`load_lightcurve`. lc_ensemble (list): List of ensemble members flux as ndarrays lc_corr (`TESSLightCurve` object): Lightcurve object which stores the ensemble corrected flux values. Returns: :class:`lightkurve.TessLightCurve`: The updated object with corrected flux values. .. codeauthor:: Lindsey Carboneau .. codeauthor:: Derek Buzasi """ # Calculate the median of all the ensemble lightcurves for each timestamp: lc_medians = nanmedian(np.asarray(lc_ensemble), axis=0) def func2(scalef): num1 = nansum(np.abs(np.diff(lc.flux / (lc_medians + scalef)))) denom1 = nanmedian(lc.flux / (lc_medians + scalef)) return num1/denom1 res = minimize(func2, 1.0) # , method='Powell' k_corr = float(res.x) # Sanity checks: if not res.success: logger = logging.getLogger(__name__) logger.warning('Sanity check: Minimization not successful: %s', res.message) #raise RuntimeError('Sanity check: Minimization not successful: ' + res.message) # Correct the lightcurve: lc_corr /= k_corr + lc_medians lc_corr /= nanmedian(lc_corr.flux) lc_corr *= nanmedian(lc.flux) return lc_corr
#----------------------------------------------------------------------------------------------
[docs] def do_correction(self, lc): """ Function that takes all input stars for a sector and uses them to find a detrending function using ensemble photometry for a star 'star_names[ifile]', where ifile is the index for the star in the star_array and star_names list. Parameters: lc (:class:`lightkurve.TessLightCurve`): Raw lightcurve stored in a TessLightCurve object. Returns: :class:`lightkurve.TessLightCurve`: Corrected lightcurve stored in a TessLightCurve object. :class:`STATUS`: The status of the correction. """ logger = logging.getLogger(__name__) logger.info("DataSource=%s, Cadence=%d", lc.meta['task']['datasource'], lc.meta['task']['cadence']) # Settings: drange_lim = 1.0 # Limit on differenced range - not in log10! drange_relfactor = 10 # Limit on differenced range, relative to target d.range. frange_lim = 0.4 # Limit on flux range. min_star_count = 5 # Minimum number of stars to have in the ensemble. max_neighbors = 100 # Maximal number of neighbors to check. # Clean up the lightcurve by removing nans and ignoring data points with bad quality flags # these values need to be removed, or they will affect the ensemble later lc_quality_mask = ~CorrectorQualityFlags.filter(lc.quality) lc.flux[lc_quality_mask] = np.NaN lc_corr = lc.copy() # Set up basic statistical parameters for the light curves. # frange is the light curve range from the 5th to the 95th percentile, # drange is the relative standard deviation of the differenced light curve (to whiten the noise) target_flux_median = lc.meta['task']['mean_flux'] target_frange = np.diff(np.nanpercentile(lc.flux, [5, 95])) / target_flux_median target_drange = nanstd(np.diff(lc.flux)) / target_flux_median logger.debug("Main target: drange=%f, frange=%f", target_drange, target_frange) logger.debug("lc size: %f", len(lc.flux)) # Query for a list of the nearest neigbors to test: nearby_stars = self.get_nearest_neighbors(lc, max_neighbors) logger.debug("Nearby stars: %s", nearby_stars) # Define variables to use in the loop to build the ensemble of stars # List of star indexes to be included in the ensemble temp_list = [] lc_ensemble = [] # Test values store current best correction for testing vs. additional ensemble members # NOTE: this might be done more efficiently with further functionalization of the loop below and updated loop bounds/conditions! test_corr = [] test_ens = [] test_list = [] fom = np.nan # Start loop to build ensemble ensemble_start = default_timer() # Loop through the neighbors to build the ensemble: for next_star_index in nearby_stars: # Get lightkurve for next star closest to target: next_star_lc = self.load_lightcurve(next_star_index) # Remove bad points by setting them to NaN: next_star_lc_quality_mask = ~CorrectorQualityFlags.filter(next_star_lc.quality) next_star_lc.flux[next_star_lc_quality_mask] = np.NaN # Compute the rest of the statistical parameters for the next star to be added to the ensemble. frange = np.diff(np.nanpercentile(next_star_lc.flux, [5, 95])) / next_star_lc.meta['task']['mean_flux'] drange = nanstd(np.diff(next_star_lc.flux)) / next_star_lc.meta['task']['mean_flux'] logger.debug("drange=%f, frange=%f", drange, frange) logger.debug("lc size: %f", len(next_star_lc)) # Stars are added to ensemble if they fulfill the requirements. These are (1) drange less than min_range, (2) drange less than 10 times the # drange of the target (to ensure exclusion of relatively noisy stars), and frange less than 0.03 (to exclude highly variable stars) if drange < drange_lim and drange < drange_relfactor*target_drange and frange < frange_lim: # Add the star to the ensemble: lc_add_ensemble, bzeta = self.add_ensemble_member(lc, next_star_lc) temp_list.append({'priority:': next_star_index, 'starid': next_star_lc.targetid, 'bzeta': bzeta}) lc_ensemble.append(lc_add_ensemble) # Pause the loop if we have reached the desired number of stars, and check the correction: if len(temp_list) >= min_star_count: if np.isnan(fom): # the first time we hit the minimum ensemble size, try to correct the target # storing all these as 'test' - we'll revert to these values if adding the next star doesn't 'surpass the test' # `deepcopy` because otherwise it uses pointers and overwrites the value we need test_corr = self.apply_ensemble(lc, lc_ensemble, lc_corr) test_ens = copy.deepcopy(lc_ensemble) test_list = copy.deepcopy(temp_list) test_fom = nansum(np.abs(np.diff(lc_corr.flux))) fom = copy.deepcopy(test_fom) ######## # NOTE: I think this can be more efficiently, but I'm scared to 'fix' what is currently working - LC ######## else: # see if one more member makes the ensemble 'surpass the test' lc_corr = self.apply_ensemble(lc, lc_ensemble, lc_corr) fom = nansum(np.abs(np.diff(lc_corr.flux))) if fom < test_fom: # the correction "got better" (less noisy) so update and try against another additional member test_corr = lc_corr test_ens = lc_ensemble test_list = temp_list test_fom = fom else: # adding the next neighbor did not improve the correction, so we're done lc_corr = test_corr lc_ensemble = test_ens temp_list = test_list break # Ensure that we reached the minimum number of stars in the ensemble: if len(temp_list) < min_star_count: logger.error("Not enough stars for ensemble") return None, STATUS.ERROR logger.info("Build ensemble, Time: %f", default_timer()-ensemble_start) logger.debug("len(lc) vs len(ens): %f vs %f", len(lc), len(lc_ensemble[0])) # Convert to parts-per-million: corr_median = nanmedian(lc_corr.flux) lc_corr = 1e6*(lc_corr/corr_median - 1) # We probably want to return additional information, including the list of stars in the ensemble, and potentially other things as well. logger.info(temp_list) self.ensemble_starlist = { 'starids': [tl['starid'] for tl in temp_list], 'bzetas': [tl['bzeta'] for tl in temp_list] } # Set additional headers for FITS output: lc_corr.meta['additional_headers']['ENS_MED'] = (corr_median, 'Median of corrected light curve before ppm') lc_corr.meta['additional_headers']['ENS_NUM'] = (len(temp_list), 'Number of targets in ensemble') lc_corr.meta['additional_headers']['ENS_DLIM'] = (drange_lim, 'Limit on differenced range metric') lc_corr.meta['additional_headers']['ENS_DREL'] = (drange_relfactor, 'Limit on relative diff. range') lc_corr.meta['additional_headers']['ENS_RLIM'] = (frange_lim, 'Limit on flux range metric') # Set some meta-data which is used in diagnostics only: lc_corr.meta['FOM'] = test_fom #------------------------------------------------------------------------------------------ if self.plot and self.debug: fig = plt.figure() ax = fig.add_subplot(111) ax.plot(lc.time, nanmedian(np.asarray(lc_ensemble), axis=0), label='Medians') save_figure(os.path.join(self.plot_folder(lc), 'ensemble_lc_medians'), fig=fig) plt.close(fig) logger.debug("Status of correction: %d", np.sum(np.isfinite(lc_corr.flux))) return lc_corr, STATUS.OK