#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Data Validation module for TASOC Pipeline.
.. codeauthor:: Mikkel N. Lund <mikkelnl@phys.au.dk>
.. codeauthor:: Rasmus Handberg <rasmush@phys.au.dk>
"""
import os
import logging
import numpy as np
import sqlite3
from tqdm import tqdm
import itertools
# Plotting:
from .plots import matplotlib as mpl, plots_interactive
# Data Validation methods in separate sub-packages:
from .noise_metrics import noise_metrics
from .pixinaperture import pixinaperture
from .stampsize import stampsize
from .contam import contam
from .mag_dist import mag_dist
from .mag2flux import mag2flux
from .cleanup import cleanup
from .camera_overlap import camera_overlap
from .calctime import calctime, calctime_corrections
from .waittime import waittime
from .haloswitch import haloswitch
# Local packages:
from .status import STATUS
from .quality import DatavalQualityFlags
from .utilities import CounterFilter, find_lightcurve_files
from .version import get_version
#--------------------------------------------------------------------------------------------------
[docs]
class DataValidation(object):
[docs]
def __init__(self, todo_file, output_folder=None, corr=False, validate=True,
        colorbysector=False, ext='png', showplots=False, sysnoise=5.0):
    """
    Initialize DataValidation object.

    Opens the TODO-file, adds the ``method_used`` and ``cadence`` columns to older
    TODO-files where they are missing, (re-)creates the data validation table,
    creates the output directory and log-file and configures the plotting style.
    If anything fails after the database has been opened, the object is closed
    again before the exception is passed on.

    Parameters:
        todo_file (str): TODO-file or directory to load from.
        output_folder (str): Directory in which to place output.
        corr (bool): Include corrected data checks. Default is False.
        validate (bool): Save validation in TODO-file. Default is True.
        colorbysector (bool): Color relevant plots by sector instead of the defaults.
        ext (str): File-extension of plots. Default is ``'png'``.
        showplots (bool): Show plots? Defaults is ``False``.
        sysnoise (float): Systematic noise level for noise model. Default is 5 ppm/hr.

    Raises:
        FileNotFoundError: If the TODO-file does not exist.
        ValueError: If ``corr`` is set but no ``diagnostics_corr`` table exists,
            or if the cadence could not be determined for all targets.

    .. codeauthor:: Rasmus Handberg <rasmush@phys.au.dk>
    """
    logger = logging.getLogger('dataval')

    # Store inputs:
    if os.path.isdir(todo_file):
        # If it was just a directory, then append the default todo-file:
        self.input_folder = os.path.abspath(todo_file)
        todo_file = os.path.join(self.input_folder, 'todo.sqlite')
    else:
        self.input_folder = os.path.abspath(os.path.dirname(todo_file))

    self.extension = ext
    self.show = showplots
    self.outfolder = output_folder
    self.sysnoise = sysnoise
    self.doval = validate
    self.color_by_sector = colorbysector
    self.corr = corr
    self.corr_method = None

    # Other settings:
    self.random_seed = 2187
    self._random_state = None

    if self.corr:
        self.dataval_table = 'datavalidation_corr'
        subdir = 'corr'
    else:
        self.dataval_table = 'datavalidation_raw'
        subdir = 'raw'

    if not self.doval:
        self.dataval_table += '_temp'
        logfilename = 'dataval.log'
    else:
        logfilename = 'dataval_save.log'

    # Make sure it is an absolute path:
    todo_file = os.path.abspath(todo_file)
    logger.info("Loading input data from '%s'", todo_file)
    if not os.path.isfile(todo_file):
        raise FileNotFoundError(f"TODO file not found: '{todo_file}'")

    # From here on we are holding on to open resources (database connection and
    # later the log-file), so make sure they are released if anything goes wrong:
    try:
        # Open the SQLite file:
        self.conn = sqlite3.connect(todo_file)
        self.conn.row_factory = sqlite3.Row
        self.cursor = self.conn.cursor()
        self.cursor.execute("PRAGMA foreign_keys=ON;")

        # Check if corrections have been run:
        self.corrections_done = self._table_exists('diagnostics_corr')
        if self.corr and not self.corrections_done:
            raise ValueError("Can not run dataval on corr when corrections have not been run")

        # Add method_used to the diagnostics table if it doesn't exist:
        self.cursor.execute("PRAGMA table_info(diagnostics)")
        if 'method_used' not in [r['name'] for r in self.cursor.fetchall()]:
            # Since this one is NOT NULL, we have to do some magic to fill out the
            # new column after creation, by finding keywords in other columns.
            # This can be a pretty slow process, but it only has to be done once.
            logger.debug("Adding method_used column to diagnostics")
            self.cursor.execute("ALTER TABLE diagnostics ADD COLUMN method_used TEXT NOT NULL DEFAULT 'aperture';")
            for m in ('aperture', 'halo', 'psf', 'linpsf'):
                self.cursor.execute("UPDATE diagnostics SET method_used=? WHERE priority IN (SELECT priority FROM todolist WHERE method=?);", [m, m])
            self.cursor.execute("UPDATE diagnostics SET method_used='halo' WHERE method_used='aperture' AND errors LIKE '%Automatically switched to Halo photometry%';")
            self.conn.commit()

        # Add the CADENCE column to todolist, if it doesn't exist:
        # This is only for backwards compatibility.
        self.cursor.execute("PRAGMA table_info(todolist)")
        existing_columns = [r['name'] for r in self.cursor.fetchall()]
        if 'cadence' not in existing_columns:
            logger.debug("Adding CADENCE column to todolist")
            self.cursor.execute("ALTER TABLE todolist ADD COLUMN cadence INTEGER DEFAULT NULL;")
            self.cursor.execute("UPDATE todolist SET cadence=1800 WHERE datasource='ffi' AND sector < 27;")
            self.cursor.execute("UPDATE todolist SET cadence=600 WHERE datasource='ffi' AND sector >= 27 AND sector <= 55;")
            self.cursor.execute("UPDATE todolist SET cadence=120 WHERE datasource!='ffi' AND sector < 27;")
            # Anything not covered by the rules above is left as NULL and can not be handled:
            self.cursor.execute("SELECT COUNT(*) AS antal FROM todolist WHERE cadence IS NULL;")
            if self.cursor.fetchone()['antal'] > 0:
                raise ValueError("TODO-file does not contain CADENCE information and it could not be determined automatically. Please recreate TODO-file.")
            self.conn.commit()

        # Get the corrector that was run on this TODO-file, if the corr_settings table is available:
        if self.corr and self._table_exists('corr_settings'):
            self.cursor.execute("SELECT corrector FROM corr_settings LIMIT 1;")
            row = self.cursor.fetchone()
            if row is not None and row['corrector'] is not None:
                self.corr_method = row['corrector'].strip()
                subdir += '-' + self.corr_method

        # Create table for data-validation:
        # Depending if we are saving the results or not (self.doval) we are creating
        # it either as a real table, or as a TEMPORARY table, which will only exist in memory
        # as long as the database connection is opened.
        logger.info("Creating datavalidation table...")
        self.cursor.execute('DROP TABLE IF EXISTS ' + self.dataval_table + ';')
        if self.doval:
            self.cursor.execute("CREATE TABLE IF NOT EXISTS " + self.dataval_table + """ (
                priority INTEGER PRIMARY KEY ASC NOT NULL,
                dataval INTEGER NOT NULL DEFAULT 0,
                approved BOOLEAN,
                FOREIGN KEY (priority) REFERENCES todolist(priority) ON DELETE CASCADE ON UPDATE CASCADE
            );""")
        else:
            # Temporary tables can not use foreign keys to real tables, which is why
            # we are not putting in the same foreign key here.
            self.cursor.execute("CREATE TEMPORARY TABLE " + self.dataval_table + """ (
                priority INTEGER PRIMARY KEY ASC NOT NULL,
                dataval INTEGER NOT NULL DEFAULT 0,
                approved BOOLEAN
            );""")

        # Fill out the table with zero dataval and NULL in approved:
        logger.info("Initializing datavalidation table...")
        self.cursor.execute("INSERT INTO " + self.dataval_table + " (priority) SELECT priority FROM todolist;")
        self.conn.commit()

        # Create a couple of indicies on the two columns:
        logger.info("Creating indicies on datavalidation table...")
        self.cursor.execute("CREATE INDEX IF NOT EXISTS " + self.dataval_table + "_approved_idx ON " + self.dataval_table + " (approved);")
        self.cursor.execute("CREATE INDEX IF NOT EXISTS " + self.dataval_table + "_dataval_idx ON " + self.dataval_table + " (dataval);")
        self.conn.commit()

        logger.info("Creating lightcurve indicies...")
        self.cursor.execute("CREATE INDEX IF NOT EXISTS diagnostics_lightcurve_idx ON diagnostics (lightcurve);")
        if self.corrections_done:
            self.cursor.execute("CREATE INDEX IF NOT EXISTS diagnostics_corr_lightcurve_idx ON diagnostics_corr (lightcurve);")
        self.conn.commit()

        # Create output directory:
        if self.outfolder is None:
            self.outfolder = os.path.join(self.input_folder, 'data_validation', subdir)
        os.makedirs(self.outfolder, exist_ok=True)
        logger.info("Putting output data in '%s'", self.outfolder)

        # Also write any logging output to a log-file in the output directory.
        # The file is overwritten on each run (mode='w'):
        formatter = logging.Formatter('%(asctime)s - %(levelname)-7s - %(funcName)-10.10s - %(message)s')
        self._filehandler = logging.FileHandler(os.path.join(self.outfolder, logfilename), mode='w')
        self._filehandler.setFormatter(formatter)
        self._filehandler.setLevel(logging.INFO)
        logger.addHandler(self._filehandler)

        # Add a CounterFilter to the logger, which will count the number of log-records
        # being passed through the logger. Can be used to count the number of errors/warnings:
        self._counterfilter = CounterFilter()
        logger.addFilter(self._counterfilter)

        # Log the version of the data validation being run:
        logger.info("Data Validation version: %s", get_version())

        # Write to log if we are saving or not:
        if self.doval:
            logger.info("Saving final validations in TODO-file.")
        else:
            logger.info("Not saving final validations in TODO-file.")

        # Get the range of Tmags in the tables (padded by half a magnitude on each side):
        tmag_limits = self.search_database(select=['MIN(tmag) AS tmag_min', 'MAX(tmag) AS tmag_max'])[0]
        self.tmag_limits = (tmag_limits['tmag_min']-0.5, tmag_limits['tmag_max']+0.5)

        # Get the distinct list of available cadences:
        self.cadences = self.search_database(select='cadence', distinct=True, order_by='cadence')
        self.cadences = [int(cad['cadence']) for cad in self.cadences]

        # Plot settings:
        if self.show:
            plots_interactive()
        mpl.style.use(os.path.join(os.path.dirname(__file__), 'dataval.mplstyle'))
        mpl.rcParams['savefig.format'] = self.extension

    except BaseException:
        # Release database connection and log-file before passing the error on.
        # close() only touches what has actually been created so far.
        self.close()
        raise

#----------------------------------------------------------------------------------------------
def _table_exists(self, name):
    """Return True if a table with the given name exists in the TODO-file."""
    self.cursor.execute("SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?;", [name])
    return bool(self.cursor.fetchone()[0] == 1)
#----------------------------------------------------------------------------------------------
def __enter__(self):
return self
#----------------------------------------------------------------------------------------------
def __exit__(self, *args):
self.close()
#----------------------------------------------------------------------------------------------
def __del__(self):
self.close()
#----------------------------------------------------------------------------------------------
[docs]
def close(self):
    """
    Close DataValidation object and all associated objects.

    Resets the matplotlib style to the default, closes the database cursor and
    connection, and detaches the log-file handler and the log-record counter from
    the ``dataval`` logger. Only attributes that have actually been created are
    touched, so this can be called on a partially initialized object and can be
    called more than once.
    """
    mpl.style.use('default')

    if hasattr(self, 'cursor'):
        try:
            self.cursor.close()
        except sqlite3.ProgrammingError:
            # Raised if the connection has already been closed - nothing more to do.
            pass

    if hasattr(self, 'conn'):
        self.conn.close()

    logger = logging.getLogger('dataval')

    # Close the logging FileHandler:
    if hasattr(self, '_filehandler'):
        self._filehandler.close()
        logger.removeHandler(self._filehandler)

    # Detach the counter from the shared logger, so filters do not pile up on it when
    # several objects are created. The counts collected so far stay available
    # through the filter object itself.
    if hasattr(self, '_counterfilter'):
        logger.removeFilter(self._counterfilter)
#----------------------------------------------------------------------------------------------
@property
def logcounts(self):
return self._counterfilter.counter
#----------------------------------------------------------------------------------------------
@property
def random_state(self):
if self._random_state is None:
self._random_state = np.random.default_rng(self.random_seed)
return self._random_state
#----------------------------------------------------------------------------------------------
[docs]
def search_database(self, select=None, search=None, order_by=None, limit=None, distinct=False,
joins=None):
"""
Search list of lightcurves and return a list of tasks/stars matching the given criteria.
Parameters:
search (list of strings or None): Conditions to apply to the selection of stars
from the database.
order_by (list, string or None): Column to order the database output by.
limit (int or None): Maximum number of rows to retrieve from the database.
If limit is None, all the rows are retrieved.
distinct (boolean): Boolean indicating if the query should return unique elements only.
Returns:
list of dicts: Returns all stars retrieved by the call to the database as dicts/tasks
that can be consumed directly by load_lightcurve
.. codeauthor:: Rasmus Handberg <rasmush@phys.au.dk>
"""
logger = logging.getLogger('dataval')
# Which columns to select from the tables:
if select is None:
select = '*'
elif isinstance(select, (list, tuple)):
select = ",".join(select)
# Search constraints:
# Default is to only pass through targets that made the last step successfully
default_search = ['status IN (1,3)']
if self.corr:
default_search.append('corr_status IN (1,3)')
if isinstance(search, str): search = [search,]
search = default_search if search is None else default_search + search
search = "WHERE " + " AND ".join(search)
if order_by is None:
order_by = ''
elif isinstance(order_by, (list, tuple)):
order_by = " ORDER BY " + ",".join(order_by)
elif isinstance(order_by, str):
order_by = " ORDER BY " + order_by
limit = '' if limit is None else " LIMIT %d" % limit
# Which tables to join together:
default_joins = ['INNER JOIN diagnostics ON todolist.priority=diagnostics.priority']
if self.corrections_done:
default_joins.append('LEFT JOIN diagnostics_corr ON todolist.priority=diagnostics_corr.priority')
joins = default_joins if joins is None else default_joins + joins
# Create query:
query = "SELECT {distinct:s}{select:s} FROM todolist {joins:s} {search:s}{order_by:s}{limit:s};".format(
distinct='DISTINCT ' if distinct else '',
select=select,
joins=' '.join(joins),
search=search,
order_by=order_by,
limit=limit
)
# Ask the database:
logger.debug("Running query: %s", query)
self.cursor.execute(query)
return [dict(row) for row in self.cursor.fetchall()]
#----------------------------------------------------------------------------------------------
[docs]
def update_dataval(self, priorities, values):
"""
Update data validation table in database.
Parameters:
priorities (array): Array of priorities.
values (array): Array of data validation flags to be assigned each corresponding priority.
.. codeauthor:: Rasmus Handberg <rasmush@phys.au.dk>
"""
logger = logging.getLogger('dataval')
values = np.asarray(values, dtype='int32')
v = [(int(val), int(pri)) for pri, val in zip(priorities, values) if val != 0]
if v:
self.cursor.executemany("UPDATE " + self.dataval_table + " SET dataval=(dataval | ?) WHERE priority=?;", v)
self.conn.commit()
logger.info("Updated %d entries of %d possible.", self.cursor.rowcount, len(v))
else:
logger.info("Nothing to update.")
#----------------------------------------------------------------------------------------------
[docs]
def validate(self):
    """
    Run all validations and write out summary.

    Runs the cleanup and then all validation subroutines, which each store their
    flags in the data validation table. Afterwards the ``approved`` column is filled
    in: a target is approved if none of the flags in
    ``DatavalQualityFlags.DEFAULT_BITMASK`` are set and its status (and ``corr_status``
    when running on corrected data) is OK or WARNING. Finally a summary of approved
    and rejected targets, and of the reasons for rejection, is written to the log.

    .. codeauthor:: Rasmus Handberg <rasmush@phys.au.dk>
    """
    logger = logging.getLogger('dataval')
    tbl = self.dataval_table
    good_status = [STATUS.OK.value, STATUS.WARNING.value]
    joined = "FROM todolist INNER JOIN " + tbl + " ON todolist.priority=" + tbl + ".priority"

    def percentage(part, total):
        # Groups without any targets (e.g. a CAMERA/CCD not in the TODO-file, or no
        # rejected targets at all) would otherwise cause a division by zero.
        return 100*part/total if total > 0 else 0.0

    def count_targets(approved, camera=None, ccd=None):
        # Number of non-skipped targets with the given approved-value (0 or 1),
        # optionally restricted to a single CAMERA/CCD.
        query = "SELECT COUNT(*) AS antal " + joined + " WHERE status!=? AND approved=?"
        params = [STATUS.SKIPPED.value, approved]
        if camera is not None:
            query += " AND camera=? AND ccd=?"
            params += [camera, ccd]
        self.cursor.execute(query + ";", params)
        return self.cursor.fetchone()['antal']

    logger.info('--------------------------------------------------------')

    # Run the cleanup as the first, since this may actually change things:
    self.cleanup()

    # Run all the validation subroutines:
    self.basic()
    self.mag2flux()
    self.pixinaperture()
    self.stampsize()
    self.contam()
    self.noise_metrics()
    self.mag_dist()
    self.calctime()
    self.calctime_corrections()
    self.waittime()
    self.haloswitch()
    self.camera_overlap()

    # All the data validation flags are now saved in the database table, so let's combine
    # them and mark which targets should be approved:
    logger.info('--------------------------------------------------------')
    logger.info("Setting approved flags...")
    self.cursor.execute("UPDATE " + tbl + " SET approved=1 WHERE dataval=0;")
    self.cursor.execute("UPDATE " + tbl + " SET approved=(dataval & %d = 0) WHERE dataval > 0;" % DatavalQualityFlags.DEFAULT_BITMASK)
    # Anything that did not finish successfully can never be approved:
    status_columns = ['status', 'corr_status'] if self.corr else ['status']
    for column in status_columns:
        self.cursor.execute("UPDATE " + tbl + " SET approved=0 WHERE priority IN (SELECT priority FROM todolist WHERE " + column + " NOT IN (?,?));", good_status)
    self.conn.commit()

    # Check that all entries have been set:
    self.cursor.execute("SELECT COUNT(*) AS antal FROM " + tbl + " WHERE approved IS NULL;")
    if self.cursor.fetchone()['antal'] > 0:
        logger.error("Not all approved were set")

    # Write out summary of validations
    logger.info('--------------------------------------------------------')
    logger.info("Summary of approved and rejected targets:")
    for camera, ccd in itertools.product((1,2,3,4), (1,2,3,4)):
        count_approved = count_targets(1, camera, ccd)
        count_notapproved = count_targets(0, camera, ccd)
        percent = percentage(count_notapproved, count_notapproved + count_approved)
        logger.info(" CAMERA=%d, CCD=%d: %.2f%% (%d rejected, %d approved)", camera, ccd, percent, count_notapproved, count_approved)

    count_total_approved = count_targets(1)
    count_total_notapproved = count_targets(0)
    percent = percentage(count_total_notapproved, count_total_notapproved + count_total_approved)
    logger.info(" TOTAL: %.2f%% (%d rejected, %d approved)", percent, count_total_notapproved, count_total_approved)

    logger.info("Reasons for rejections:")
    for b in range(14): # TODO: Loop over DatavalQualityFlags instead - requires it to be a real enum
        flag = 2**b

        # Only show flags that cause rejection:
        if (flag & DatavalQualityFlags.DEFAULT_BITMASK) == 0:
            continue

        # Count the number of targets where the flag is set:
        self.cursor.execute("SELECT COUNT(*) AS antal " + joined + " WHERE status IN (:ok,:warning) AND dataval > 0 AND dataval & :dataval != 0;", {
            'ok': STATUS.OK.value,
            'warning': STATUS.WARNING.value,
            'dataval': flag
        })
        count_flag = self.cursor.fetchone()['antal']
        percent = percentage(count_flag, count_total_notapproved)
        logger.info(" %s: %d (%.2f%%)", flag, count_flag, percent)

    logger.info('--------------------------------------------------------')
#----------------------------------------------------------------------------------------------
[docs]
def basic(self, errors_ratio_warn=0.05, errors_ratio_err=0.10):
    """
    Perform basic checks of the TODO-file and the lightcurve files.

    The results are reported through the ``dataval`` logger. The checks are:

    * That photometry (and corrections, if running on corrected data) has been run
      on all targets, and that the fraction of errors on each CAMERA/CCD is not too high.
    * That all targets have entries in the diagnostics tables, and that skipped
      targets have an entry in the ``photometry_skipped`` table.
    * That specific error messages do not occur in the diagnostics tables.
    * That all lightcurve files listed in the database exist and are not empty.
      Missing ones are listed in ``missing_raw.txt``/``missing_corr.txt`` in the
      output directory.
    * That no lightcurve files exist on disk without an entry in the database.
      Such files are listed in ``orphaned_lightcurves.txt`` in the output directory.

    Parameters:
        errors_ratio_warn (float, optional): Fraction of ERRORs to OK and WARNINGs
            to warn about. Default=5%.
        errors_ratio_err (float, optional): Fraction of ERRORs to OK and WARNINGs
            to throw error about. Default=10%.

    .. codeauthor:: Rasmus Handberg <rasmush@phys.au.dk>
    """
    logger = logging.getLogger('dataval')
    logger.info('Testing basics...')
    tqdm_settings = {'disable': None if logger.isEnabledFor(logging.INFO) else True}

    good_status = (STATUS.OK.value, STATUS.WARNING.value)
    error_status = (STATUS.ERROR.value,)
    # Always use the integer value of the status, like everywhere else, so the
    # queries do not depend on how the STATUS enum formats itself:
    skipped = STATUS.SKIPPED.value
    cameras_ccds = list(itertools.product((1,2,3,4), (1,2,3,4)))

    # Status that we should check for in the database. They should not be present if the
    # processing was completed correctly:
    bad_status = ','.join(str(s.value) for s in (STATUS.UNKNOWN, STATUS.STARTED, STATUS.ABORT))

    # Check the status of the photometry:
    self.cursor.execute("SELECT COUNT(*) FROM todolist WHERE status IS NULL OR status IN (" + bad_status + ");")
    rowcount = self.cursor.fetchone()[0]
    if rowcount:
        logger.error("%d entries have not had PHOTOMETRY run", rowcount)
    else:
        logger.info("All PHOTOMETRY has been run.")

    # Summary of all photometry status:
    logger.info("Summary of photometry status:")
    self.cursor.execute("SELECT status,COUNT(*) AS antal FROM todolist WHERE status NOT IN (?,?) GROUP BY status;", good_status)
    total_bad_phot_status = 0
    for sta in self.cursor.fetchall():
        total_bad_phot_status += sta['antal']
        logger.info(" %s: %d", STATUS(sta['status']).name, sta['antal'])
    logger.info(" TOTAL: %d", total_bad_phot_status)

    # Warn if it seems that there is a large number of ERROR, compared to OK and WARNING:
    logger.info("Checking number of photometry errors:")
    for camera, ccd in cameras_ccds:
        count_good = self._count_by_status('status', good_status, camera, ccd)
        count_errors = self._count_by_status('status', error_status, camera, ccd)
        ratio = count_errors/(count_good + count_errors) if count_good + count_errors > 0 else 0
        if ratio > errors_ratio_warn or ratio > errors_ratio_err:
            loglevel = logging.ERROR if ratio > errors_ratio_err else logging.WARNING
            logger.log(loglevel, " CAMERA=%d, CCD=%d: High number of errors detected: %.2f%% (%d errors, %d good)",
                camera, ccd, 100*ratio, count_errors, count_good)
        else:
            logger.info(" CAMERA=%d, CCD=%d: %.2f%% (%d errors, %d good)",
                camera, ccd, 100*ratio, count_errors, count_good)

    # Check that everything that should have, has a diagnostics entry:
    # Ignore status=SKIPPED, since these will not have a diagnostics entry.
    self.cursor.execute("SELECT COUNT(*) FROM todolist LEFT JOIN diagnostics ON todolist.priority=diagnostics.priority WHERE diagnostics.priority IS NULL AND status != ?;", [skipped])
    rowcount = self.cursor.fetchone()[0]
    logger.log(logging.ERROR if rowcount else logging.INFO, "%d entries missing in DIAGNOSTICS", rowcount)

    # Check photometry_skipped table. All stars marked as SKIPPED in photometry should
    # have an entry explaining which target that was responsible for it being skipped:
    # NOTE: This will currently fail for most TODO-files due to a bug/feature in the photometry
    # code, where an entry is not created in all cases.
    self.cursor.execute("SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='photometry_skipped';")
    if self.cursor.fetchone()[0] == 1:
        self.cursor.execute("SELECT COUNT(*) FROM todolist LEFT JOIN photometry_skipped ON todolist.priority=photometry_skipped.priority WHERE status=? AND photometry_skipped.priority IS NULL;", [skipped])
        rowcount = self.cursor.fetchone()[0]
        # TODO: For now, this is just a warning, because there is a known bug in photometry
        # causing this to often having non-zero number of missing entries.
        logger.log(logging.WARNING if rowcount else logging.INFO, "%d entries missing in PHOTOMETRY_SKIPPED", rowcount)
    else:
        logger.warning("PHOTOMETRY_SKIPPED table not found!")

    # Check the status of corrections:
    if self.corr:
        self.cursor.execute("SELECT COUNT(*) FROM todolist WHERE corr_status IS NULL OR corr_status IN (" + bad_status + ");")
        rowcount = self.cursor.fetchone()[0]
        if rowcount:
            logger.error("%d entries have not had CORRECTIONS run", rowcount)
        else:
            logger.info("All CORRECTIONS have been run.")

        # Warn if it seems that there is a large number of ERROR, compared to OK and WARNING:
        logger.info("Checking number of correction errors:")
        for camera, ccd in cameras_ccds:
            count_good = self._count_by_status('corr_status', good_status, camera, ccd)
            count_errors = self._count_by_status('corr_status', error_status, camera, ccd)
            ratio = count_errors/(count_good + count_errors) if count_good + count_errors > 0 else 0
            if ratio > errors_ratio_warn or ratio > errors_ratio_err:
                loglevel = logging.ERROR if ratio > errors_ratio_err else logging.WARNING
                logger.log(loglevel, " CAMERA=%d, CCD=%d: High number of errors detected: %.2f%% (%d errors, %d good)",
                    camera, ccd, 100*ratio, count_errors, count_good)
            else:
                logger.info(" CAMERA=%d, CCD=%d: %.2f%% (%d errors, %d good)",
                    camera, ccd, 100*ratio, count_errors, count_good)

        # Check that everything that should have, has a diagnostics_corr entry:
        # Ignore status=SKIPPED, since these will not have a diagnostics_corr entry.
        self.cursor.execute("SELECT COUNT(*) FROM todolist LEFT JOIN diagnostics_corr ON todolist.priority=diagnostics_corr.priority WHERE diagnostics_corr.priority IS NULL AND corr_status != ?;", [skipped])
        rowcount = self.cursor.fetchone()[0]
        logger.log(logging.ERROR if rowcount else logging.INFO, "%d entries missing in DIAGNOSTICS_CORR", rowcount)

    # Check for specific errors that should be flagged:
    # Patterns can contain wildcards (% or _):
    specific_errors = [
        'FileNotFoundError',
        'sqlite3.%',
        'TargetNotFoundError', # custom "error" set in photometry.TaskManager.save_result
        'TypeError',
        'Could not save lightcurve file'
    ]
    logger.info("Checking for specific errors...")
    tbls = ('diagnostics', 'diagnostics_corr') if self.corr else ('diagnostics',)
    for tbl, keyword in itertools.product(tbls, specific_errors):
        # The pattern is passed as a bound parameter, so it is always treated as a
        # string and not as a (double-quoted) identifier:
        self.cursor.execute("SELECT COUNT(*) FROM " + tbl + " WHERE errors IS NOT NULL AND errors LIKE ?;", ['%' + keyword + ': %'])
        count_specificerror = self.cursor.fetchone()[0]
        logger.log(logging.ERROR if count_specificerror else logging.INFO,
            " %s (%s): %d", keyword, tbl, count_specificerror)

    # Check if any raw lightcurve files are missing:
    logger.info("Checking if any raw lightcurve files are missing...")
    missing_phot_lightcurves_list = os.path.join(self.outfolder, 'missing_raw.txt')
    with open(missing_phot_lightcurves_list, 'w') as fid:
        self.cursor.execute("SELECT todolist.priority,lightcurve FROM todolist LEFT JOIN diagnostics ON todolist.priority=diagnostics.priority WHERE status IN (1,3);")
        missing_phot_lightcurves = self._write_missing_lightcurves(self.cursor.fetchall(), fid, tqdm_settings)

    if missing_phot_lightcurves == 0:
        logger.info("All photometry lightcurves avaliable.")
        # Do not leave an empty list behind:
        os.remove(missing_phot_lightcurves_list)
    else:
        logger.error("%d missing photometry lightcurves.", missing_phot_lightcurves)

    # Check of any corrected lightcurve files are missing:
    if self.corr:
        logger.info("Checking if any corrected lightcurve files are missing...")
        missing_corr_lightcurves_list = os.path.join(self.outfolder, 'missing_corr.txt')
        with open(missing_corr_lightcurves_list, 'w') as fid:
            self.cursor.execute("SELECT todolist.priority,diagnostics_corr.lightcurve FROM todolist LEFT JOIN diagnostics_corr ON todolist.priority=diagnostics_corr.priority WHERE corr_status IN (1,3);")
            missing_corr_lightcurves = self._write_missing_lightcurves(self.cursor.fetchall(), fid, tqdm_settings)

        if missing_corr_lightcurves == 0:
            logger.info("All corrected lightcurves avaliable.")
            os.remove(missing_corr_lightcurves_list)
        else:
            logger.error("%d missing corrected lightcurves.", missing_corr_lightcurves)

    # Checking for leftover lightcurve files:
    logger.info("Checking for any leftover orphaned lightcurve files...")
    leftover_lightcurves = 0
    leftover_lightcurves_list = os.path.join(self.outfolder, 'orphaned_lightcurves.txt')
    with open(leftover_lightcurves_list, 'w') as fid:
        logger.info(" Checking for orphaned raw lightcurves...")
        for fname in tqdm(find_lightcurve_files(self.input_folder, 'tess*-tasoc_lc.fits.gz'), **tqdm_settings):
            # Find relative path to find in database:
            relpath = os.path.relpath(fname, self.input_folder)
            logger.debug("Checking: %s", relpath)
            self.cursor.execute("SELECT * FROM diagnostics WHERE lightcurve=?;", [relpath])
            if self.cursor.fetchone() is None:
                leftover_lightcurves += 1
                fid.write(relpath + "\n")

        if self.corr:
            logger.info(" Checking for orphaned corrected lightcurves...")
            if self.corr_method is None:
                logger.error("Correction method not given")

            # Part of the filename identifying the corrector. If the corrector is
            # not given or not known, fall back to matching all corrected lightcurves:
            fname_filter = {'ensemble': 'ens', 'cbv': 'cbv', 'kasoc_filter': 'kf'}.get(self.corr_method, '*')
            for fname in tqdm(find_lightcurve_files(self.input_folder, 'tess*-tasoc-%s_lc.fits.gz' % fname_filter), **tqdm_settings):
                # Find relative path to find in database:
                relpath = os.path.relpath(fname, self.input_folder)
                logger.debug("Checking: %s", relpath)
                self.cursor.execute("SELECT * FROM diagnostics_corr WHERE lightcurve=?;", [relpath])
                if self.cursor.fetchone() is None:
                    leftover_lightcurves += 1
                    fid.write(relpath + "\n")

    if leftover_lightcurves == 0:
        logger.info("No orphaned lightcurves.")
        os.remove(leftover_lightcurves_list)
    else:
        logger.error("%d orphaned lightcurves.", leftover_lightcurves)

#----------------------------------------------------------------------------------------------
def _count_by_status(self, column, statuses, camera, ccd):
    """Count todolist entries on one CAMERA/CCD where ``column`` is one of ``statuses``."""
    placeholders = ','.join('?' * len(statuses))
    self.cursor.execute(
        "SELECT COUNT(*) FROM todolist WHERE " + column + " IN (" + placeholders + ") AND camera=? AND ccd=?;",
        [*statuses, camera, ccd])
    return self.cursor.fetchone()[0]

#----------------------------------------------------------------------------------------------
def _write_missing_lightcurves(self, rows, fid, tqdm_settings):
    """Write rows with missing or empty lightcurve files to ``fid`` and return how many."""
    missing = 0
    for row in tqdm(rows, **tqdm_settings):
        lightcurve = row['lightcurve']
        fpath = None if lightcurve is None else os.path.join(self.input_folder, lightcurve)
        if fpath is None or not os.path.isfile(fpath) or os.path.getsize(fpath) == 0:
            missing += 1
            # The lightcurve is NULL when there is no entry in the database. This can
            # not be formatted as a string, so the filename is left empty in that case.
            fid.write("{priority:6d} {lightcurve:s}\n".format(
                priority=row['priority'],
                lightcurve='' if lightcurve is None else lightcurve
            ))
    return missing
#----------------------------------------------------------------------------------------------
[docs]
def pixinaperture(self):
    """Run the ``pixinaperture`` validation, implemented in the module of the same name, on this object."""
    pixinaperture(self)
#----------------------------------------------------------------------------------------------
[docs]
def stampsize(self):
    """Run the ``stampsize`` validation, implemented in the module of the same name, on this object."""
    stampsize(self)
#----------------------------------------------------------------------------------------------
[docs]
def noise_metrics(self):
    """Run the ``noise_metrics`` validation, implemented in the module of the same name, on this object."""
    noise_metrics(self)
#----------------------------------------------------------------------------------------------
[docs]
def contam(self):
    """Run the ``contam`` validation, implemented in the module of the same name, on this object."""
    contam(self)
#----------------------------------------------------------------------------------------------
[docs]
def mag_dist(self):
    """Run the ``mag_dist`` validation, implemented in the module of the same name, on this object."""
    mag_dist(self)
#----------------------------------------------------------------------------------------------
[docs]
def mag2flux(self):
    """Run the ``mag2flux`` validation, implemented in the module of the same name, on this object."""
    mag2flux(self)
#----------------------------------------------------------------------------------------------
[docs]
def cleanup(self):
    """Run the ``cleanup`` step, implemented in the module of the same name, on this object."""
    cleanup(self)
#----------------------------------------------------------------------------------------------
[docs]
def camera_overlap(self):
    """Run the ``camera_overlap`` validation, implemented in the module of the same name, on this object."""
    camera_overlap(self)
#----------------------------------------------------------------------------------------------
[docs]
def calctime(self):
    """Run the ``calctime`` validation, implemented in the module of the same name, on this object."""
    calctime(self)
#----------------------------------------------------------------------------------------------
[docs]
def calctime_corrections(self):
    """Run the ``calctime_corrections`` validation, implemented in the ``calctime`` module, on this object."""
    calctime_corrections(self)
#----------------------------------------------------------------------------------------------
[docs]
def waittime(self):
    """Run the ``waittime`` validation, implemented in the module of the same name, on this object."""
    waittime(self)
#----------------------------------------------------------------------------------------------
[docs]
def haloswitch(self):
    """Run the ``haloswitch`` validation, implemented in the module of the same name, on this object."""
    haloswitch(self)