# Copyright (C) 2024 ASTRON (Netherlands Institute for Radio Astronomy)
# SPDX-License-Identifier: Apache-2.0
import dataclasses
from typing import Optional, Tuple
import numpy as np
import pandas as pd
from sourcefinder import image
from sourcefinder.accessors import open as pyse_open
from sourcefinder.accessors import sourcefinder_image_from_accessor
from sourcefinder.config import Conf
from trap.frequency_bands import FrequencyBands
from trap.log import log_time, logger
PYSE_OUT_COLUMNS = {
"ra": "ra",
"dec": "dec",
"ra_fit_err": "ra_err",
"dec_fit_err": "dec_err",
"peak_flux": "peak",
"peak_flux_err": "peak_err",
"int_flux": "flux",
"int_flux_err": "flux_err",
"significance_detection_level": "sig",
"semimajor_axis": "smaj_asec",
"semiminor_axis": "smin_asec",
"parallactic_angle": "theta_celes",
"error_radius": "error_radius",
"chisq": "chisq",
"reduced_chisq": "reduced_chisq",
}
# Currently, PySE has two different ways of returning results based on the configuration.
# Where possible it returns a Pandas DataFrame as of v0.5.0, but this is not yet
# always the case. This should always be an Pandas DataFrame after the following
# issue has been completed: https://github.com/transientskp/pyse/issues/90
PYSE_OUT_COLUMNS_LEGACY = {
**PYSE_OUT_COLUMNS,
"ew_sys_err": "ew_sys_err",
"ns_sys_err": "ns_sys_err",
"gaussian_fit": "gaussian",
}
[docs]
def rms(data: np.ndarray) -> np.ndarray:
"""Returns the RMS of the data about the median.
Args:
data: a numpy array
"""
data -= np.median(data)
return np.sqrt((data**2).sum() / data.size)
[docs]
def subregion(data: np.ndarray, reduction_factor: float = 2) -> np.ndarray:
"""Take the center region of the data where the size of the center region is defined by reduction_factor.
Parameters
----------
data: np.ndarray
The image data to clip.
reduction_factor: float
The factor by which to decrease the image. This applies to each axis, so if ``reduction_factor=2`` and
the image is of shape 100x100, then the shape of the clipped center region is 50x50. This effectively
reduces the image size by a factor 4 (10.000 pixels to 2500 pixels).
If ``reduction_factor=1``, the original 100x100 pixels are returned.
It is also possibly to supply a floating point number. For the same 100x100 image, if a
``reduction_factor=1.5`` is supplied, the resulting slice is of shape (67,67).
The way to make sense of this is that `100/1.5=66.6667`.
Examples
--------
>>> import numpy as np
>>> data = np.array([
... [0, 1, 2, 1, 0],
... [1, 2, 3, 2, 1],
... [2, 3, 4, 3, 2],
... [1, 2, 3, 2, 1],
... [0, 1, 2, 1, 0]
... ])
>>> subregion(data, 2)
array([[2, 3, 2],
[3, 4, 3],
[2, 3, 2]])
..
Returns
-------
numpy.ndarray: A slice of the original data at the center of the data.
"""
height, width = data.shape
center_x = width / 2
center_y = height / 2
offset_x = center_x / reduction_factor
offset_y = center_y / reduction_factor
# Make sure the slice is centered around the origin
if center_x % 2 == 0:
slice_x = slice(int(center_x - offset_x), int(center_x + offset_x))
else:
slice_x = slice(int(center_x - offset_x), int(center_x + offset_y + 1))
if center_y % 2 == 0:
slice_y = slice(int(center_y - offset_y), int(center_y + offset_y))
else:
slice_y = slice(int(center_y - offset_y), int(center_y + offset_y + 1))
return data[slice_y, slice_x] # Note that numpy indexing is in order (y,x)
[docs]
def clip(data: np.ndarray, sigma: float = 3) -> np.ndarray:
"""Remove all values above a threshold from the array.
Uses iterative clipping at sigma value until nothing more is getting clipped.
Parameters
----------
data: numpy.ndarray
The data to clip
sigma: float
The amount of standard deviations that determine the threshold for clipping
returns
-------
np.ndarray
The data with the high values removed
"""
raveled = data.ravel()
median = np.median(raveled)
std = np.std(raveled)
newdata = raveled[np.abs(raveled - median) <= sigma * std]
if len(newdata) and len(newdata) != len(raveled):
return clip(newdata, sigma)
else:
return newdata
[docs]
@log_time()
def read_pyse_image(
path: str,
freq_bands: Optional[FrequencyBands] = None,
margin=0,
radius=1500,
back_size_x=50,
back_size_y=50,
reduction_factor_for_rms=4,
rms_min=0,
rms_max=1,
force_beam=True,
ew_sys_err=0,
ns_sys_err=0,
deblend_nthresh=0,
**pyse_conf,
) -> Tuple[image.ImageData, dict, bool]:
"""
Read an image with PySE that can be used in functions like :func:`sources_from_fits_pyse` and :func:`force_fit`.
Some basic image metadata is also read and calculated. The metadata fields are:
- rms
The rms of the inner region of the image. The size of this inner region is determined by ``reduction_factor_for_rms``.
- rms_min
Lowest value of the RMS background map. Note that this is different from ``rms`` which uses the center region of the image rather than a RMS background map.
- rms_max
Largest value of the RMS background map. Note that this is different from ``rms`` which uses the center region of the image rather than a RMS background map.
- freq_eff
Effective frequency of the image in Hz.
That is, the mean frequency of all the visibility data which comprises this image.
- freq_bw
The frequency bandwidth of this image in Hz.
- taustart_ts
Timestamp of the start of the observaion.
- url
Location of the image
- rb_smaj
The semi-major axis of the restoring beam, in degrees.
- rb_smin
The semi-minor axis of the restoring beam, in degrees.
- rb_pa
The position angle of the restoring beam (from north to east to the major axis), in degrees.
- centre_ra
The central right-ascention coordinate (J2000) (or pointing centre) of the region, in degrees.
- centre_dec
The central declination coordinate (J2000) (or pointing centre) of the region, in degrees.
- xtr_radius
The radius of the circular mask used for source extraction, in degrees.
The image is also checked for quality. The quality check involves a check for the values where
the image is rejected if it has all nodta values (nan, inf, -inf) or if the ``rms`` is below
the ``rms_min`` or above the ``rms_max`` values. These min and max values are set in the :ref:`input arguments <input_arguments>`.
Parameters
----------
fits_path: str
The path to the .fits file containing the image of which the sources are to be extracted.
margin: int
The margin in pixels from the edge of the image within which sources are ignored.
This exclusion area combines with radius.
radius: int,
The radius in pixels around the center of the image, outside of which sources are ignored.
This exclusion area combines with margin.
back_size_x: int
Widht of the background boxes as uses in SEP.
See https://sep.readthedocs.io/en/v1.1.x/api/sep.Background.html#sep.Background
back_size_y: int
Height of the background boxes as used in SEP.
See https://sep.readthedocs.io/en/v1.1.x/api/sep.Background.html#sep.Background
reduction_factor_for_rms: float
The reduction_factor passed to :func:`subregion` used to slice the data at the center
before calculating the rms.
ew_sys_err: float
Systematic errors in units of arcseconds which augment the sourcefinder-measured errors on source positions when performing source association. These variables refer to an absolute angular error along an east-west and north-south axis respectively. (NB Although these values are stored during the source-extraction process, they affect the source-association process.)
ns_sys_err: float
Same as ew_sys_err but in perpendicular direction
Returns: (sourcefinder.image.ImageData, dict, bool)
A tuple containing:
- A PySE image that can be used for source extraction
- Metadata of the image
- Whether the image is rejected or not
"""
if freq_bands is None:
freq_bands = FrequencyBands()
im = pyse_open(path)
source_params = set(PYSE_OUT_COLUMNS.values())
conf = Conf(
image=dict(
margin=margin,
radius=radius,
back_size_x=back_size_x,
back_size_y=back_size_y,
vectorized=True,
force_beam=force_beam,
ew_sys_err=ew_sys_err,
ns_sys_err=ns_sys_err,
deblend_nthresh=deblend_nthresh,
**pyse_conf,
),
export=dict(
pandas_df=not deblend_nthresh, # https://git.astron.nl/RD/trap/-/issues/35
source_params=list(source_params),
),
)
pyse_im = sourcefinder_image_from_accessor(
im,
conf=conf,
)
# Extract metadata from image
pyse_im_meta = im.extract_metadata()
try:
# Note: If pyse_im.data is empty, pyse_im.rmsmap errors on an assert, see:
# https://github.com/transientskp/pyse/blob/a43b64d684775605051adf9f754bb0ce6eda3493/sourcefinder/image.py#L246
# 7 April 2025, Timo Millenaar
rmsmap = pyse_im.rmsmap
rms_min = rmsmap.min()
rms_max = rmsmap.max()
except AssertionError:
rms_min = float("nan")
rms_max = float("nan")
freq_band_id = freq_bands.get_frequency_band(
pyse_im_meta["freq_eff"], pyse_im_meta["freq_bw"]
)
im_meta = dict(
rms=rms(clip(subregion(pyse_im.data.data, reduction_factor_for_rms))),
rms_min=rms_min,
rms_max=rms_max,
freq_band=freq_band_id,
freq_eff=pyse_im_meta["freq_eff"],
freq_bw=pyse_im_meta["freq_bw"],
taustart_ts=pyse_im_meta["taustart_ts"],
url=pyse_im_meta["url"],
rb_smaj=pyse_im_meta["beam_smaj_pix"] * np.abs(pyse_im_meta["deltax"]),
rb_smin=pyse_im_meta["beam_smin_pix"] * np.abs(pyse_im_meta["deltay"]),
rb_pa=np.rad2deg(pyse_im_meta["beam_pa_rad"]),
centre_ra=pyse_im_meta["centre_ra"],
centre_dec=pyse_im_meta["centre_decl"],
xtr_radius=radius,
)
# Reject image if all data is nan or inf or when the rms value is not within the supplied bounds
rejected = False
if np.all(~np.isfinite(pyse_im.data)):
rejected = True
logger.info(f"Image rejected because it contains no data: {im_meta['url']}")
if not rms_min < im_meta["rms"] < rms_max:
rejected = True
logger.info(
f"Image rejected because the rms ({im_meta['rms']}) is not within the supplied thresholds: rms_min: {rms_min}, rms_max: {rms_max}. Image path: {im_meta['url']}"
)
im_meta["rejected"] = int(rejected)
return pyse_im, im_meta, rejected
[docs]
@log_time()
def sources_from_fits_pyse(
pyse_im: image.ImageData,
*,
rejected: bool = False,
detection_threshold: float = 8,
analysis_threshold: float = 3,
deblend_nthresh: float = 0,
) -> pd.DataFrame:
"""Extract sources from an image using PySE.
Parameters
----------
pyse_im: sourcefinder.image.ImageData
The pyse image as read using :func:`read_pyse_image`
detection_threshold: float
The detection threshold, as a multiple of the RMS
noise. At least one pixel in a source must exceed this value
for it to be regarded as significant.
analysis_threshold: float
Analysis threshold, as a multiple of the RMS
noise. All the pixels within the island that exceed
this will be used when fitting the source.
deblend_nthresh: int
Number of subthresholds to use for
deblending. Set to 0 to disable.
Returns
-------
`pandas.DataFrame`
A dataframe where each row is an obtained source.
The columns contain the attributes of the corces.
Column names with explenation:
ra [deg]: float
Right ascension coordinate of the source
dec [deg]: float
Declination coordinate of the source
ra_fit_err [deg]: float
1-sigma error from the gaussian fit in right ascension.
Note that for a source located towards the poles the ra_fit_err
increases with absolute declination.
dec_fit_err [deg]: float
1-sigma error from the gaussian fit in declination
peak_flux [Jy]: float
peak_flux_err [Jy]: float
int_flux [Jy]: float
int_flux_err [Jy]: float
significance_detection_level: float
semimajor_axis [arcsec]: float
semiminor_axis [arcsec]: float
parallactic_angle [deg]: float
ew_sys_err [arcsec]: float
Telescope dependent systematic error in east-west direction.
ns_sys_err [arcsec]: float
Telescope dependent systematic error in north-south direction.
error_radius [arcsec]: float
A pessimistic on-sky position error estimate in arcsec.
gaussian_fit: bool
chisq: float
reduced_chisq: float
"""
if rejected:
sources = pd.DataFrame({}, columns=PYSE_OUT_COLUMNS.keys())
sources.index.name = "id"
return sources
# Note: for explanation of variables see tkp/db/general.py::insert_extracted_sources:
# https://github.com/transientskp/tkp/blob/b34582712b82b888a5a7b51b3ee371e682b8c349/tkp/db/general.py#L106
extraction_results = pyse_im.extract()
if isinstance(extraction_results, pd.DataFrame):
# We inherit the dtypes from pyse, which in general store position as a float64 and flux as a float32
extraction_results.rename(
lambda name: name.split(".")[-1].lower(), axis="columns", inplace=True
)
extraction_results = extraction_results[PYSE_OUT_COLUMNS.values()]
sources = extraction_results.rename(
columns={v: k for k, v in PYSE_OUT_COLUMNS.items()}
)
sources["ns_sys_err"] = np.float64(pyse_im.conf.image.ns_sys_err)
sources["ew_sys_err"] = np.float64(pyse_im.conf.image.ew_sys_err)
sources["gaussian_fit"] = True
else:
extraction_results = [
r.serialize(pyse_im.conf, every_parm=True) for r in extraction_results
]
sources = pd.DataFrame(
extraction_results, columns=pyse_im.conf.export.source_params
)
sources["ns_sys_err"] = np.float64(pyse_im.conf.image.ns_sys_err)
sources["ew_sys_err"] = np.float64(pyse_im.conf.image.ew_sys_err)
sources["gaussian_fit"] = True
sources.rename(
columns={v: k for (k, v) in PYSE_OUT_COLUMNS.items()}, inplace=True
)
# uncertainty_ew: sqrt of quadratic sum of systematic error and error_radius
# divided by 3600 because uncertainty in degrees and others in arcsec.
sources["uncertainty_ew"] = (
np.sqrt(sources["ew_sys_err"] ** 2 + sources["error_radius"] ** 2) / 3600.0
).astype("float64")
# uncertainty_ns: sqrt of quadratic sum of systematic error and error_radius
# divided by 3600 because uncertainty in degrees and others in arcsec.
sources["uncertainty_ns"] = (
np.sqrt(sources["ns_sys_err"] ** 2 + sources["error_radius"] ** 2) / 3600.0
).astype("float64")
logger.info(f"Found {len(sources)} sources")
sources.int_flux = sources.int_flux.astype(np.float64)
sources.int_flux_err = sources.int_flux_err.astype(np.float64)
sources.peak_flux = sources.peak_flux.astype(np.float64)
sources.peak_flux_err = sources.peak_flux_err.astype(np.float64)
sources.index.name = "id"
return sources
[docs]
def force_fit(
pyse_im: image.ImageData,
positions: np.ndarray,
ew_sys_err: float = 10,
ns_sys_err: float = 10,
) -> Tuple[pd.DataFrame, np.ndarray]:
"""
Fit the specified locations using PySE.
Parameters
----------
pyse_im: sourcefinder.image.ImageData
The pyse image as read using :func:`read_pyse_image`
positions: np.ndarray
A numpy array with the positions of the sources to be force fitted in the form [[ra_1, dec_1], [ra_2, dec_2]]
ew_sys_err [arcsec]: float
Telescope dependent systematic error in east-west direction.
ns_sys_err [arcsec]: float
Telescope dependent systematic error in north-south direction.
Returns
-------
pd.DataFrame
A dataframe with the force-fitted sources
"""
box_in_beampix = 10
boxsize = box_in_beampix * max(pyse_im.beam[0], pyse_im.beam[1])
ids = np.arange(len(positions))
# Some fits could have been dropped by PySE. Also return the fit_ids so the caller can know what ids were dropped
forced_fits, fit_ids = pyse_im.fit_fixed_positions(
positions, boxsize, ids=range(len(positions))
)
conf = Conf(
image=pyse_im.conf.image,
export=dataclasses.replace(
pyse_im.conf.export, source_params=list(PYSE_OUT_COLUMNS_LEGACY.values())
),
)
force_fit_results = [r.serialize(conf, every_parm=True) for r in forced_fits]
forced_sources = pd.DataFrame(
force_fit_results, columns=PYSE_OUT_COLUMNS_LEGACY.keys()
)
# uncertainty_ew: sqrt of quadratic sum of systematic error and error_radius
# divided by 3600 because uncertainty in degrees and others in arcsec.
forced_sources["uncertainty_ew"] = (
np.sqrt(forced_sources["ew_sys_err"] ** 2 + forced_sources["error_radius"] ** 2)
/ 3600.0
)
# uncertainty_ns: sqrt of quadratic sum of systematic error and error_radius
# divided by 3600 because uncertainty in degrees and others in arcsec.
forced_sources["uncertainty_ns"] = (
np.sqrt(forced_sources["ns_sys_err"] ** 2 + forced_sources["error_radius"] ** 2)
/ 3600.0
)
logger.info(f"Forced fit for {len(forced_sources)} sources")
return forced_sources, fit_ids