Source code for yadism.output

"""Output related utilities.

For the main output (that is the computed |PDF|
independent |DIS| operator) three outputs are provided:

- tar archive, containing metadata and binary :mod:`numpy.lib.format` arrays
  (this is the **suggested** output format)
- single file `yaml <https://yaml.org/>`_ output: a single human readable (but
  possibly huge) file
- `PineAPPL <https://github.com/N3PDF/pineappl>`_ interpolation grid: very
  useful to store in a standard format (supporting also non-|DIS| processes) and
  interfacing to other codes (but *no loading* is supported from this format)

"""

import copy
import pathlib
import tarfile
import tempfile

import numpy as np
import pandas as pd
import yaml
from eko.couplings import Couplings, couplings_mod_ev
from eko.io import dictlike, runcards, types
from eko.matchings import Atlas, nf_default
from eko.quantities.heavy_quarks import MatchingScales

from . import observable_name as on
from .esf.result import ESFResult, EXSResult


[docs] class MaskedPDF: """Mask some pids of a PDF set to be 0. Parameters ---------- lhapdf_like : callable object that provides an xfxQ2 callable (as `lhapdf <https://lhapdf.hepforge.org/>`_ and :class:`ekomark.toyLH.toyPDF` do) (and thus is in flavor basis) active_pids : list[int] active PIDs """ def __init__(self, lhapdf_like, active_pids): self.parent = lhapdf_like self.active_pids = active_pids
[docs] def __getattr__(self, name): return self.parent.__getattribute__(name)
[docs] def xfxQ2(self, pid, x, Q2): """Fake lhapdf-like wrapper.""" return self.parent.xfxQ2(pid, x, Q2) if pid in self.active_pids else 0.0
[docs] class Output(dict): """Wrapper for the output to help with application to PDFs and dumping to file.""" theory = None observables = None
[docs] def apply_pdf(self, lhapdf_like): r"""Compute all observables for the given PDF. Parameters ---------- lhapdf_like : object object that provides an xfxQ2 callable (as `lhapdf <https://lhapdf.hepforge.org/>`_ and :class:`ekomark.toyLH.toyPDF` do) (and thus is in flavor basis) Returns ------- ret : :class:`PDFOutput` output dictionary with all structure functions for all x, Q2, result and error """ return self.apply_pdf_theory(lhapdf_like, self.theory)
[docs] def apply_pdf_theory(self, lhapdf_like, theory): r"""Compute all observables for the given PDF. Parameters ---------- lhapdf_like : object object that provides an xfxQ2 callable (as `lhapdf <https://lhapdf.hepforge.org/>`_ and :class:`ekomark.toyLH.toyPDF` do) (and thus is in flavor basis) theory : dict theory dictionary Returns ------- ret : :class:`PDFOutput` output dictionary with all structure functions for all x, Q2, result and error """ new_eko_theory = runcards.Legacy(theory=theory, operator={}).new_theory method = runcards.Legacy.MOD_EV2METHOD.get(theory["ModEv"], theory["ModEv"]) method = dictlike.load_enum(types.EvolutionMethod, method) method = couplings_mod_ev(method) masses = [mq**2 for mq, _ in new_eko_theory.heavy.masses] thresholds_ratios = np.power(new_eko_theory.heavy.matching_ratios, 2) sc = Couplings( couplings=new_eko_theory.couplings, order=new_eko_theory.order, method=method, masses=masses, hqm_scheme=new_eko_theory.heavy.masses_scheme, thresholds_ratios=thresholds_ratios.tolist(), ) atlas = Atlas( matching_scales=MatchingScales(masses * thresholds_ratios), origin=(theory["Qref"] ** 2, theory["nfref"]), ) fns = theory["FNS"] if "FFNS" in fns or "FFN0" in fns: alpha_s = lambda muR: sc.a_s(muR**2, nf_to=theory["NfFF"]) * 4.0 * np.pi elif fns == "ZM-VFNS": alpha_s = ( lambda muR: sc.a_s(muR**2, nf_to=nf_default(muR**2, atlas)) * 4.0 * np.pi ) else: raise ValueError(f"Scheme '{fns}' not recognized.") alpha_qed = lambda _muR: theory["alphaqed"] return self.apply_pdf_alphas_alphaqed_xir_xif( lhapdf_like, alpha_s, alpha_qed, theory["XIR"], theory["XIF"] )
[docs] def apply_pdf_alphas_alphaqed_xir_xif( self, lhapdf_like, alpha_s, alpha_qed, xiR, xiF ): r"""Compute all observables for the given PDF. Parameters ---------- lhapdf_like : object object that provides an xfxQ2 callable (as `lhapdf <https://lhapdf.hepforge.org/>`_ and :class:`ekomark.toyLH.toyPDF` do) (and thus is in flavor basis) alpha_s : callable :math:`\alpha_s(\mu_R)`, the running strong coupling alpha_qed : callable :math:`\alpha(\mu_R)`, the running fine structure constant xiR : float ratio renormalization scale to |EW| boson virtuality (linear!) xiF : float ratio factorization scale to |EW| boson virtuality (linear!) Returns ------- ret : :class:`PDFOutput` output dictionary with all structure functions for all :math:`x`, :math:`Q^2`, result and error """ # iterate ret = PDFOutput() xgrid = self["xgrid"]["grid"] # dispatch onto result for obs in self: if not on.ObservableName.is_valid(obs): continue if self[obs] is None: continue ret[obs] = [] for kin in self[obs]: ret[obs].append( kin.apply_pdf( lhapdf_like, self["pids"], xgrid, alpha_s, alpha_qed, xiR, xiF ) ) return ret
[docs] def get_raw(self): """Serialize result as dict. This maps the original numpy matrices to lists. Returns ------- out : dict dictionary which will be written on output """ out = {} # dump raw elements for f in self: out[f] = copy.copy(self[f]) out["pids"] = list(self["pids"]) # make raw lists for k in ["xgrid"]: out[k]["grid"] = self[k]["grid"] out[k]["log"] = self[k]["log"] for obs in self: if not on.ObservableName.is_valid(obs): continue if self[obs] is None: continue out[obs] = [] for kin in self[obs]: out[obs].append(kin.get_raw()) return out
[docs] def dump_yaml(self, stream=None): """Serialize result as YAML. Parameters ---------- stream : None or stream if given, dump is written on it Returns ------- dump : any result of dump(output, stream), i.e. a string, if no stream is given or Null, if self is written sucessfully to stream """ # TODO explicitly silence yaml out = self.get_raw() out["theory"] = self.theory out["observables"] = self.observables return yaml.dump(out, stream, default_flow_style=None)
[docs] def dump_yaml_to_file(self, filename): """Write YAML representation to a file. Parameters ---------- filename : str or os.PathLike target file name Returns ------- ret : any result of dump(output, stream), i.e. Null if written sucessfully """ with open(filename, "w", encoding="utf8") as f: ret = self.dump_yaml(f) return ret
[docs] def dump_tar(self, tarpath, runcards=True): """Serialize output in a tar archive. This is the favorite *native* output. This results in a considerably smaller output file, with respect to the 'yaml' serialization, but it looses the readability. In most cases, there is no advantage in having a readable DIS operator, so they are serialized in binary arrays (see :mod:`numpy.lib.format`). This **only supports** observables with a uniform order across kinematical point (as they are generated by :class:`yadism.runner.Runner`), since they are stored in a unique array. They don't have to be uniform across different observables (since they are stored in different arrays anyhow). Parameters ---------- tarpath : str or os.PathLike target file path (it has to have the '.tar' extension) Raises ------ AssertionError If orders are not uniform within a single observable. """ tarpath = pathlib.Path(tarpath) if tarpath.suffix != ".tar": raise ValueError(f"'{tarpath}' is not a valid tar filename, wrong suffix") with tempfile.TemporaryDirectory() as tmpdir: tmpdir = pathlib.Path(tmpdir) metadata = {} for metafield, metavalue in self.items(): if not on.ObservableName.is_valid(metafield) or metavalue is None: # this way even scalar types are properly casted # it works for sure for: None, bool, int, float, str # and so everything we need (what is safe in yaml) metadata[metafield] = np.array(metavalue).tolist() else: kinematics = {} for key in metavalue[0].get_raw(): if key != "orders": kinematics[key] = [] orders_first = [] values = [] errors = [] for esfres in metavalue: orders = [] esf_values = [] esf_errors = [] for key, value in esfres.get_raw().items(): if key != "orders": kinematics[key].append(value) else: for order in value: orders.append(order["order"]) esf_values.append(order["values"]) esf_errors.append(order["errors"]) if len(orders_first) == 0: orders_first = orders else: assert orders_first == orders values.append(esf_values) errors.append(esf_errors) np.savez_compressed( tmpdir / metafield, values=np.array(values), errors=np.array(errors), ) metadata[metafield] = dict( orders=orders_first, kinematics=kinematics ) (tmpdir / "metadata.yaml").write_text( yaml.safe_dump(metadata, default_flow_style=None), encoding="utf-8" ) if runcards: runcards = tmpdir / "runcards" runcards.mkdir() (runcards / "theory.yaml").write_text( yaml.safe_dump(self.theory), encoding="utf-8" ) (runcards / "observables.yaml").write_text( yaml.safe_dump(self.observables, default_flow_style=None), encoding="utf-8", ) with tarfile.open(tarpath, "w") as tar: tar.add(tmpdir, arcname=tarpath.stem)
[docs] @classmethod def load_tar(cls, tarpath): """Deserialize output object from tar. It loads an :meth:`Output.dump_tar` generated tar file into an :class:`Output` object. Parameters ---------- tarpath : str or os.PathLike target file path (it has to be a 'tar' archive) Returns ------- :class:`Output` loaded object """ tarpath = pathlib.Path(tarpath) # Initialize output object out = cls() with tempfile.TemporaryDirectory() as tmpdir: tmpdir = pathlib.Path(tmpdir) with tarfile.open(tarpath, "r") as tar: tar.extractall(tmpdir) # The internal name is the one that has been used to save, since the # tar could have been renamed in the meanwhile, it might be unknown, # but there is only a single folder innerdir = list(tmpdir.glob("*"))[0] runcards = innerdir / "runcards" out.theory = yaml.safe_load((runcards / "theory.yaml").read_text()) out.observables = yaml.safe_load( (runcards / "observables.yaml").read_text() ) metadata = yaml.safe_load((innerdir / "metadata.yaml").read_text()) for metafield, metavalue in metadata.items(): if not on.ObservableName.is_valid(metafield) or metavalue is None: ar = np.array(metavalue) if ar.ndim > 0: out[metafield] = ar else: out[metafield] = metavalue else: op = np.load(innerdir / f"{metafield}.npz") kinvars, kinvalues = list(zip(*metavalue["kinematics"].items())) kinematics = [dict(zip(kinvars, kin)) for kin in zip(*kinvalues)] orders = metavalue["orders"] Result = ESFResult if "y" not in kinvars else EXSResult results = [] for kin, val, err in zip(kinematics, op["values"], op["errors"]): res = kin.copy() res["orders"] = [ dict(zip(["order", "values", "errors"], ordvalerr)) for ordvalerr in zip(orders, val, err) ] results.append(Result.from_document(res)) out[metafield] = results return out
[docs] @classmethod def load_yaml(cls, stream): """Load YAML representation from stream. Parameters ---------- stream : any source stream Returns ------- obj : :class:`Output` loaded object """ obj = yaml.safe_load(stream) # make list numpy for k in ["xgrid"]: obj[k]["grid"] = np.array(obj[k]["grid"]) obj[k]["log"] = obj[k]["log"] for obs in obj: if not on.ObservableName.is_valid(obs): continue if obj[obs] is None: continue Result = ESFResult if "y" not in obj[obs][0] else EXSResult for j, kin in enumerate(obj[obs]): obj[obs][j] = Result.from_document(kin) out = cls(obj) out.theory = obj["theory"] out.observables = obj["observables"] del out["theory"] del out["observables"] return out
[docs] @classmethod def load_yaml_from_file(cls, filename): """Load YAML representation from file. Parameters ---------- filename : str or os.PathLike source file name Returns ------- obj : :class:`Output` loaded object """ obj = None with open(filename) as o: obj = cls.load_yaml(o) return obj
[docs] class PDFOutput(Output): """Wrapper for the PDF output to help with dumping to file."""
[docs] def get_raw(self): """Convert the object into a native Python dictionary. Returns ------- out : dict raw dictionary """ out = {} for obs in self: if self[obs] is None: continue out[obs] = [] for kin in self[obs]: out[obs].append({k: float(v) for k, v in kin.items()}) return out
[docs] @classmethod def load_yaml(cls, stream): """Load the object from YAML. Parameters ---------- stream : any source stream Returns ------- obj : :class:`PDFOutput` loaded object """ obj = yaml.safe_load(stream) return cls(obj)
@property def tables(self): """Convert data into a mapping structure functions -> :class:`pandas.DataFrame`.""" tables = {} for k, v in self.items(): tables[k] = pd.DataFrame(v) return tables
[docs] def dump_tables_to_file(self, filename): """Write all tables to file. Parameters ---------- filename : str output file name """ with open(filename, "w") as f: for name, table in self.tables.items(): f.write("\n".join([name, str(table), "\n"]))
[docs] def dump_tables_to_csv(self, dirname): """Write all tables to separate csv files. Parameters ---------- dirname : str output directory name """ dirname = pathlib.Path(dirname) dirname.mkdir(exist_ok=True) for name, table in self.tables.items(): filename = dirname / f"{name}.csv" table.to_csv(filename)