In [1]:
%matplotlib inline
import pandas as pd
import socket
# Fully qualified domain name of the machine running this kernel; it is handed to load.set_control() further down.
host = socket.getfqdn()

from core import  load, zoom, calc, save,plots,monitor
In [2]:
# Reload the helper modules after editing ./core/*.py so the running kernel
# picks up the new code without a restart. The modules are reloaded in the
# same order as before; a loop replaces six copy-pasted calls and stops the
# cell from echoing the repr of the last reloaded module as its output.
import importlib

for _core_module in (load, zoom, calc, save, plots, monitor):
    importlib.reload(_core_module)
Out[2]:
<module 'core.monitor' from '/ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/monitor.py'>
In [3]:
# 'month':  = 'JOBID' almost month but not really, 

# If you submit the job with job scheduler, above

# Below is the list of environment variables one can pass.
#%env local='2'
# local : if 'True', run a dask local cluster; if not 'True', put the number of workers
# to use in 'local'.
# If 'local' is not given, it is set automatically to 'True'.
#%env ychunk='2'
#%env tchunk='2'
# Controls chunking. 'False' keeps the original netCDF file's chunking unmodified.
# ychunk=10 will group the original netcdf file to 10 by 10 
# tchunk=1 will chunk the time coordinate one by one
#%env control=FWC_SSH 
# name of control file to be used for computation/plots/save/ 
#%env file_exp= 
# 'file_exp': name of the 'experiment'.
#     This corresponds to the intake catalog name without the path and the .yaml extension.
#%env year=
# For validation, this corresponds to the year in path/year/month.
# For monitoring, this corresponds to 'date'; using * means process all files in the monitoring directory.
# By setting it to *0[0-9] & *1[0-9] & *[2-3][0-9], the job can be split into three lots.
#%env month=
# for monitoring  this corresponds to file path path-XIOS.{month}/
#
#%env save=   proceed with saving?   True or False. Default is True.
#%env plot=   proceed with plotting?  True or False. Default is True.
#%env calc=   proceed with computation, or just load the computed result? True or False. Default is True.
#%env save=False
#%env lazy=False
In [4]:
%%time
# 'savefig': Do we save output in html? or not. keep it true. 
savefig=True
client,cluster,control,catalog_url,month,year,daskreport,outputpath = load.set_control(host)
!mkdir -p $outputpath
!mkdir -p $daskreport
client
local True
using host= irene4075.c-irene.mg1.tgcc.ccc.cea.fr starting dask cluster on local= True workers 16
10000000000
False
rome local cluster starting
This code is running on  irene4075.c-irene.mg1.tgcc.ccc.cea.fr using  SEDNA_DELTA_MONITOR file experiment, read from  ../lib/SEDNA_DELTA_MONITOR.yaml  on year= 2012  on month= 01  outputpath= ../results/SEDNA_DELTA_MONITOR/ daskreport= ../results/dask/6413747irene4075.c-irene.mg1.tgcc.ccc.cea.fr_SEDNA_DELTA_MONITOR_01M_IceConce/
CPU times: user 486 ms, sys: 121 ms, total: 606 ms
Wall time: 19.1 s
Out[4]:

Client

Client-3940a872-1344-11ed-8885-080038b93893

Connection method: Cluster object Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status

Cluster Info

LocalCluster

fc98b074

Dashboard: http://127.0.0.1:8787/status Workers: 16
Total threads: 128 Total memory: 251.06 GiB
Status: running Using processes: True

Scheduler Info

Scheduler

Scheduler-ac2c0da3-abdc-48ac-9bc2-a417b02e8e4f

Comm: tcp://127.0.0.1:40434 Workers: 16
Dashboard: http://127.0.0.1:8787/status Total threads: 128
Started: Just now Total memory: 251.06 GiB

Workers

Worker: 0

Comm: tcp://127.0.0.1:33075 Total threads: 8
Dashboard: http://127.0.0.1:46489/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:35618
Local directory: /tmp/dask-worker-space/worker-t_ah9orl

Worker: 1

Comm: tcp://127.0.0.1:45853 Total threads: 8
Dashboard: http://127.0.0.1:33499/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:39386
Local directory: /tmp/dask-worker-space/worker-jymxgbvf

Worker: 2

Comm: tcp://127.0.0.1:43159 Total threads: 8
Dashboard: http://127.0.0.1:46736/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:38066
Local directory: /tmp/dask-worker-space/worker-bkr_r6hm

Worker: 3

Comm: tcp://127.0.0.1:39353 Total threads: 8
Dashboard: http://127.0.0.1:38521/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:43897
Local directory: /tmp/dask-worker-space/worker-26adx6_i

Worker: 4

Comm: tcp://127.0.0.1:40781 Total threads: 8
Dashboard: http://127.0.0.1:34987/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:36178
Local directory: /tmp/dask-worker-space/worker-c7pt2gdf

Worker: 5

Comm: tcp://127.0.0.1:42025 Total threads: 8
Dashboard: http://127.0.0.1:37413/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:38775
Local directory: /tmp/dask-worker-space/worker-vpucyq2s

Worker: 6

Comm: tcp://127.0.0.1:40741 Total threads: 8
Dashboard: http://127.0.0.1:41360/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:34016
Local directory: /tmp/dask-worker-space/worker-6nbu09xz

Worker: 7

Comm: tcp://127.0.0.1:42598 Total threads: 8
Dashboard: http://127.0.0.1:43156/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:40427
Local directory: /tmp/dask-worker-space/worker-tm2z7knv

Worker: 8

Comm: tcp://127.0.0.1:40931 Total threads: 8
Dashboard: http://127.0.0.1:40402/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:42428
Local directory: /tmp/dask-worker-space/worker-q95rn3ge

Worker: 9

Comm: tcp://127.0.0.1:38844 Total threads: 8
Dashboard: http://127.0.0.1:42345/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:35886
Local directory: /tmp/dask-worker-space/worker-8bfprbeb

Worker: 10

Comm: tcp://127.0.0.1:42534 Total threads: 8
Dashboard: http://127.0.0.1:41622/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:40430
Local directory: /tmp/dask-worker-space/worker-9wiipbrc

Worker: 11

Comm: tcp://127.0.0.1:43405 Total threads: 8
Dashboard: http://127.0.0.1:36913/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:40100
Local directory: /tmp/dask-worker-space/worker-zokslius

Worker: 12

Comm: tcp://127.0.0.1:40323 Total threads: 8
Dashboard: http://127.0.0.1:33837/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:42229
Local directory: /tmp/dask-worker-space/worker-j57ig7_s

Worker: 13

Comm: tcp://127.0.0.1:34872 Total threads: 8
Dashboard: http://127.0.0.1:39623/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:39009
Local directory: /tmp/dask-worker-space/worker-4jfc3nzq

Worker: 14

Comm: tcp://127.0.0.1:46037 Total threads: 8
Dashboard: http://127.0.0.1:38116/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:42236
Local directory: /tmp/dask-worker-space/worker-hr8djyho

Worker: 15

Comm: tcp://127.0.0.1:33474 Total threads: 8
Dashboard: http://127.0.0.1:39917/status Memory: 15.69 GiB
Nanny: tcp://127.0.0.1:42933
Local directory: /tmp/dask-worker-space/worker-_wgkwtip

Read plotting information from a CSV file

In [5]:
# Read the control file selected by `control` (one of the values returned by load.set_control above).
df=load.controlfile(control)
# Optional filter, currently disabled: take out the computations tagged 'later'.
#df=df[~df['Value'].str.contains('later')]
# Display the resulting table.
df
Out[5]:
Value Inputs Equation Zone Plot Colourmap MinMax Unit Oldname Unnamed: 10
IceConce icemod.siconc (data.siconc.where(data.siconc >0)).to_dataset... ALL maps Blues None M-4

Computation starts here

Each computation consists of

  1. Load NEMO data set
  2. Zoom data set
  3. Compute (or load computed data set)
  4. Save
  5. Plot
  6. Close
In [6]:
%%time
import os
calcswitch=os.environ.get('calc', 'True') 
lazy=os.environ.get('lazy','False' )
loaddata=((df.Inputs != '').any()) 
print('calcswitch=',calcswitch,'df.Inputs != nothing',loaddata, 'lazy=',lazy)
data = load.datas(catalog_url,df.Inputs,month,year,daskreport,lazy=lazy) if ((calcswitch=='True' )*loaddata) else 0 
data
calcswitch= True df.Inputs != nothing True lazy= False
../lib/SEDNA_DELTA_MONITOR.yaml
using param_xios reading  ../lib/SEDNA_DELTA_MONITOR.yaml
using param_xios reading  <bound method DataSourceBase.describe of sources:
  param_xios:
    args:
      combine: nested
      concat_dim: y
      urlpath: /ccc/work/cont003/gen7420/odakatin/CONFIGS/SEDNA/SEDNA-I/SEDNA_Domain_cfg_Tgt_20210423_tsh10m_L1/param_f32/x_*.nc
      xarray_kwargs:
        compat: override
        coords: minimal
        data_vars: minimal
        parallel: true
    description: SEDNA NEMO parameters from MPI output  nav_lon lat fails
    driver: intake_xarray.netcdf.NetCDFSource
    metadata:
      catalog_dir: /ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/../lib/
>
{'name': 'param_xios', 'container': 'xarray', 'plugin': ['netcdf'], 'driver': ['netcdf'], 'description': 'SEDNA NEMO parameters from MPI output  nav_lon lat fails', 'direct_access': 'forbid', 'user_parameters': [{'name': 'path', 'description': 'file coordinate', 'type': 'str', 'default': '/ccc/work/cont003/gen7420/odakatin/CONFIGS/SEDNA/MESH/SEDNA_mesh_mask_Tgt_20210423_tsh10m_L1/param'}], 'metadata': {}, 'args': {'urlpath': '/ccc/work/cont003/gen7420/odakatin/CONFIGS/SEDNA/SEDNA-I/SEDNA_Domain_cfg_Tgt_20210423_tsh10m_L1/param_f32/x_*.nc', 'combine': 'nested', 'concat_dim': 'y'}}
0 read icemod ['siconc']
using load_data_xios_kerchunk reading  icemod
using load_data_xios_kerchunk reading  <bound method DataSourceBase.describe of sources:
  data_xios_kerchunk:
    args:
      consolidated: false
      storage_options:
        fo: file:////ccc/cont003/home/ra5563/ra5563/catalogue/DELTA/201201/icemod_0[0-5][0-9][0-9].json
        target_protocol: file
      urlpath: reference://
    description: CREG025 NEMO outputs from different xios server in kerchunk format
    driver: intake_xarray.xzarr.ZarrSource
    metadata:
      catalog_dir: /ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/../lib/
>
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/site-packages/xarray/core/dataset.py:1279, in Dataset._copy_listed(self, names)
   1278 try:
-> 1279     variables[name] = self._variables[name]
   1280 except KeyError:

KeyError: 'siconc'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
File <timed exec>:6, in <module>

File /ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/load.py:629, in datas(catalog_url, dfi, month, year, daskreport, lazy)
    624 datadict, paramdict = getdict(dfi)
    625 #print('datadict:',datadict)
    626 #if datadict == {}:
    627 #    data=0
    628 #else:
--> 629 data=outputs(catalog_url,datadict,month,year,daskreport,lazy) 
    630 for s in paramdict:
    631     print('param',s,'will be included in data')

File /ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/load.py:452, in outputs(catalog_url, datadict, month, year, daskreport, lazy)
    448 start = time.time()
    449 with performance_report(filename=daskreport+"_load_output_"+filename+"_"+month+year+".html"):
    450     #ds=load_data_xios_patch(cat,filename,month,catalog_url) 
--> 452     ds = load_data_xios(cat,filename,items,month,year) if not ('False' in lazy)  else load_data_xios_kerchunk(cat,filename,items,month,year,rome=True)
    453 extime=time.time() - start
    454 print('      took', extime, 'seconds')

File /ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/load.py:423, in load_data_xios_kerchunk(cat, filename, items, month, year, rome)
    421 desc=cat.data_xios_kerchunk(file=filename,month=month,year=year).describe         
    422 print('using load_data_xios_kerchunk reading ',desc)
--> 423 ds_x= [ prep(
    424     cat.data_xios_kerchunk(
    425         file=filename,month=month,year=year,eio=f'{xios:04}' 
    426     ).to_dask().drop_vars(dro,errors='ignore'))[items]
    427        for xios in xioss]
    429 return xr.concat(ds_x,dim='y',compat="override",coords="minimal")

File /ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/load.py:423, in <listcomp>(.0)
    421 desc=cat.data_xios_kerchunk(file=filename,month=month,year=year).describe         
    422 print('using load_data_xios_kerchunk reading ',desc)
--> 423 ds_x= [ prep(
    424     cat.data_xios_kerchunk(
    425         file=filename,month=month,year=year,eio=f'{xios:04}' 
    426     ).to_dask().drop_vars(dro,errors='ignore'))[items]
    427        for xios in xioss]
    429 return xr.concat(ds_x,dim='y',compat="override",coords="minimal")

File /ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/site-packages/xarray/core/dataset.py:1412, in Dataset.__getitem__(self, key)
   1410     return self._construct_dataarray(key)
   1411 if utils.iterable_of_hashable(key):
-> 1412     return self._copy_listed(key)
   1413 raise ValueError(f"Unsupported key-type {type(key)}")

File /ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/site-packages/xarray/core/dataset.py:1281, in Dataset._copy_listed(self, names)
   1279     variables[name] = self._variables[name]
   1280 except KeyError:
-> 1281     ref_name, var_name, var = _get_virtual_variable(
   1282         self._variables, name, self.dims
   1283     )
   1284     variables[var_name] = var
   1285     if ref_name in self._coord_names or ref_name in self.dims:

File /ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/site-packages/xarray/core/dataset.py:175, in _get_virtual_variable(variables, key, dim_sizes)
    173 split_key = key.split(".", 1)
    174 if len(split_key) != 2:
--> 175     raise KeyError(key)
    177 ref_name, var_name = split_key
    178 ref_var = variables[ref_name]

KeyError: 'siconc'
In [7]:
%%time
monitor.auto(df,data,savefig,daskreport,outputpath,file_exp='SEDNA'
            )
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
File <timed eval>:1, in <module>

NameError: name 'data' is not defined