%matplotlib inline
import pandas as pd
import socket
host = socket.getfqdn()
from core import load, zoom, calc, save,plots,monitor
# Re-import the project modules so that edits to ./core/*.py are picked up
# without restarting the kernel.
import importlib
for _module in (load, zoom, calc, save, plots):
    importlib.reload(_module)
# Reloaded last and kept as the final expression so the cell still echoes it.
importlib.reload(monitor)
<module 'core.monitor' from '/ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/monitor.py'>
# 'month': = 'JOBID' almost month but not really,
# If you submit the job with job scheduler, above
# Below is the list of environment variables one can pass:
#%env local='2'
# local : if 'True', run a dask local cluster; otherwise, give the number of workers
# as the value of 'local'.
# If 'local' is not given, it is automatically set to 'True'.
#%env ychunk='2'
#%env tchunk='2'
# Controls chunking. 'False' leaves the original netcdf file's chunks unmodified.
# ychunk=10 will group the original netcdf file to 10 by 10
# tchunk=1 will chunk the time coordinate one by one
#%env control=FWC_SSH
# name of control file to be used for computation/plots/save/
#%env file_exp=
# 'file_exp': the name of the 'experiment'.
# This corresponds to the intake catalog name, without the path and '.yaml'.
#%env year=
# For validation, this corresponds to the year in path/year/month.
# For monitoring, this corresponds to 'date'; using * means process all files in the monitoring directory.
# By setting it to *0[0-9], *1[0-9] and *[2-3][0-9], the job can be separated into three lots.
#%env month=
# for monitoring this corresponds to file path path-XIOS.{month}/
#
#%env save= proceed with saving? True or False. Default is True.
#%env plot= proceed with plotting? True or False. Default is True.
#%env calc= proceed with computation, or just load the computed result? True or False. Default is True.
#%env save=False
#%env lazy=False
%%time
# 'savefig': Do we save output in html? or not. keep it true.
savefig=True
client,cluster,control,catalog_url,month,year,daskreport,outputpath = load.set_control(host)
!mkdir -p $outputpath
!mkdir -p $daskreport
client
local True using host= irene4695.c-irene.mg1.tgcc.ccc.cea.fr starting dask cluster on local= True workers 16 10000000000 False rome local cluster starting This code is running on irene4695.c-irene.mg1.tgcc.ccc.cea.fr using SEDNA_DELTA_MONITOR file experiment, read from ../lib/SEDNA_DELTA_MONITOR.yaml on year= 2012 on month= 01 outputpath= ../results/SEDNA_DELTA_MONITOR/ daskreport= ../results/dask/6412985irene4695.c-irene.mg1.tgcc.ccc.cea.fr_SEDNA_DELTA_MONITOR_01M_FWC_2D/ CPU times: user 575 ms, sys: 129 ms, total: 704 ms Wall time: 21.5 s
Client-acb15a14-1331-11ed-a50c-080038b949a5
Connection method: Cluster object | Cluster type: distributed.LocalCluster |
Dashboard: http://127.0.0.1:8787/status |
c6a4ba70
Dashboard: http://127.0.0.1:8787/status | Workers: 16 |
Total threads: 128 | Total memory: 251.06 GiB |
Status: running | Using processes: True |
Scheduler-d9133c78-7eb9-4e63-a4a9-127cce4b067e
Comm: tcp://127.0.0.1:32875 | Workers: 16 |
Dashboard: http://127.0.0.1:8787/status | Total threads: 128 |
Started: Just now | Total memory: 251.06 GiB |
Comm: tcp://127.0.0.1:46347 | Total threads: 8 |
Dashboard: http://127.0.0.1:43955/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:42089 | |
Local directory: /tmp/dask-worker-space/worker-muxlhkfh |
Comm: tcp://127.0.0.1:44852 | Total threads: 8 |
Dashboard: http://127.0.0.1:37132/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:41874 | |
Local directory: /tmp/dask-worker-space/worker-ebndkefa |
Comm: tcp://127.0.0.1:36927 | Total threads: 8 |
Dashboard: http://127.0.0.1:41265/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:42802 | |
Local directory: /tmp/dask-worker-space/worker-bpu_cwox |
Comm: tcp://127.0.0.1:43840 | Total threads: 8 |
Dashboard: http://127.0.0.1:39076/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:39665 | |
Local directory: /tmp/dask-worker-space/worker-xxulyy_2 |
Comm: tcp://127.0.0.1:40565 | Total threads: 8 |
Dashboard: http://127.0.0.1:41071/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:42041 | |
Local directory: /tmp/dask-worker-space/worker-pg8djy4_ |
Comm: tcp://127.0.0.1:38546 | Total threads: 8 |
Dashboard: http://127.0.0.1:39135/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:36611 | |
Local directory: /tmp/dask-worker-space/worker-1mef0wz4 |
Comm: tcp://127.0.0.1:34827 | Total threads: 8 |
Dashboard: http://127.0.0.1:42782/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:39613 | |
Local directory: /tmp/dask-worker-space/worker-01pc8349 |
Comm: tcp://127.0.0.1:39868 | Total threads: 8 |
Dashboard: http://127.0.0.1:33343/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:33148 | |
Local directory: /tmp/dask-worker-space/worker-86mps9p2 |
Comm: tcp://127.0.0.1:42632 | Total threads: 8 |
Dashboard: http://127.0.0.1:36320/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:40157 | |
Local directory: /tmp/dask-worker-space/worker-xbolg3j9 |
Comm: tcp://127.0.0.1:39718 | Total threads: 8 |
Dashboard: http://127.0.0.1:41774/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:40387 | |
Local directory: /tmp/dask-worker-space/worker-0zyv_lrf |
Comm: tcp://127.0.0.1:44009 | Total threads: 8 |
Dashboard: http://127.0.0.1:46065/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:46598 | |
Local directory: /tmp/dask-worker-space/worker-mgcyu_s6 |
Comm: tcp://127.0.0.1:44161 | Total threads: 8 |
Dashboard: http://127.0.0.1:39393/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:39842 | |
Local directory: /tmp/dask-worker-space/worker-vvkgeeg0 |
Comm: tcp://127.0.0.1:43742 | Total threads: 8 |
Dashboard: http://127.0.0.1:37967/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:33900 | |
Local directory: /tmp/dask-worker-space/worker-xz9a4nrt |
Comm: tcp://127.0.0.1:34509 | Total threads: 8 |
Dashboard: http://127.0.0.1:34980/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:39966 | |
Local directory: /tmp/dask-worker-space/worker-h7dsoxis |
Comm: tcp://127.0.0.1:36714 | Total threads: 8 |
Dashboard: http://127.0.0.1:35933/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:35695 | |
Local directory: /tmp/dask-worker-space/worker-dq8_sysj |
Comm: tcp://127.0.0.1:46623 | Total threads: 8 |
Dashboard: http://127.0.0.1:35453/status | Memory: 15.69 GiB |
Nanny: tcp://127.0.0.1:39547 | |
Local directory: /tmp/dask-worker-space/worker-fqjn8tkm |
# Read the control file named by `control` into `df` and display it.
# NOTE(review): judging by the table shown below this cell, each row describes
# one computation (Value, Inputs, Equation, Zone, Plot, ...) — confirm against
# load.controlfile.
df=load.controlfile(control)
# Optional filter (disabled): drop the rows whose 'Value' contains 'later'.
#df=df[~df['Value'].str.contains('later')]
df
Value | Inputs | Equation | Zone | Plot | Colourmap | MinMax | Unit | Oldname | Unnamed: 10 | |
---|---|---|---|---|---|---|---|---|---|---|
FWC_2D | gridS.vosaline,param.mask,param.e3t,param.e1te2t | calc.FWC2D_UFUNC(data) | BBFG | maps | Spectral_r | (0,24) | m | S-1 |
Each computation consists of
%%time
import os
calcswitch=os.environ.get('calc', 'True')
lazy=os.environ.get('lazy','False' )
loaddata=((df.Inputs != '').any())
print('calcswitch=',calcswitch,'df.Inputs != nothing',loaddata, 'lazy=',lazy)
data = load.datas(catalog_url,df.Inputs,month,year,daskreport,lazy=lazy) if ((calcswitch=='True' )*loaddata) else 0
data
calcswitch= True df.Inputs != nothing True lazy= False ../lib/SEDNA_DELTA_MONITOR.yaml using param_xios reading ../lib/SEDNA_DELTA_MONITOR.yaml using param_xios reading <bound method DataSourceBase.describe of sources: param_xios: args: combine: nested concat_dim: y urlpath: /ccc/work/cont003/gen7420/odakatin/CONFIGS/SEDNA/SEDNA-I/SEDNA_Domain_cfg_Tgt_20210423_tsh10m_L1/param_f32/x_*.nc xarray_kwargs: compat: override coords: minimal data_vars: minimal parallel: true description: SEDNA NEMO parameters from MPI output nav_lon lat fails driver: intake_xarray.netcdf.NetCDFSource metadata: catalog_dir: /ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/../lib/ > {'name': 'param_xios', 'container': 'xarray', 'plugin': ['netcdf'], 'driver': ['netcdf'], 'description': 'SEDNA NEMO parameters from MPI output nav_lon lat fails', 'direct_access': 'forbid', 'user_parameters': [{'name': 'path', 'description': 'file coordinate', 'type': 'str', 'default': '/ccc/work/cont003/gen7420/odakatin/CONFIGS/SEDNA/MESH/SEDNA_mesh_mask_Tgt_20210423_tsh10m_L1/param'}], 'metadata': {}, 'args': {'urlpath': '/ccc/work/cont003/gen7420/odakatin/CONFIGS/SEDNA/SEDNA-I/SEDNA_Domain_cfg_Tgt_20210423_tsh10m_L1/param_f32/x_*.nc', 'combine': 'nested', 'concat_dim': 'y'}} 0 read gridS ['vosaline'] using load_data_xios_kerchunk reading gridS using load_data_xios_kerchunk reading <bound method DataSourceBase.describe of sources: data_xios_kerchunk: args: consolidated: false storage_options: fo: file:////ccc/cont003/home/ra5563/ra5563/catalogue/DELTA/201201/gridS_0[0-5][0-9][0-9].json target_protocol: file urlpath: reference:// description: CREG025 NEMO outputs from different xios server in kerchunk format driver: intake_xarray.xzarr.ZarrSource metadata: catalog_dir: /ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/../lib/ > took 35.06680512428284 seconds 0 merging gridS ['vosaline'] param nav_lon will be included in data param nav_lat will be included in data param mask will be included in 
data param mask2d will be included in data param e1te2t will be included in data param e3t will be included in data CPU times: user 20.7 s, sys: 3.29 s, total: 24 s Wall time: 57.9 s
<xarray.Dataset> Dimensions: (t: 31, z: 150, y: 6540, x: 6560) Coordinates: time_centered (t) object dask.array<chunksize=(1,), meta=np.ndarray> * t (t) object 2012-01-01 12:00:00 ... 2012-01-31 12:00:00 * y (y) int64 1 2 3 4 5 6 7 ... 6535 6536 6537 6538 6539 6540 * x (x) int64 1 2 3 4 5 6 7 ... 6555 6556 6557 6558 6559 6560 * z (z) int64 1 2 3 4 5 6 7 8 ... 143 144 145 146 147 148 149 150 nav_lon (y, x) float32 dask.array<chunksize=(13, 6560), meta=np.ndarray> nav_lat (y, x) float32 dask.array<chunksize=(13, 6560), meta=np.ndarray> mask (z, y, x) bool dask.array<chunksize=(150, 13, 6560), meta=np.ndarray> mask2d (y, x) bool dask.array<chunksize=(13, 6560), meta=np.ndarray> e1te2t (y, x) float64 dask.array<chunksize=(13, 6560), meta=np.ndarray> e3t (z, y, x) float64 dask.array<chunksize=(150, 13, 6560), meta=np.ndarray> Data variables: vosaline (t, z, y, x) float32 dask.array<chunksize=(1, 150, 13, 6560), meta=np.ndarray> Attributes: (12/26) CASE: DELTA CONFIG: SEDNA Conventions: CF-1.6 DOMAIN_dimensions_ids: [2, 3] DOMAIN_halo_size_end: [0, 0] DOMAIN_halo_size_start: [0, 0] ... ... nj: 13 output_frequency: 1d start_date: 20090101 timeStamp: 2022-Jan-17 19:00:16 GMT title: ocean T grid variables uuid: d8db76f6-a436-451a-9ab1-72dc892753af
%%time
# Hand the control table, the loaded data and the output settings to
# monitor.auto. Per the log printed below this cell, it zooms, computes and
# saves for each row of `df`.
# NOTE(review): file_exp is hard-coded to 'SEDNA' here rather than taken from
# the environment — confirm this is intended.
monitor.auto(df,data,savefig,daskreport,outputpath,file_exp='SEDNA'
)
#calc= True #save= True #plot= False monitor.optimize_dataset(data) Value='FWC_2D' Zone='BBFG' Plot='maps' cmap='Spectral_r' clabel='m' clim= (0, 24) outputpath='../results/SEDNA_DELTA_MONITOR/' nc_outputpath='../nc_results/SEDNA_DELTA_MONITOR/' filename='SEDNA_maps_BBFG_FWC_2D' #2 Zooming Data dtaa= zoom.BBFG(data)
<xarray.Dataset> Dimensions: (t: 31, z: 150, y: 5264, x: 6560) Coordinates: time_centered (t) object dask.array<chunksize=(1,), meta=np.ndarray> * t (t) object 2012-01-01 12:00:00 ... 2012-01-31 12:00:00 * y (y) int64 1277 1278 1279 1280 1281 ... 6537 6538 6539 6540 * x (x) int64 1 2 3 4 5 6 7 ... 6555 6556 6557 6558 6559 6560 * z (z) int64 1 2 3 4 5 6 7 8 ... 143 144 145 146 147 148 149 150 nav_lon (y, x) float32 dask.array<chunksize=(8, 6560), meta=np.ndarray> nav_lat (y, x) float32 dask.array<chunksize=(8, 6560), meta=np.ndarray> mask (z, y, x) bool dask.array<chunksize=(150, 8, 6560), meta=np.ndarray> mask2d (y, x) bool dask.array<chunksize=(8, 6560), meta=np.ndarray> e1te2t (y, x) float64 dask.array<chunksize=(8, 6560), meta=np.ndarray> e3t (z, y, x) float64 dask.array<chunksize=(150, 8, 6560), meta=np.ndarray> Data variables: vosaline (t, z, y, x) float32 dask.array<chunksize=(1, 150, 8, 6560), meta=np.ndarray> Attributes: (12/26) CASE: DELTA CONFIG: SEDNA Conventions: CF-1.6 DOMAIN_dimensions_ids: [2, 3] DOMAIN_halo_size_end: [0, 0] DOMAIN_halo_size_start: [0, 0] ... ... nj: 13 output_frequency: 1d start_date: 20090101 timeStamp: 2022-Jan-17 19:00:16 GMT title: ocean T grid variables uuid: d8db76f6-a436-451a-9ab1-72dc892753af
#3 Start computing dtaa= calc.FWC2D_UFUNC(data) monitor.optimize_dataset(dtaa)
<xarray.Dataset> Dimensions: (t: 31, y: 5264, x: 6560) Coordinates: time_centered (t) object dask.array<chunksize=(1,), meta=np.ndarray> * t (t) object 2012-01-01 12:00:00 ... 2012-01-31 12:00:00 * y (y) int64 1277 1278 1279 1280 1281 ... 6537 6538 6539 6540 * x (x) int64 1 2 3 4 5 6 7 ... 6555 6556 6557 6558 6559 6560 nav_lon (y, x) float32 dask.array<chunksize=(8, 6560), meta=np.ndarray> nav_lat (y, x) float32 dask.array<chunksize=(8, 6560), meta=np.ndarray> mask2d (y, x) bool dask.array<chunksize=(8, 6560), meta=np.ndarray> e1te2t (y, x) float64 dask.array<chunksize=(8, 6560), meta=np.ndarray> Data variables: FWC2D (t, y, x) float32 dask.array<chunksize=(1, 8, 6560), meta=np.ndarray>
#4 Saving SEDNA_maps_BBFG_FWC_2D dtaa=save.datas(data,plot=Plot,path=nc_outputpath,filename=filename) start saving data saving data in a file t (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 slice(0, 1, None)
2022-08-03 15:41:49,718 - distributed.worker - ERROR - Worker stream died during communication: tcp://127.0.0.1:46347 Traceback (most recent call last): File "/ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/site-packages/distributed/comm/tcp.py", line 264, in write async def write(self, msg, serializers=None, on_error="message"): asyncio.exceptions.CancelledError During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/asyncio/tasks.py", line 418, in wait_for return fut.result() asyncio.exceptions.CancelledError The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/site-packages/distributed/comm/core.py", line 329, in connect await asyncio.wait_for(comm.write(local_info), time_left()) File "/ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/asyncio/tasks.py", line 420, in wait_for raise exceptions.TimeoutError() from exc asyncio.exceptions.TimeoutError The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/site-packages/distributed/worker.py", line 1983, in gather_dep response = await get_data_from_worker( File "/ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/site-packages/distributed/worker.py", line 2725, in get_data_from_worker return await retry_operation(_get_data, operation="get_data_from_worker") File "/ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/site-packages/distributed/utils_comm.py", line 383, in retry_operation return await retry( File "/ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/site-packages/distributed/utils_comm.py", line 368, in retry return await coro() File "/ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/site-packages/distributed/worker.py", line 2702, in _get_data comm = await 
rpc.connect(worker) File "/ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/site-packages/distributed/core.py", line 1371, in connect return await connect_attempt File "/ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/site-packages/distributed/core.py", line 1307, in _connect comm = await connect( File "/ccc/cont003/home/ra5563/ra5563/monitor/lib/python3.10/site-packages/distributed/comm/core.py", line 333, in connect raise OSError( OSError: Timed out during handshake while connecting to tcp://127.0.0.1:46347 after 30 s
slice(1, 2, None) slice(2, 3, None) slice(3, 4, None) slice(4, 5, None) slice(5, 6, None) slice(6, 7, None) slice(7, 8, None) slice(8, 9, None) slice(9, 10, None) slice(10, 11, None) slice(11, 12, None) slice(12, 13, None) slice(13, 14, None) slice(14, 15, None) slice(15, 16, None) slice(16, 17, None) slice(17, 18, None) slice(18, 19, None) slice(19, 20, None) slice(20, 21, None) slice(21, 22, None) slice(22, 23, None) slice(23, 24, None) slice(24, 25, None) slice(25, 26, None) slice(26, 27, None) slice(27, 28, None) slice(28, 29, None) slice(29, 30, None) slice(30, 31, None) CPU times: user 15min 6s, sys: 2min 13s, total: 17min 20s Wall time: 57min 53s