In [1]:
%matplotlib inline
import pandas as pd
import socket
host = socket.getfqdn()

from core import  load, zoom, calc, save,plots,monitor
In [2]:
# Re-import the helper modules so that edits to ./core/*.py are picked up
# without restarting the kernel.
import importlib

# Same reload order as before. The last reload is kept as a bare expression so
# the cell still displays the reloaded core.monitor module.
for _core_module in (load, zoom, calc, save, plots):
    importlib.reload(_core_module)
importlib.reload(monitor)
Out[2]:
<module 'core.monitor' from '/ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/monitor.py'>

If you submit the job with job scheduler, above¶

Below is the list of environment variables one can pass:

%env local='2'¶

local: if 'True', run a dask local cluster; otherwise set 'local' to the number of workers to use. If 'local' is not given, it is automatically set to 'True'.

%env ychunk='2'¶

%env tchunk='2'¶

These control the chunking. 'False' leaves the original netCDF file's chunking unmodified.¶

ychunk=10 will group the original netcdf file to 10 by 10¶

tchunk=1 will chunk the time coordinate one by one¶

%env file_exp=¶

'file_exp': Which 'experiment' name is it?¶

This corresponds to the intake catalog name without the path and .yaml¶

%env year=¶

For validation, this corresponds to the year in path/year/month¶

For monitoring, this corresponds to 'date'; a value of * means process all files in the monitoring directory¶

By setting it to 0[0-9] & 1[0-9] & *[2-3][0-9], the job can be split into three lots.¶

%env month=¶

for monitoring this corresponds to file path path-XIOS.{month}/¶

#

%env control=FWC_SSH¶

name of control file to be used for computation/plots/save/ & how it is called from Monitor.sh¶

Monitor.sh calls M_MLD_2D

and AWTD.sh, Fluxnet.sh, Siconc.sh, IceClim.sh, FWC_SSH.sh

  • AWTD.sh M_AWTMD

  • Fluxnet.sh M_Fluxnet

  • Siconc.sh M_Ice_quantities
  • IceClim.sh M_IceClim M_IceConce M_IceThick

FWC_SSH.sh M_FWC_2D M_FWC_integrals M_FWC_SSH M_SSH_anomaly

Integrals.sh M_Mean_temp_velo M_Mooring M_Sectionx M_Sectiony

%env save= proceed with saving? True or False. Default is True¶

%env plot= proceed with plotting? True or False. Default is True¶

%env calc= proceed with computation, or just load the computed result? True or False. Default is True¶

%env save=False¶

%env lazy=False¶

For debugging this cell can help¶

%env file_exp=SEDNA_DELTA_MONITOR %env year=2012 %env month=01

0[1-2]¶

%env ychunk=10 %env ychunk=False %env save=False %env plot=True %env calc=True # %env lazy=False

False¶

%env control=M_Fluxnet

M_Sectiony ok with ychunk=False local=True lazy=False¶

In [3]:
%%time
# 'savefig': Do we save output in html? or not. keep it true. 
savefig=True
client,cluster,control,catalog_url,month,year,daskreport,outputpath = load.set_control(host)
!mkdir -p $outputpath
!mkdir -p $daskreport
client
local True
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
File <timed exec>:3, in <module>

File /ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/load.py:204, in set_control(host)
    202 if ('True' in local):
    203     local=True
--> 204     client, cluster=daskcluster(host,local=local)
    205 else:
    206     workers=int(local.replace('\'', '').replace('\"', ''))

File /ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/load.py:20, in daskcluster(host, queue, local)
     11 def daskcluster(host='workbook',queue='xlarge',local=False):
     12 #    print('using host=',host, 'starting dask cluster on local=',local)
     13     
   (...)
     17 #                           threads_per_worker=4)
     18 #            client= Client(cluster)
     19     from dask.distributed import performance_report, Client
---> 20     print('using host=',host, 'starting dask cluster on local=',local,'workers',workers)  
     21     import dask.distributed
     22     dask.config.set({"distributed.scheduler.bandwidth" : 10000000000})   

NameError: name 'workers' is not defined

read plotting information from a csv file¶

In [4]:
# Load the table for the selected `control` via load.controlfile().
df = load.controlfile(control)

# Optional filter (disabled): drop the computations tagged 'later'.
# df = df[~df['Value'].str.contains('later')]

df
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Input In [4], in <cell line: 1>()
----> 1 df=load.controlfile(control)
      2 #Take out 'later' tagged computations
      3 #df=df[~df['Value'].str.contains('later')]
      4 df

NameError: name 'control' is not defined

Computation starts here¶

Each computation consists of

  1. Load NEMO data set
  2. Zoom data set
  3. Compute (or load computed data set)
  4. Save
  5. Plot
  6. Close
In [5]:
%%time
import os
calcswitch=os.environ.get('calc', 'True') 
lazy=os.environ.get('lazy','False' )
loaddata=((df.Inputs != '').any()) 
print('calcswitch=',calcswitch,'df.Inputs != nothing',loaddata, 'lazy=',lazy)
data = load.datas(catalog_url,df.Inputs,month,year,daskreport,lazy=lazy) if ((calcswitch=='True' )*loaddata) else 0 
data
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
File <timed exec>:4, in <module>

NameError: name 'df' is not defined
In [6]:
%%time
monitor.auto(df,data,savefig,daskreport,outputpath,file_exp='SEDNA'
            )
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
File <timed eval>:1, in <module>

NameError: name 'df' is not defined