%matplotlib inline
import pandas as pd
import socket
host = socket.getfqdn()

from core import  load, zoom, calc, save,plots,monitor


# Reload the core submodules so that edits made to ./core/*.py are picked up
# in the current session.
import importlib
importlib.reload(load)
importlib.reload(zoom)
importlib.reload(calc)
importlib.reload(save)
importlib.reload(plots)
importlib.reload(monitor)

<module 'core.monitor' from '/ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/monitor.py'>


%%time
# 'savefig': whether the output is saved as html. Keep it True.
savefig=True
# Unpack the run configuration returned by load.set_control for this host.
client,cluster,control,catalog_url,month,year,daskreport,outputpath = load.set_control(host)
# Create the output and report directories (no error if they already exist).
!mkdir -p $outputpath
!mkdir -p $daskreport
# Last expression of the cell: display the client object.
client

local True

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
File <timed exec>:3, in <module>

File /ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/load.py:204, in set_control(host)
    202 if ('True' in local):
    203     local=True
--> 204     client, cluster=daskcluster(host,local=local)
    205 else:
    206     workers=int(local.replace('\'', '').replace('\"', ''))

File /ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/load.py:20, in daskcluster(host, queue, local)
     11 def daskcluster(host='workbook',queue='xlarge',local=False):
     12 #    print('using host=',host, 'starting dask cluster on local=',local)
     13     
   (...)
     17 #                           threads_per_worker=4)
     18 #            client= Client(cluster)
     19     from dask.distributed import performance_report, Client
---> 20     print('using host=',host, 'starting dask cluster on local=',local,'workers',workers)  
     21     import dask.distributed
     22     dask.config.set({"distributed.scheduler.bandwidth" : 10000000000})   

NameError: name 'workers' is not defined


# Read the control file selected by `control` into df.
df=load.controlfile(control)
# Optional filter (disabled): take out computations tagged 'later' in 'Value'.
#df=df[~df['Value'].str.contains('later')]
# Last expression of the cell: display df.
df

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Input In [4], in <cell line: 1>()
----> 1 df=load.controlfile(control)
      2 #Take out 'later' tagged computations
      3 #df=df[~df['Value'].str.contains('later')]
      4 df

NameError: name 'control' is not defined


%%time
import os
calcswitch=os.environ.get('calc', 'True') 
lazy=os.environ.get('lazy','False' )
loaddata=((df.Inputs != '').any()) 
print('calcswitch=',calcswitch,'df.Inputs != nothing',loaddata, 'lazy=',lazy)
data = load.datas(catalog_url,df.Inputs,month,year,daskreport,lazy=lazy) if ((calcswitch=='True' )*loaddata) else 0 
data

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
File <timed exec>:4, in <module>

NameError: name 'df' is not defined


%%time
monitor.auto(df,data,savefig,daskreport,outputpath,file_exp='SEDNA'
            )

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
File <timed eval>:1, in <module>

NameError: name 'df' is not defined

If you submit the job with job scheduler, above¶

%env local='2'¶

%env ychunk='2'¶

%env tchunk='2'¶

These control the chunking. 'False' leaves the original netCDF file's chunking unmodified.¶

ychunk=10 will group the original netcdf file to 10 by 10¶

tchunk=1 will chunk the time coordinate one by one¶

%env file_exp=¶

'file_exp': Which 'experiment' name is it?¶

This corresponds to the intake catalog name without the path and the .yaml extension.¶

%env year=¶

For validation, this corresponds to the year in path/year/month.¶

For monitoring, this corresponds to 'date'; a value of * means process all files in the monitoring directory.¶

By setting it to 0[0-9], 1[0-9] and *[2-3][0-9] in turn, the job can be split into three lots.¶

%env month=¶

for monitoring this corresponds to file path path-XIOS.{month}/¶

%env control=FWC_SSH¶

name of control file to be used for computation/plots/save/ & how it is called from Monitor.sh¶

%env save= proceed with saving? True or False. The default is True.¶

%env plot= proceed with plotting? True or False. The default is True.¶

%env calc= proceed with the computation, or just load the computed result? True or False. The default is True.¶

%env save=False¶

%env lazy=False¶

For debugging this cell can help¶

0[1-2]¶

False¶

M_Sectiony ok with ychunk=False local=True lazy=False¶

read plotting information from a csv file¶

Computation starts here¶