# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import pandas as pd
import socket
# Fully qualified domain name of the machine running this notebook;
# it is handed to load.set_control() further down.
host = socket.getfqdn()
from core import load, zoom, calc, save,plots,monitor
# Reload the core modules so that edits to ./core/*.py are picked up
# without restarting the kernel.
import importlib
importlib.reload(load)
importlib.reload(zoom)
importlib.reload(calc)
importlib.reload(save)
importlib.reload(plots)
# Last expression of the cell: the reloaded module object is what the
# notebook displays as this cell's output.
importlib.reload(monitor)
<module 'core.monitor' from '/ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/monitor.py'>
Below is the list of environment variables one can pass:
local : if 'True', run a local dask cluster; otherwise set 'local' to the number of workers to start. If 'local' is not given, it is automatically set to 'True'.
#
Monitor.sh calls M_MLD_2D
and AWTD.sh, Fluxnet.sh, Siconc.sh, IceClim.sh, FWC_SSH.sh
AWTD.sh M_AWTMD
Fluxnet.sh M_Fluxnet
FWC_SSH.sh M_FWC_2D M_FWC_integrals M_FWC_SSH M_SSH_anomaly
Integrals.sh M_Mean_temp_velo M_Mooring M_Sectionx M_Sectiony
%%time
# 'savefig': whether the output is saved as HTML. Keep it True.
savefig=True
# Unpack the run configuration for this host. set_control() also starts the
# dask cluster (it calls daskcluster(host, local=local) in core/load.py).
client,cluster,control,catalog_url,month,year,daskreport,outputpath = load.set_control(host)
# Make sure the output and dask-report directories exist
# (-p: create parents, no error if they are already there).
!mkdir -p $outputpath
!mkdir -p $daskreport
# Show the client object as the cell output.
client
local True
--------------------------------------------------------------------------- NameError Traceback (most recent call last) File <timed exec>:3, in <module> File /ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/load.py:204, in set_control(host) 202 if ('True' in local): 203 local=True --> 204 client, cluster=daskcluster(host,local=local) 205 else: 206 workers=int(local.replace('\'', '').replace('\"', '')) File /ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/load.py:20, in daskcluster(host, queue, local) 11 def daskcluster(host='workbook',queue='xlarge',local=False): 12 # print('using host=',host, 'starting dask cluster on local=',local) 13 (...) 17 # threads_per_worker=4) 18 # client= Client(cluster) 19 from dask.distributed import performance_report, Client ---> 20 print('using host=',host, 'starting dask cluster on local=',local,'workers',workers) 21 import dask.distributed 22 dask.config.set({"distributed.scheduler.bandwidth" : 10000000000}) NameError: name 'workers' is not defined
# Load the control file into df (presumably a pandas DataFrame -- the cells
# below read df.Inputs and pass df to monitor.auto()).
df=load.controlfile(control)
# Optional filter (disabled): drop the computations tagged 'later'.
#df=df[~df['Value'].str.contains('later')]
# Show the table as the cell output.
df
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [4], in <cell line: 1>() ----> 1 df=load.controlfile(control) 2 #Take out 'later' tagged computations 3 #df=df[~df['Value'].str.contains('later')] 4 df NameError: name 'control' is not defined
Each computation consists of
%%time
import os
calcswitch=os.environ.get('calc', 'True')
lazy=os.environ.get('lazy','False' )
loaddata=((df.Inputs != '').any())
print('calcswitch=',calcswitch,'df.Inputs != nothing',loaddata, 'lazy=',lazy)
data = load.datas(catalog_url,df.Inputs,month,year,daskreport,lazy=lazy) if ((calcswitch=='True' )*loaddata) else 0
data
--------------------------------------------------------------------------- NameError Traceback (most recent call last) File <timed exec>:4, in <module> NameError: name 'df' is not defined
%%time
monitor.auto(df,data,savefig,daskreport,outputpath,file_exp='SEDNA'
)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) File <timed eval>:1, in <module> NameError: name 'df' is not defined