%matplotlib inline
import pandas as pd
import socket
host = socket.getfqdn()
from core import load, zoom, calc, save,plots,monitor
#reload funcs after updating ./core/*.py
import importlib
importlib.reload(load)
importlib.reload(zoom)
importlib.reload(calc)
importlib.reload(save)
importlib.reload(plots)
importlib.reload(monitor)
<module 'core.monitor' from '/ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/monitor.py'>
# 'month': set to 'JOBID' when submitted through the job scheduler — almost
# a calendar month, but not exactly.
# Below is the list of environment variables one can pass:
#%env local='2'
# local : if 'True', run a local dask cluster; otherwise the value gives the
# number of workers, set in 'local'.
# If no 'local' is given, local is automatically set to 'True'.
#%env ychunk='2'
#%env tchunk='2'
# Controls chunking. 'False' keeps the original netcdf file's chunking
# unmodified.
# ychunk=10 will group the original netcdf file chunks 10 by 10.
# tchunk=1 will chunk the time coordinate one by one.
#%env control=Fluxnet
# name of control file to be used for computation/plots/save/
#%env file_exp=
# 'file_exp': Which 'experiment' name is it?
# This corresponds to the intake catalog name, without the path and the .yaml
# extension.
#%env year=
# For Validation, this corresponds to the year in path/year/month.
# For monitoring, this corresponds to 'date'; a value containing * means
# process all files in the monitoring directory.
# Setting it to *0[0-9], *1[0-9] and *[2-3][0-9], the job can be split into
# three lots.
#%env month=
# for monitoring this corresponds to file path path-XIOS.{month}/
#
#%env save= proceed with saving? True or False; default is set to True
#%env plot= proceed with plotting? True or False; default is set to True
#%env calc= proceed with computation, or just load a computed result? True or False; default is set to True
#%env calc=True
%%time
# 'savefig': Do we save output in html? or not. keep it true.
savefig=True
# Read the run configuration from environment variables (see the comment
# block above): dask client/cluster, control-file name, intake catalog URL,
# target month/year, dask-report path and output path.  Behaviour depends
# on `host` (local cluster vs. scheduler node).
client,cluster,control,catalog_url,month,year,daskreport,outputpath = load.set_control(host)
# Ensure the output and dask-report directories exist (IPython shell escape).
!mkdir -p $outputpath
!mkdir -p $daskreport
# Display the dask client widget in the notebook.
client
local True using host= irene8001.c-irene.tgcc.ccc.cea.fr starting dask cluster on local= True workers 16 10000000000 False not local in tgcc c-irene.tgcc local FORCED tgcc local cluster starting This code is running on irene8001.c-irene.tgcc.ccc.cea.fr using SEDNA_ALPHA_MONITOR file experiment, read from ../lib/SEDNA_ALPHA_MONITOR.yaml on year= * on month= 23 outputpath= ../results/SEDNA_ALPHA_MONITOR/23/ daskreport= ../results/dask/7400540irene8001.c-irene.tgcc.ccc.cea.fr_SEDNA_ALPHA_MONITOR_23SSH_anomaly/ CPU times: user 778 ms, sys: 775 ms, total: 1.55 s Wall time: 16.1 s
Client
|
Cluster
|
# Load the control file: a table of the diagnostics to compute/plot/save.
# NOTE(review): the traceback below shows this raises FileNotFoundError when
# '../lib/<control>.csv' is missing — confirm the control file exists.
df=load.controlfile(control)
#Take out 'later' tagged computations
#df=df[~df['Value'].str.contains('later')]
df
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) <ipython-input-1-904c323d9316> in <module> ----> 1 df=load.controlfile(control) 2 #Take out 'later' tagged computations 3 #df=df[~df['Value'].str.contains('later')] 4 df /ccc/work/cont003/gen7420/odakatin/monitor-sedna/notebook/core/load.py in controlfile(filename) 640 , index_col=[0] 641 , dtype={'MinMax': str,'Colourmap': str} --> 642 , na_filter=False 643 #,usecols=lambda x: x is not '1' 644 #, nrows=2 ~/monitor/lib/python3.7/site-packages/pandas/io/parsers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options) 608 kwds.update(kwds_defaults) 609 --> 610 return _read(filepath_or_buffer, kwds) 611 612 ~/monitor/lib/python3.7/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds) 460 461 # Create the parser. 
--> 462 parser = TextFileReader(filepath_or_buffer, **kwds) 463 464 if chunksize or iterator: ~/monitor/lib/python3.7/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds) 817 self.options["has_index_names"] = kwds["has_index_names"] 818 --> 819 self._engine = self._make_engine(self.engine) 820 821 def close(self): ~/monitor/lib/python3.7/site-packages/pandas/io/parsers.py in _make_engine(self, engine) 1048 ) 1049 # error: Too many arguments for "ParserBase" -> 1050 return mapping[engine](self.f, **self.options) # type: ignore[call-arg] 1051 1052 def _failover_to_python(self): ~/monitor/lib/python3.7/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds) 1865 1866 # open handles -> 1867 self._open_handles(src, kwds) 1868 assert self.handles is not None 1869 for key in ("storage_options", "encoding", "memory_map", "compression"): ~/monitor/lib/python3.7/site-packages/pandas/io/parsers.py in _open_handles(self, src, kwds) 1366 compression=kwds.get("compression", None), 1367 memory_map=kwds.get("memory_map", False), -> 1368 storage_options=kwds.get("storage_options", None), 1369 ) 1370 ~/monitor/lib/python3.7/site-packages/pandas/io/common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options) 645 encoding=ioargs.encoding, 646 errors=errors, --> 647 newline="", 648 ) 649 else: FileNotFoundError: [Errno 2] No such file or directory: '../lib/SSH_anomaly.csv'
Each computation consists of the following steps:
%%time
import os
calcswitch=os.environ.get('calc', 'True')
print('calcswitch=',calcswitch)
data = load.datas(catalog_url,df.Inputs,month,year,daskreport) if calcswitch=='True' else 0
data
calcswitch= True
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <timed exec> in <module> NameError: name 'df' is not defined
%%time
# Run the automated monitoring: for each diagnostic listed in `df`, compute
# from `data`, plot, and (when savefig is True) save figures/HTML under
# `outputpath`; dask performance reports go to `daskreport`.
monitor.auto(df,data,savefig,daskreport,outputpath,file_exp='SEDNA'
)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <timed eval> in <module> NameError: name 'df' is not defined