Source code for piscat.InputOutput.cpu_configurations

from __future__ import print_function

import json
import os

import pandas as pd


class CPUConfigurations:
    def __init__(
        self,
        n_jobs=-1,
        backend="multiprocessing",
        verbose=0,
        parallel_active=True,
        threshold_for_parallel_run=None,
        flag_report=False,
    ):
        """This class generates a JSON file based on the parallel-loop settings that
        the user prefers for the CPU. Other functions and methods read this JSON file
        to set the hyperparameters of their parallel loops. PiSCAT uses Joblib for
        parallelization.

        | [1]. https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html

        Parameters
        ----------
        n_jobs: int
            The maximum number of workers that can run at the same time. If -1, all
            CPU cores are available for use.

        backend: str
            Specifies the implementation of the parallelization backend. The
            following backends are supported:

            * ``"loky"``: Can induce some communication and memory overhead when
              exchanging input and output data with the worker Python processes.

            * ``"multiprocessing"``: The previous process-based backend, based on
              ``multiprocessing.Pool``. Less robust than loky.

            * ``"threading"``: A very low-overhead backend, but it suffers from the
              Python Global Interpreter Lock (GIL) if the called function relies
              heavily on Python objects. "threading" is mostly useful when the
              execution bottleneck is a compiled extension that explicitly releases
              the GIL (for instance, a Cython loop wrapped in a ``with nogil`` block
              or an expensive call to a library such as NumPy).

        verbose: int, optional
            The verbosity level. If non-zero, progress messages are printed. Above
            50, the output is sent to stdout. The frequency of the messages increases
            with the verbosity level. If it is more than 10, all iterations are
            reported.

        parallel_active: bool
            If True, functions run their parallel implementation.

        threshold_for_parallel_run: float
            Reserved for the next generation of PiSCAT.

        flag_report: bool
            Set this flag if you need to see the values that will be used for the
            CPU configuration.
        """
        try:
            self.read_cpu_setting(flag_report)
        except FileNotFoundError:
            self.n_jobs = n_jobs
            self.backend = backend
            self.verbose = verbose
            self.parallel_active = parallel_active
            self.threshold_for_parallel_run = threshold_for_parallel_run

            setting_dic = {
                "n_jobs": [self.n_jobs],
                "backend": [self.backend],
                "verbose": [self.verbose],
                "parallel_active": [self.parallel_active],
                "threshold_for_parallel_run": [self.threshold_for_parallel_run],
            }

            self.save_cpu_setting(setting_dic)

    def save_cpu_setting(self, setting_dic):
        name = "cpu_configurations.json"
        here = os.path.dirname(os.getcwd())
        subdir = "piscat_configuration"

        try:
            dr_mk = os.path.join(here, subdir)
            os.mkdir(dr_mk)
            print("Directory ", subdir, " Created ")
        except FileExistsError:
            print("Directory ", subdir, " already exists")

        filepath = os.path.join(here, subdir, name)
        df_configfile = pd.DataFrame(data=setting_dic)
        df_configfile.to_json(filepath)
    def read_cpu_setting(self, flag_report=False):
        """
        Parameters
        ----------
        flag_report: bool
            Whether you need to see the values that will be used for the CPU
            configuration.
        """
        subdir = "piscat_configuration"
        here = os.path.dirname(os.getcwd())
        filepath = os.path.join(here, subdir, "cpu_configurations.json")

        with open(filepath) as json_file:
            cpu_setting = json.load(json_file)

        self.n_jobs = cpu_setting["n_jobs"]["0"]
        self.backend = cpu_setting["backend"]["0"]
        self.verbose = cpu_setting["verbose"]["0"]
        self.parallel_active = cpu_setting["parallel_active"]["0"]
        self.threshold_for_parallel_run = cpu_setting["threshold_for_parallel_run"]["0"]

        if flag_report:
            print("PiSCAT's general parallel flag is set to {}".format(self.parallel_active))
            print("\nThe number of parallel jobs is set to {}".format(self.n_jobs))
            print("\nThe backend is set to {}".format(self.backend))
            print("\nThe verbosity level is set to {}".format(self.verbose))
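
The listing above only defines the configuration class. The short sketch below is not
part of the PiSCAT source; it illustrates one way the stored settings might be handed
to joblib's Parallel/delayed API, assuming the JSON file lives in the default
``piscat_configuration`` directory. The worker function ``square`` and the input range
are hypothetical placeholders.

    from joblib import Parallel, delayed

    from piscat.InputOutput.cpu_configurations import CPUConfigurations


    def square(x):
        # Hypothetical stand-in for real per-frame work.
        return x * x


    # Reads piscat_configuration/cpu_configurations.json if it exists,
    # otherwise creates it with the default settings.
    cpu = CPUConfigurations()

    if cpu.parallel_active:
        # Pass the stored hyperparameters on to joblib.
        results = Parallel(n_jobs=cpu.n_jobs, backend=cpu.backend, verbose=cpu.verbose)(
            delayed(square)(i) for i in range(8)
        )
    else:
        results = [square(i) for i in range(8)]

    print(results)

Because the settings are persisted to disk, a second ``CPUConfigurations()`` call in a
later session reuses the same values via ``read_cpu_setting`` instead of the defaults.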