You can use the following input and output data formats for Python Decision Optimization models.
Input data
For Python DOcplex models, the input data can read from a file in any format.
If you want to use external sources to populate your input data by using connectors, then you must
use .csv
files for your input data.
get_all_inputs
method to read files and return a dataframe dictionary.
from docplex.util.environment import get_environment
import pandas
from six import iteritems
from collections.abc import Mapping
from os.path import join, dirname, basename, splitext, exists
import glob
class _InputDict(dict):
def __init__(self, directory, names):
dict.__init__(self)
self._directory = directory
for k in names:
dict.__setitem__(self, k, None)
file='model_schema.json'
if self._directory is not None:
file = "{0}/".format(self._directory) + file
self.dtype_schemas = self.get_dtype_schemas( file)
def __getitem__(self, key):
if isinstance(key, str):
item = dict.__getitem__(self, key)
if item is None:
file = "{0}.csv".format(key)
if file in self.dtype_schemas:
return self.read_df( key, dtype=self.dtype_schemas[file])
else:
return self.read_df( key)
else:
return item
else:
raise Exception("Accessing input dict via non string index")
def read_df(self, key, **kwargs):
env = get_environment()
file = "{0}.csv".format(key)
if self._directory is not None:
file = "{0}/".format(self._directory) + file
with env.get_input_stream(file) as ist:
params = {'encoding': 'utf8'}
if kwargs:
params.update(kwargs)
df = pandas.read_csv( ist, **params)
dict.__setitem__(self, key, df)
return df
def get_dtype_schemas(self, path):
dtype_schemas = {}
if exists(path):
input_schemas=json.load(open(path))
if 'input' in input_schemas:
for input_schema in input_schemas['input']:
dtype_schema = {}
if 'fields' in input_schema:
for input_schema_field in input_schema['fields']:
if input_schema_field['type']=='string':
dtype_schema[input_schema_field['name']]='str'
if len(dtype_schema) > 0:
dtype_schemas[input_schema['id']]=dtype_schema
print(dtype_schemas)
return dtype_schemas
class _LazyDict(Mapping):
def __init__(self, *args, **kw):
self._raw_dict = _InputDict(*args, **kw)
def __getitem__(self, key):
return self._raw_dict.__getitem__(key)
def __iter__(self):
return iter(self._raw_dict)
def __len__(self):
return len(self._raw_dict)
def read_df(self, key, **kwargs):
return self._raw_dict.read_df(key, **kwargs)
def get_all_inputs(directory=None):
'''Utility method to read a list of files and return a tuple with all
read data frames.
Returns:
a map { datasetname: data frame }
'''
all_csv = "*.csv"
g = join(directory, all_csv) if directory else all_csv
names = [splitext(basename(f))[0] for f in glob.glob(g)]
result = _LazyDict(directory, names)
return result
Output data
def write_all_outputs(outputs):
'''Write all dataframes in ``outputs`` as .csv.
Args:
outputs: The map of outputs 'outputname' -> 'output df'
'''
for (name, df) in iteritems(outputs):
csv_file = '%s.csv' % name
print(csv_file)
with get_environment().get_output_stream(csv_file) as fp:
if sys.version_info[0] < 3:
fp.write(df.to_csv(index=False, encoding='utf8'))
else:
fp.write(df.to_csv(index=False).encode(encoding='utf8'))
if len(outputs) == 0:
print("Warning: no outputs written")
You can also use the get_environment().get_output_stream(csv_file) as fp:
to
save your outputs.
For a complete example of deploying a Decision Optimization Python DOcplex model, see the Deploying a DO model with WML sample located in the jupyter folder of the DO-samples. Select the relevant product and version subfolder.
This sample can also be found in the Cloud Pak for Data Resource hub, see Deploying a Decision Optimization model in watsonx.ai Runtime.