Decision Optimization Python model input and output data file formats

Input and output data formats for Decision Optimization Python DOcplex models

Last updated: Nov 21, 2024

You can use the following input and output data formats for Python Decision Optimization models.

Input data

For Python DOcplex models, the input data can read from a file in any format. If you want to use external sources to populate your input data by using connectors, then you must use .csv files for your input data.

You can use dictionaries to define your input data. You can use the get_all_inputs method to read files and return a dataframe dictionary.

from docplex.util.environment import get_environment
import pandas
from six import iteritems
from collections.abc import Mapping
from os.path import join, dirname, basename, splitext, exists
import glob

class _InputDict(dict):
    def __init__(self, directory, names):
        dict.__init__(self)
        self._directory = directory
        for k in names:
            dict.__setitem__(self, k, None)
        file='model_schema.json'
        if self._directory is not None:
            file  = "{0}/".format(self._directory) + file
        self.dtype_schemas = self.get_dtype_schemas( file)
    def __getitem__(self, key):
        if isinstance(key, str):
            item = dict.__getitem__(self, key)
            if item is None:
                file = "{0}.csv".format(key)
                if file in self.dtype_schemas:
                    return self.read_df( key, dtype=self.dtype_schemas[file])
                else:
                    return self.read_df( key)
            else:
                return item
        else:
            raise Exception("Accessing input dict via non string index")
    def read_df(self, key, **kwargs):
        env = get_environment()
        file = "{0}.csv".format(key)
        if self._directory is not None:
            file  = "{0}/".format(self._directory) + file
        with env.get_input_stream(file) as ist:
            params = {'encoding': 'utf8'}
            if kwargs:
                params.update(kwargs)
            df = pandas.read_csv( ist, **params)
            dict.__setitem__(self, key, df)
        return df
    def get_dtype_schemas(self, path):
        dtype_schemas = {}
        if exists(path):
            input_schemas=json.load(open(path))
            if 'input' in input_schemas:
                for input_schema in input_schemas['input']:
                    dtype_schema = {}
                    if 'fields' in input_schema:
                        for input_schema_field in input_schema['fields']:
                            if input_schema_field['type']=='string':
                                dtype_schema[input_schema_field['name']]='str'
                        if len(dtype_schema) > 0:
                            dtype_schemas[input_schema['id']]=dtype_schema
        print(dtype_schemas)
        return dtype_schemas

class _LazyDict(Mapping):
    def __init__(self, *args, **kw):
        self._raw_dict = _InputDict(*args, **kw)

    def __getitem__(self, key):
        return self._raw_dict.__getitem__(key)

    def __iter__(self):
        return iter(self._raw_dict)

    def __len__(self):
        return len(self._raw_dict)

    def read_df(self, key, **kwargs):
        return self._raw_dict.read_df(key, **kwargs)

def get_all_inputs(directory=None):
    '''Utility method to read a list of files and return a tuple with all
    read data frames.
    Returns:
        a map { datasetname: data frame }
    '''

    all_csv = "*.csv"
    g = join(directory, all_csv) if directory else all_csv

    names = [splitext(basename(f))[0] for f in glob.glob(g)]
    result = _LazyDict(directory, names)
    return result

Output data

This example shows you how, if you use outputs in a dataframe dictionary, how to write all outputs and save the dictionary data as the output of the job.

def write_all_outputs(outputs):
    '''Write all dataframes in ``outputs`` as .csv.

    Args:
        outputs: The map of outputs 'outputname' -> 'output df'
    '''
    for (name, df) in iteritems(outputs):
        csv_file = '%s.csv' % name
        print(csv_file)
        with get_environment().get_output_stream(csv_file) as fp:
            if sys.version_info[0] < 3:
                fp.write(df.to_csv(index=False, encoding='utf8'))
            else:
                fp.write(df.to_csv(index=False).encode(encoding='utf8'))
    if len(outputs) == 0:
        print("Warning: no outputs written")

You can also use the get_environment().get_output_stream(csv_file) as fp: to save your outputs.

For a complete example of deploying a Decision Optimization Python DOcplex model, see the Deploying a DO model with WML sample located in the jupyter folder of the DO-samples. Select the relevant product and version subfolder.

This sample can also be found in the IBM watsonx Resource hub, see Deploying a Decision Optimization model in watsonx.ai Runtime.