Process mining and causal discovery example

[24]:
! pip install sax4bpm
[25]:
import warnings
warnings.filterwarnings('ignore', category=RuntimeWarning)
warnings.filterwarnings('ignore', category=UserWarning)
[26]:
import sax
from sax.core.process_data.formatters.xes_formatter import XESFormatter
from sax.core.process_data.formatters.csv_formatter import CSVFormatter
from sax.core.utils.constants import Constants
from lingam.utils import make_dot
from sax.core.process_data.tabular_data import TabularEventData
from pm4py.objects.conversion.log import converter as log_converter
import sax.core.process_mining.process_mining as pm
[27]:
#Import event log file and create event_log data object
fileName = "simulated_run_non_overlapping_gen.csv"
event_log = pm.import_csv(eventlog=fileName,kloop_unroling=False,timestamp_format="%Y-%m-%d %H:%M:%S.%f")

[28]:
# Get the parsed dataset from the event log object
event_log.getData()
[28]:
Id Source Type Timestamp Resource Role start:timestamp
0 2182 EVENT 1 START assign 2023-02-24 12:02:25.523000+00:00 Owner2 Administrator 2023-02-24 12:02:25.523000+00:00
1 2182 EVENT 1 START start 2023-02-24 12:02:25.523000+00:00 Owner2 Lawyer 2023-02-24 12:02:25.523000+00:00
2 2182 EVENT 1 START complete 2023-02-24 12:02:25.523000+00:00 Owner1 Administrator 2023-02-24 12:02:25.523000+00:00
3 2182 Email assign 2023-02-24 12:02:25.523000+00:00 Owner3 Lawyer 2023-02-24 12:02:25.523000+00:00
4 2182 Archive assign 2023-02-24 12:02:25.523000+00:00 Owner1 Technician 2023-02-24 12:02:25.523000+00:00
... ... ... ... ... ... ... ...
149980 686 Close Application start 2023-02-03 10:57:38.527000+00:00 Owner3 Administrator 2023-02-03 10:57:28.275000+00:00
149981 686 Close Application complete 2023-02-03 10:57:44.397227+00:00 Owner3 Administrator 2023-02-03 10:57:28.275000+00:00
149982 686 EVENT 7 END assign 2023-02-03 10:57:44.397227+00:00 Owner3 Lawyer 2023-02-03 10:57:28.275000+00:00
149983 686 EVENT 7 END start 2023-02-03 10:57:44.397227+00:00 Owner3 Lawyer 2023-02-03 10:57:28.275000+00:00
149984 686 EVENT 7 END complete 2023-02-03 10:57:44.397227+00:00 Owner1 Lawyer 2023-02-03 10:57:28.275000+00:00

149985 rows × 7 columns

[29]:
# Fetch and print the mandatory properties and optional properties within the event log file
print('Mandatory Properties of the parsed event log: \n',event_log.getMandatoryProperties())
print('Optional properties of the parsed event log: \n',event_log.getOptionalProperties())

Mandatory Properties of the parsed event log:
 {'case:concept:name': 'Id', 'concept:name': 'Source', 'time:timestamp': 'Timestamp', 'lifecycle:transition': 'Type', 'start:timestamp': 'start:timestamp'}
Optional properties of the parsed event log:
 {'Resource': 'Resource', 'Role': 'Role'}
[30]:

#Filter only the specified lifecycle events filtered_log = event_log.filterLifecycleEvents(['start']) filtered_log.getData()
[30]:
Id Source Type Timestamp Resource Role start:timestamp
1 2182 EVENT 1 START start 2023-02-24 12:02:25.523000+00:00 Owner2 Lawyer 2023-02-24 12:02:25.523000+00:00
5 2182 Email start 2023-02-24 12:02:25.523000+00:00 Owner2 Technician 2023-02-24 12:02:25.523000+00:00
6 2182 Archive start 2023-02-24 12:02:25.523000+00:00 Owner2 Administrator 2023-02-24 12:02:25.523000+00:00
10 2182 Close Application start 2023-02-24 12:02:36.506000+00:00 Owner3 Technician 2023-02-24 12:02:25.523000+00:00
13 2182 EVENT 7 END start 2023-02-24 12:02:43.253164+00:00 Owner3 Technician 2023-02-24 12:02:25.523000+00:00
... ... ... ... ... ... ... ...
149971 686 EVENT 1 START start 2023-02-03 10:57:28.275000+00:00 Owner1 Lawyer 2023-02-03 10:57:28.275000+00:00
149975 686 Email start 2023-02-03 10:57:28.275000+00:00 Owner3 Technician 2023-02-03 10:57:28.275000+00:00
149976 686 Archive start 2023-02-03 10:57:28.275000+00:00 Owner3 Technician 2023-02-03 10:57:28.275000+00:00
149980 686 Close Application start 2023-02-03 10:57:38.527000+00:00 Owner3 Administrator 2023-02-03 10:57:28.275000+00:00
149983 686 EVENT 7 END start 2023-02-03 10:57:44.397227+00:00 Owner3 Lawyer 2023-02-03 10:57:28.275000+00:00

49995 rows × 7 columns

[32]:
#Performm process mining
import sax.core.process_mining.process_mining as pm
net = pm.discover_heuristics_net(event_log)
pm.view_heuristics_net(net)

_images/PM_Causal_Discovery_8_0.png
[33]:
#transform the RawEventData to tabular representation
transposed_df = event_log.transposeToTabular()
transposed_df.getData()
[33]:
Id start:timestamp Archive Close Application EVENT 1 START EVENT 7 END Email
0 0 2023-01-25 09:00:00+00:00 2023-01-25 09:00:10.743000+00:00 2023-01-25 09:00:17.152773+00:00 2023-01-25 09:00:00+00:00 2023-01-25 09:00:17.152773+00:00 2023-01-25 09:00:06.406000+00:00
1 1 2023-01-25 09:01:12.348000+00:00 2023-01-25 09:01:21.402000+00:00 2023-01-25 09:01:28.016404+00:00 2023-01-25 09:01:12.348000+00:00 2023-01-25 09:01:28.016404+00:00 2023-01-25 09:01:18.959000+00:00
2 10 2023-01-25 09:41:04.989000+00:00 2023-01-25 09:41:14.083000+00:00 2023-01-25 09:41:20.676132+00:00 2023-01-25 09:41:04.989000+00:00 2023-01-25 09:41:20.676132+00:00 2023-01-25 09:41:11.580000+00:00
3 100 2023-01-25 16:48:32.712000+00:00 2023-01-25 16:48:42.294000+00:00 2023-01-25 16:48:47.714735+00:00 2023-01-25 16:48:32.712000+00:00 2023-01-25 16:48:47.714735+00:00 2023-01-25 16:48:38.129000+00:00
4 1000 2023-02-08 12:01:59.993000+00:00 2023-02-08 12:02:10.483000+00:00 2023-02-08 12:02:16.848774+00:00 2023-02-08 12:01:59.993000+00:00 2023-02-08 12:02:16.848774+00:00 2023-02-08 12:02:06.355000+00:00
... ... ... ... ... ... ... ...
9994 9994 2023-06-19 12:25:33.362000+00:00 2023-06-19 12:25:43.787000+00:00 2023-06-19 12:25:50.615538+00:00 2023-06-19 12:25:33.362000+00:00 2023-06-19 12:25:50.615538+00:00 2023-06-19 12:25:40.188000+00:00
9995 9995 2023-06-19 12:25:55.764000+00:00 2023-06-19 12:26:05.662000+00:00 2023-06-19 12:26:11.965755+00:00 2023-06-19 12:25:55.764000+00:00 2023-06-19 12:26:11.965755+00:00 2023-06-19 12:26:02.064000+00:00
9996 9996 2023-06-19 12:27:15.155000+00:00 2023-06-19 12:27:25.548000+00:00 2023-06-19 12:27:31.124483+00:00 2023-06-19 12:27:15.155000+00:00 2023-06-19 12:27:31.124483+00:00 2023-06-19 12:27:20.729000+00:00
9997 9997 2023-06-19 12:30:28.224000+00:00 2023-06-19 12:30:38.557000+00:00 2023-06-19 12:30:44.296887+00:00 2023-06-19 12:30:28.224000+00:00 2023-06-19 12:30:44.296887+00:00 2023-06-19 12:30:33.960000+00:00
9998 9998 2023-06-19 12:31:29.774000+00:00 2023-06-19 12:31:40.249000+00:00 2023-06-19 12:31:46.576142+00:00 2023-06-19 12:31:29.774000+00:00 2023-06-19 12:31:46.576142+00:00 2023-06-19 12:31:36.099000+00:00

9999 rows × 7 columns

[34]:
#Perform causal discovery
import sax.core.causal_process_discovery.causal_discovery as cd
result = cd.discover_causal_dependencies(event_log)
cd.view_causal_dependencies(result)

[34]:
_images/PM_Causal_Discovery_10_0.svg