Skip to content

src.tools.core.parsers.json_tool_call_parser.JSONToolCallParser

Bases: BaseToolCallParser

Enhanced parser for extracting and processing JSON tool calls from raw text.

This parser handles common LLM JSON generation errors including:

  • Semicolons instead of commas between array items
  • Missing or extra commas
  • Unquoted keys
  • Single quotes instead of double quotes
  • Trailing commas
  • Other common JSON syntax errors
Source code in src/tools/core/parsers/json_tool_call_parser.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
class JSONToolCallParser(BaseToolCallParser):
    """Enhanced parser for extracting and processing JSON tool calls from raw text.

    Candidate segments are located by balanced-delimiter scanning and decoded
    with ``json5``.  When a segment does not decode as-is, regex repairs are
    applied for these common LLM JSON generation errors:

    - Semicolons instead of commas between adjacent objects/arrays
    - Missing commas between adjacent objects/arrays
    - Unquoted keys
    - Trailing commas

    Other deviations from strict JSON (for example single-quoted strings) are
    left to whatever ``json5`` itself accepts.
    """
    # Precompiled regex patterns for common JSON errors.  None of them is
    # string-aware: they also match inside string literals.
    # A semicolon sitting between a closer and an opener, e.g. `}; {`.
    SEMICOLON_PATTERN = re.compile(r'(\}|\])\s*;\s*(\{|\[)')
    # A comma directly in front of a closing brace/bracket, e.g. `1, ]`.
    TRAILING_COMMA_PATTERN = re.compile(r',\s*(\}|\])')
    # A closer followed by an opener with nothing but whitespace between them.
    MISSING_COMMA_PATTERN = re.compile(r'(\}|\])\s*(\{|\[)')
    # A bare identifier used as an object key right after `{` or `,`.
    UNQUOTED_PROPERTY_PATTERN = re.compile(r'([{,])\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:')

    def extract(self, text: str) -> Dict[str, Any]:
        """Extract and parse JSON tool calls from the input text with enhanced error recovery.

        Each segment returned by ``find_json_content`` is decoded in up to
        three escalating stages, stopping at the first one that works:

        1. ``json5`` on the segment exactly as found.
        2. ``json5`` on the output of ``preprocess_json``.
        3. Only for segments wrapped in ``[...]``: the segment is cut up by
           ``split_json_list_items`` and every piece is decoded on its own;
           pieces that still fail are dropped.

        Every decoded value is passed through ``parse_nested_json``.

        Args:
            text (str): The input text containing JSON tool calls.

        Returns:
            Dict[str, Any]: A dictionary containing parsed tool calls or error information.

            - Success format: `{"tool_calls": [{"name": "...", "arguments": {...}}, ...]}`
            - Error format: `{"error": "error message"}`
        """
        try:
            collected = []

            for segment in self.find_json_content(text):
                for repaired in (False, True):
                    try:
                        source = self.preprocess_json(segment) if repaired else segment
                        decoded = self.parse_nested_json(json5.loads(source))
                    except Exception:
                        # Best effort: fall through to the next, more lenient stage.
                        continue

                    # A dict is one call, a list contributes each of its elements.
                    if isinstance(decoded, dict):
                        collected.append(decoded)
                    elif isinstance(decoded, list):
                        collected.extend(decoded)
                    break
                else:
                    # Neither whole-segment attempt decoded: salvage list items one by one.
                    if segment.startswith('[') and segment.endswith(']'):
                        for piece in self.split_json_list_items(segment):
                            try:
                                collected.append(self.parse_nested_json(json5.loads(piece)))
                            except Exception:
                                continue

            if collected:
                return {"tool_calls": collected}
            return {"error": "No valid tool calls found"}
        except Exception as exc:
            return {"error": f"Unexpected error: {str(exc)}"}

    @staticmethod
    def find_json_content(text: str) -> List[str]:
        """Extract potential JSON content from raw text using balanced delimiter matching.

        The text is scanned character by character.  A segment starts at a
        ``{`` or ``[`` seen at nesting depth zero and ends at the closer that
        brings the depth back to zero.  Delimiters inside double-quoted strings
        are ignored, and a backslash escapes the character that follows it.
        Segments are not validated, and ``{``/``[`` are not checked against
        their matching closer type.

        A closing delimiter met while nothing is open is ignored, so a stray
        ``}`` or ``]`` in the surrounding prose cannot hide later segments.

        Args:
            text (str): The raw text to search for JSON content.

        Returns:
            List[str]: The extracted segments, in order of appearance.
        """
        results = []
        start = None
        depth = 0
        in_string = False
        escape_next = False

        for i, char in enumerate(text):
            # A backslash escapes the next character, so `\"` never toggles string state.
            if char == '\\' and not escape_next:
                escape_next = True
                continue

            if char == '"' and not escape_next:
                in_string = not in_string

            escape_next = False

            # Delimiters only count outside string literals.
            if in_string:
                continue

            if char in '{[':
                if depth == 0:
                    start = i
                depth += 1
            elif char in '}]' and depth > 0:
                # The `depth > 0` guard keeps an unmatched closer from driving
                # the depth negative and desynchronising the rest of the scan.
                depth -= 1
                if depth == 0:
                    results.append(text[start:i + 1])
                    start = None

        return results

    def preprocess_json(self, json_str: str) -> str:
        """Preprocess JSON string to fix common LLM-generated syntax errors.

        The class-level patterns are applied to the whole string in a fixed
        order: semicolon separators, trailing commas, missing commas, unquoted
        property names.  The substitutions are purely textual, so text inside
        string literals that happens to match is rewritten as well.

        Args:
            json_str (str): The potentially malformed JSON string.

        Returns:
            str: The string after all substitutions.
        """
        repairs = (
            (self.SEMICOLON_PATTERN, r'\1,\2'),
            (self.TRAILING_COMMA_PATTERN, r'\1'),
            (self.MISSING_COMMA_PATTERN, r'\1,\2'),
            (self.UNQUOTED_PROPERTY_PATTERN, r'\1"\2":'),
        )

        repaired = json_str
        for pattern, replacement in repairs:
            repaired = pattern.sub(replacement, repaired)
        return repaired

    def split_json_list_items(self, json_list: str) -> List[str]:
        """Split a JSON array string into individual item strings for separate parsing.

        The first and last characters are dropped (assumed to be the outer
        brackets) and the remainder is cut at every ``,`` or ``;`` that sits
        outside nested braces/brackets and outside double-quoted strings.

        Args:
            json_list (str): A string containing a JSON array with possibly invalid separators.

        Returns:
            List[str]: The whitespace-stripped pieces.  Adjacent separators
            produce empty strings; a trailing separator does not.
        """
        inner = json_list[1:-1].strip()

        pieces = []
        nesting = 0
        piece_start = 0
        inside_string = False
        escaped = False

        for pos, ch in enumerate(inner):
            # A backslash escapes the next character, so `\"` never toggles string state.
            if ch == '\\' and not escaped:
                escaped = True
                continue

            if ch == '"' and not escaped:
                inside_string = not inside_string

            escaped = False

            if inside_string:
                continue

            if ch in '{[':
                nesting += 1
            elif ch in '}]':
                nesting -= 1
            elif nesting == 0 and ch in ',;':
                # Top-level separator: close the current piece.
                pieces.append(inner[piece_start:pos].strip())
                piece_start = pos + 1

        # Whatever follows the last separator is the final piece.
        tail = inner[piece_start:]
        if tail:
            pieces.append(tail.strip())

        return pieces

    def parse_nested_json(self, value: Any) -> Any:
        """Recursively parses stringified JSON within a JSON structure.

        Strings whose first non-blank character is ``{`` or ``[`` are decoded
        with ``json5``: first verbatim, and only if that fails, after
        ``preprocess_json``.  Decoding verbatim first matters because the regex
        repairs are not string-aware and can corrupt an already valid document
        (for example one holding ``"a, b: c"`` as a string value).

        Args:
            value (Any): The input value to check and potentially parse.

        Returns:
            Any: Dicts and lists are rebuilt with every element processed;
            decodable strings are replaced by their decoded, processed value;
            anything else, including strings that fail to decode, is returned
            unchanged.
        """
        if isinstance(value, dict):
            return {k: self.parse_nested_json(v) for k, v in value.items()}
        if isinstance(value, list):
            return [self.parse_nested_json(item) for item in value]
        if not isinstance(value, str):
            return value

        trimmed = value.strip()
        if not trimmed.startswith(('{', '[')):
            return value

        for repaired in (False, True):
            try:
                candidate = self.preprocess_json(trimmed) if repaired else trimmed
                return self.parse_nested_json(json5.loads(candidate))
            except Exception:
                continue

        # Not decodable either way: keep the original string untouched.
        return value

extract(text)

Extract and parse JSON tool calls from the input text with enhanced error recovery.

Parameters:

Name Type Description Default
text str

The input text containing JSON tool calls.

required

Returns:

Type Description
Dict[str, Any]

A dictionary containing parsed tool calls or error information:

  • Success format: {"tool_calls": [{"name": "...", "arguments": {...}}, ...]}
  • Error format: {"error": "error message"}
Source code in src/tools/core/parsers/json_tool_call_parser.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def extract(self, text: str) -> Dict[str, Any]:
    """Collect every tool call that can be decoded from ``text``.

    Each segment returned by ``find_json_content`` is decoded in up to three
    escalating stages, stopping at the first one that works:

    1. ``json5`` on the segment exactly as found.
    2. ``json5`` on the output of ``preprocess_json``.
    3. Only for segments wrapped in ``[...]``: the segment is cut up by
       ``split_json_list_items`` and every piece is decoded on its own;
       pieces that still fail are dropped.

    Every decoded value is passed through ``parse_nested_json``.

    Args:
        text (str): The input text containing JSON tool calls.

    Returns:
        Dict[str, Any]: A dictionary containing parsed tool calls or error information.

        - Success format: `{"tool_calls": [{"name": "...", "arguments": {...}}, ...]}`
        - Error format: `{"error": "error message"}`
    """
    try:
        collected = []

        for segment in self.find_json_content(text):
            for repaired in (False, True):
                try:
                    source = self.preprocess_json(segment) if repaired else segment
                    decoded = self.parse_nested_json(json5.loads(source))
                except Exception:
                    # Best effort: fall through to the next, more lenient stage.
                    continue

                # A dict is one call, a list contributes each of its elements.
                if isinstance(decoded, dict):
                    collected.append(decoded)
                elif isinstance(decoded, list):
                    collected.extend(decoded)
                break
            else:
                # Neither whole-segment attempt decoded: salvage list items one by one.
                if segment.startswith('[') and segment.endswith(']'):
                    for piece in self.split_json_list_items(segment):
                        try:
                            collected.append(self.parse_nested_json(json5.loads(piece)))
                        except Exception:
                            continue

        if collected:
            return {"tool_calls": collected}
        return {"error": "No valid tool calls found"}
    except Exception as exc:
        return {"error": f"Unexpected error: {str(exc)}"}

find_json_content(text) staticmethod

Extract potential JSON content from raw text using balanced delimiter matching.

This method scans the text character by character to identify and extract valid JSON objects or arrays, handling nested structures correctly.

Parameters:

Name Type Description Default
text str

The raw text to search for JSON content.

required

Returns:

Type Description
List[str]

List[str]: A list of extracted JSON string segments.

Source code in src/tools/core/parsers/json_tool_call_parser.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
@staticmethod
def find_json_content(text: str) -> List[str]:
    """Extract potential JSON content from raw text using balanced delimiter matching.

    The text is scanned character by character.  A segment starts at a ``{``
    or ``[`` seen at nesting depth zero and ends at the closer that brings the
    depth back to zero.  Delimiters inside double-quoted strings are ignored,
    and a backslash escapes the character that follows it.  Segments are not
    validated, and ``{``/``[`` are not checked against their matching closer
    type.

    A closing delimiter met while nothing is open is ignored, so a stray ``}``
    or ``]`` in the surrounding prose cannot hide later segments.

    Args:
        text (str): The raw text to search for JSON content.

    Returns:
        List[str]: The extracted segments, in order of appearance.
    """
    results = []
    start = None
    depth = 0
    in_string = False
    escape_next = False

    for i, char in enumerate(text):
        # A backslash escapes the next character, so `\"` never toggles string state.
        if char == '\\' and not escape_next:
            escape_next = True
            continue

        if char == '"' and not escape_next:
            in_string = not in_string

        escape_next = False

        # Delimiters only count outside string literals.
        if in_string:
            continue

        if char in '{[':
            if depth == 0:
                start = i
            depth += 1
        elif char in '}]' and depth > 0:
            # The `depth > 0` guard keeps an unmatched closer from driving the
            # depth negative and desynchronising the rest of the scan.
            depth -= 1
            if depth == 0:
                results.append(text[start:i + 1])
                start = None

    return results

parse_nested_json(value)

Recursively parses stringified JSON within a JSON structure.

Parameters:

Name Type Description Default
value Any

The input value to check and potentially parse.

required

Returns:

Name Type Description
Any Any

The processed value, either as a parsed JSON object or as its original type.

Source code in src/tools/core/parsers/json_tool_call_parser.py
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
def parse_nested_json(self, value: Any) -> Any:
    """Recursively parses stringified JSON within a JSON structure.

    Strings whose first non-blank character is ``{`` or ``[`` are decoded with
    ``json5``: first verbatim, and only if that fails, after
    ``preprocess_json``.  Decoding verbatim first matters because the regex
    repairs are not string-aware and can corrupt an already valid document
    (for example one holding ``"a, b: c"`` as a string value).

    Args:
        value (Any): The input value to check and potentially parse.

    Returns:
        Any: Dicts and lists are rebuilt with every element processed;
        decodable strings are replaced by their decoded, processed value;
        anything else, including strings that fail to decode, is returned
        unchanged.
    """
    if isinstance(value, dict):
        return {k: self.parse_nested_json(v) for k, v in value.items()}
    if isinstance(value, list):
        return [self.parse_nested_json(item) for item in value]
    if not isinstance(value, str):
        return value

    trimmed = value.strip()
    if not trimmed.startswith(('{', '[')):
        return value

    for repaired in (False, True):
        try:
            candidate = self.preprocess_json(trimmed) if repaired else trimmed
            return self.parse_nested_json(json5.loads(candidate))
        except Exception:
            continue

    # Not decodable either way: keep the original string untouched.
    return value

preprocess_json(json_str)

Preprocess JSON string to fix common LLM-generated syntax errors.

Parameters:

Name Type Description Default
json_str str

The potentially malformed JSON string.

required

Returns:

Name Type Description
str str

A corrected JSON string.

Source code in src/tools/core/parsers/json_tool_call_parser.py
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
def preprocess_json(self, json_str: str) -> str:
    """Apply the regex repairs for common LLM-generated JSON syntax errors.

    The class-level patterns are applied to the whole string in a fixed order:
    semicolon separators, trailing commas, missing commas, unquoted property
    names.  The substitutions are purely textual, so text inside string
    literals that happens to match is rewritten as well.

    Args:
        json_str (str): The potentially malformed JSON string.

    Returns:
        str: The string after all substitutions.
    """
    repairs = (
        # `}; {`  ->  `},{`
        (self.SEMICOLON_PATTERN, r'\1,\2'),
        # `1, ]`  ->  `1]`
        (self.TRAILING_COMMA_PATTERN, r'\1'),
        # `} {`   ->  `},{`
        (self.MISSING_COMMA_PATTERN, r'\1,\2'),
        # `{key:` ->  `{"key":`
        (self.UNQUOTED_PROPERTY_PATTERN, r'\1"\2":'),
    )

    repaired = json_str
    for pattern, replacement in repairs:
        repaired = pattern.sub(replacement, repaired)
    return repaired

split_json_list_items(json_list)

Split a JSON array string into individual item strings for separate parsing.

This handles cases where items are separated by semicolons or have other issues.

Parameters:

Name Type Description Default
json_list str

A string containing a JSON array with possibly invalid separators.

required

Returns:

Type Description
List[str]

List[str]: List of individual item strings.

Source code in src/tools/core/parsers/json_tool_call_parser.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
def split_json_list_items(self, json_list: str) -> List[str]:
    """Cut a JSON array string into its top-level item strings.

    The first and last characters are dropped (assumed to be the outer
    brackets) and the remainder is cut at every ``,`` or ``;`` that sits
    outside nested braces/brackets and outside double-quoted strings.

    Args:
        json_list (str): A string containing a JSON array with possibly invalid separators.

    Returns:
        List[str]: The whitespace-stripped pieces.  Adjacent separators
        produce empty strings; a trailing separator does not.
    """
    inner = json_list[1:-1].strip()

    pieces = []
    nesting = 0
    piece_start = 0
    inside_string = False
    escaped = False

    for pos, ch in enumerate(inner):
        # A backslash escapes the next character, so `\"` never toggles string state.
        if ch == '\\' and not escaped:
            escaped = True
            continue

        if ch == '"' and not escaped:
            inside_string = not inside_string

        escaped = False

        if inside_string:
            continue

        if ch in '{[':
            nesting += 1
        elif ch in '}]':
            nesting -= 1
        elif nesting == 0 and ch in ',;':
            # Top-level separator: close the current piece.
            pieces.append(inner[piece_start:pos].strip())
            piece_start = pos + 1

    # Whatever follows the last separator is the final piece.
    tail = inner[piece_start:]
    if tail:
        pieces.append(tail.strip())

    return pieces