#!/usr/bin/env python2
##########################################################################
#
# Copyright 2011 Jose Fonseca
# All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
##########################################################################/


import json
import optparse
import re
import difflib
import sys


try:
    _string_types = basestring  # Python 2: matches both str and unicode
except NameError:
    _string_types = str  # Python 3


def strip_object_hook(obj):
    """JSON ``object_hook`` that discards dunder-named metadata.

    Returns None in place of any object that has a ``__class__`` member.
    Otherwise removes, in place, every member whose name both starts and
    ends with a double underscore, and returns the same dict.
    """
    if '__class__' in obj:
        return None
    # Iterate over a snapshot: deleting from a dict while walking its live
    # key view raises RuntimeError on Python 3.
    for name in list(obj.keys()):
        if name.startswith('__') and name.endswith('__'):
            del obj[name]
    return obj


class Visitor(object):
    """Dispatch on the type of a decoded JSON node.

    ``visit`` forwards dicts to ``visitObject``, lists to ``visitArray`` and
    everything else to ``visitValue``, passing any extra arguments through.
    The base implementations do nothing and return None.
    """

    def visit(self, node, *args, **kwargs):
        """Call the handler that matches the type of ``node``."""
        if isinstance(node, dict):
            return self.visitObject(node, *args, **kwargs)
        elif isinstance(node, list):
            return self.visitArray(node, *args, **kwargs)
        else:
            return self.visitValue(node, *args, **kwargs)

    def visitObject(self, node, *args, **kwargs):
        pass

    def visitArray(self, node, *args, **kwargs):
        pass

    def visitValue(self, node, *args, **kwargs):
        pass


class Dumper(Visitor):
    """Write a JSON-like, indented rendering of a node to a stream.

    Member names are written unquoted and sorted; scalar values are written
    with ``json.dumps``.  ``level`` is the current nesting depth used for
    indentation.
    """

    def __init__(self, stream = sys.stdout):
        self.stream = stream
        self.level = 0

    def _write(self, s):
        self.stream.write(s)

    def _indent(self):
        # NOTE(review): the indent unit is reproduced as it appears in the
        # extracted source, which may have collapsed a wider run of spaces --
        # confirm against the upstream file.
        self._write(' '*self.level)

    def _newline(self):
        self._write('\n')

    def visitObject(self, node):
        """Dump a dict, one member per line, in sorted name order."""
        self.enter_object()

        members = sorted(node.keys())
        last_index = len(members) - 1
        for i, name in enumerate(members):
            self.enter_member(name)
            self.visit(node[name])
            self.leave_member(i == last_index)
        self.leave_object()

    def enter_object(self):
        self._write('{')
        self._newline()
        self.level += 1

    def enter_member(self, name):
        self._indent()
        self._write('%s: ' % name)

    def leave_member(self, last):
        """Terminate a member line; ``last`` suppresses the comma."""
        if not last:
            self._write(',')
        self._newline()

    def leave_object(self):
        self.level -= 1
        self._indent()
        self._write('}')
        # Only a top-level object ends its own line; nested ones leave that
        # to the enclosing member or array element.
        if self.level <= 0:
            self._newline()

    def visitArray(self, node):
        """Dump a list, one element per line."""
        self.enter_array()
        last_index = len(node) - 1
        for i, value in enumerate(node):
            self._indent()
            self.visit(value)
            if i != last_index:
                self._write(',')
            self._newline()
        self.leave_array()

    def enter_array(self):
        self._write('[')
        self._newline()
        self.level += 1

    def leave_array(self):
        self.level -= 1
        self._indent()
        self._write(']')

    def visitValue(self, node):
        self._write(json.dumps(node, allow_nan=True))



class Comparer(Visitor):
    """Structural equality test between two decoded JSON nodes.

    ``visit(a, b)`` returns True when ``b`` matches ``a``.  Floats are
    compared with a relative ``tolerance``.  With ``ignore_added`` set,
    members present only in ``b`` do not count as a difference.
    """

    def __init__(self, ignore_added = False, tolerance = 2.0 ** -24):
        self.ignore_added = ignore_added
        self.tolerance = tolerance

    def visitObject(self, a, b):
        if not isinstance(b, dict):
            return False
        # Unless additions are ignored, both objects must have the same
        # member names.
        if not self.ignore_added and set(a.keys()) != set(b.keys()):
            return False
        for name in a:
            if name not in b:
                return False
            if not self.visit(a[name], b[name]):
                return False
        return True

    def visitArray(self, a, b):
        if not isinstance(b, list):
            return False
        if len(a) != len(b):
            return False
        return all(self.visit(ae, be) for ae, be in zip(a, b))

    def visitValue(self, a, b):
        """Compare two scalars, using relative tolerance when a float is involved.

        Returns False, rather than raising, when a float is compared with a
        non-numeric value such as None or a string.
        """
        if isinstance(a, float) or isinstance(b, float):
            # Exact equality first: this also covers equal infinities, for
            # which the relative error below would evaluate to NaN.
            if a == b:
                return True
            try:
                if a == 0:
                    return abs(b) < self.tolerance
                return abs((b - a)/a) < self.tolerance
            except TypeError:
                # One side is not a number (e.g. a member that exists on
                # one side only is compared against None).
                return False
        return a == b


class Differ(Visitor):
    """Write the differences between two decoded JSON nodes to a stream.

    Uses a ``Comparer`` to decide what differs and a ``Dumper`` to render
    it.  Changed scalars are written as ``old -> new``; changed multi-line
    strings are written as a line-by-line ``difflib`` comparison.
    """

    def __init__(self, stream = sys.stdout, ignore_added = False):
        self.dumper = Dumper(stream)
        self.comparer = Comparer(ignore_added = ignore_added)

    def visit(self, a, b):
        """Emit the difference between ``a`` and ``b``; nothing if they match."""
        if self.comparer.visit(a, b):
            return
        Visitor.visit(self, a, b)

    def visitObject(self, a, b):
        if not isinstance(b, dict):
            self.replace(a, b)
            return

        self.dumper.enter_object()
        names = set(a.keys())
        if not self.comparer.ignore_added:
            names.update(b.keys())

        # Select the differing members up front so that the comma is
        # suppressed after the last member actually written, not after the
        # last member by name.
        changed = [
            name for name in sorted(names)
            if not self.comparer.visit(a.get(name, None), b.get(name, None))
        ]
        last_index = len(changed) - 1
        for i, name in enumerate(changed):
            self.dumper.enter_member(name)
            self.visit(a.get(name, None), b.get(name, None))
            self.dumper.leave_member(i == last_index)

        self.dumper.leave_object()

    def visitArray(self, a, b):
        if not isinstance(b, list):
            self.replace(a, b)
            return

        self.dumper.enter_array()
        max_len = max(len(a), len(b))
        for i in range(max_len):
            # Elements past the end of the shorter list compare against None.
            ae = a[i] if i < len(a) else None
            be = b[i] if i < len(b) else None
            self.dumper._indent()
            if self.comparer.visit(ae, be):
                # Unchanged elements are still written, so positions in the
                # output line up with positions in the array.
                self.dumper.visit(ae)
            else:
                self.visit(ae, be)
            if i != max_len - 1:
                self.dumper._write(',')
            self.dumper._newline()

        self.dumper.leave_array()

    def visitValue(self, a, b):
        if a != b:
            self.replace(a, b)

    def replace(self, a, b):
        """Write ``a`` being replaced by ``b``.

        When both are strings and either contains a newline, a line-by-line
        ``difflib.Differ`` comparison is written; otherwise ``a -> b``.
        """
        if isinstance(a, _string_types) and isinstance(b, _string_types):
            if '\n' in a or '\n' in b:
                a = a.splitlines()
                b = b.splitlines()
                differ = difflib.Differ()
                result = differ.compare(a, b)
                self.dumper.level += 1
                for entry in result:
                    self.dumper._newline()
                    self.dumper._indent()
                    tag = entry[:2]
                    text = entry[2:]
                    if tag == '? ':
                        # Hint line: blank out the tag at its full width and
                        # put a space where other lines have the opening
                        # quote, so the markers stay under the text.
                        tag = ' ' * len(tag)
                        prefix = ' '
                        text = text.rstrip()
                        suffix = ''
                    else:
                        prefix = '"'
                        suffix = '\\n"'
                    line = tag + prefix + text + suffix
                    self.dumper._write(line)
                self.dumper.level -= 1
                return
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)

    def isMultilineString(self, value):
        return isinstance(value, _string_types) and '\n' in value

    def replaceMultilineString(self, a, b):
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)


#
# Unfortunately the JSON standard does not include comments, but they are
# quite a useful feature to have in regression tests.
#

_token_res = [
    r'//[^\r\n]*', # comment
    r'"[^"\\]*(\\.[^"\\]*)*"', # string
]

_tokens_re = re.compile(r'|'.join(['(' + token_re + ')' for token_re in _token_res]), re.DOTALL)


def _strip_comment(mo):
    """Substitution callback: drop comment tokens, keep string tokens."""
    if mo.group(1):
        return ''
    else:
        return mo.group(0)


def _strip_comments(data):
    '''Strip (non-standard) JSON comments.'''
    return _tokens_re.sub(_strip_comment, data)


assert _strip_comments('''// a comment
"// a comment in a string
"''') == '''
"// a comment in a string
"'''


def load(stream, strip_images = True, strip_comments = True):
    """Decode JSON from ``stream``.

    With ``strip_images`` set, objects are filtered through
    ``strip_object_hook``.  With ``strip_comments`` set, ``//`` comments
    outside of strings are removed before decoding.  Decoding is
    non-strict, so control characters are accepted inside strings.
    """
    if strip_images:
        object_hook = strip_object_hook
    else:
        object_hook = None
    if strip_comments:
        data = stream.read()
        data = _strip_comments(data)
        return json.loads(data, strict=False, object_hook = object_hook)
    else:
        return json.load(stream, strict=False, object_hook = object_hook)


def main():
    """Command-line entry point: diff two JSON files onto standard output."""
    optparser = optparse.OptionParser(
        usage="\n\t%prog [options] <ref_json> <src_json>")
    optparser.add_option(
        '--keep-images',
        action="store_false", dest="strip_images", default=True,
        help="compare images")

    (options, args) = optparser.parse_args(sys.argv[1:])

    if len(args) != 2:
        optparser.error('incorrect number of arguments')

    # Use the parsed positional arguments: indexing sys.argv directly picks
    # up an option string whenever an option precedes the file names.
    ref_path, src_path = args
    with open(ref_path, 'rt') as ref_stream:
        a = load(ref_stream, options.strip_images)
    with open(src_path, 'rt') as src_stream:
        b = load(src_stream, options.strip_images)

    differ = Differ()
    differ.visit(a, b)


if __name__ == '__main__':
    main()