#!/usr/bin/env python2
##########################################################################
#
# Copyright 2011 Jose Fonseca
# All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
##########################################################################/


import difflib
import json
import numbers
import optparse
import re
import sys


def strip_object_hook(obj):
    '''JSON ``object_hook`` that discards tagged objects and dunder members.

    Args:
        obj: A dict freshly decoded from one JSON object.

    Returns:
        ``None`` when ``obj`` has a ``'__class__'`` key (the whole object is
        dropped -- presumably these are serialized images, since load() only
        installs this hook when ``strip_images`` is set).  Otherwise ``obj``
        itself, mutated in place so that every key which both starts and
        ends with ``'__'`` has been removed.
    '''
    if '__class__' in obj:
        return None
    # Iterate over a snapshot of the keys: deleting entries while walking the
    # live key view raises RuntimeError on Python 3.
    for name in list(obj):
        if name.startswith('__') and name.endswith('__'):
            del obj[name]
    return obj


class Visitor(object):
    '''Base class that dispatches on the type of a decoded JSON node.

    ``visit`` routes dicts to ``visitObject``, lists to ``visitArray`` and
    everything else to ``visitValue``.  The three handlers are no-ops here
    (they return ``None``); subclasses override the ones they need.  Any
    extra positional or keyword arguments are forwarded untouched.
    '''

    def visit(self, node, *args, **kwargs):
        '''Dispatch ``node`` to the matching handler and return its result.'''
        if isinstance(node, dict):
            handler = self.visitObject
        elif isinstance(node, list):
            handler = self.visitArray
        else:
            handler = self.visitValue
        return handler(node, *args, **kwargs)

    def visitObject(self, node, *args, **kwargs):
        '''Handle a dict node; does nothing by default.'''
        pass

    def visitArray(self, node, *args, **kwargs):
        '''Handle a list node; does nothing by default.'''
        pass

    def visitValue(self, node, *args, **kwargs):
        '''Handle any node that is neither a dict nor a list; no-op.'''
        pass


class Dumper(Visitor):
    '''Writes an indented, JSON-like rendering of a value to a stream.

    Object members are emitted in sorted key order, one per line, and member
    names are written verbatim (unquoted).  Scalars are rendered with
    ``json.dumps``.  The ``enter_*`` / ``leave_*`` helpers are also driven
    directly by Differ to produce partial dumps.
    '''

    def __init__(self, stream=sys.stdout):
        '''
        Args:
            stream: Object with a ``write(str)`` method that receives the
                output.
        '''
        self.stream = stream
        # Current nesting depth; each level indents by two spaces.
        self.level = 0

    def _write(self, s):
        '''Write ``s`` to the output stream unchanged.'''
        self.stream.write(s)

    def _indent(self):
        '''Write the indentation for the current nesting level.'''
        self._write('  ' * self.level)

    def _newline(self):
        '''Terminate the current output line.'''
        self._write('\n')

    def visitObject(self, node):
        '''Dump a dict, visiting its members in sorted key order.'''
        self.enter_object()

        # sorted() accepts both a Python 2 key list and a Python 3 key view,
        # unlike calling .sort() on the result of keys().
        members = sorted(node)
        last_index = len(members) - 1
        for i, name in enumerate(members):
            self.enter_member(name)
            self.visit(node[name])
            self.leave_member(i == last_index)
        self.leave_object()

    def enter_object(self):
        '''Open an object: write ``{``, end the line and indent one level.'''
        self._write('{')
        self._newline()
        self.level += 1

    def enter_member(self, name):
        '''Start a member line: indentation followed by ``name: ``.'''
        self._indent()
        self._write('%s: ' % name)

    def leave_member(self, last):
        '''Finish a member line, adding a comma unless ``last`` is true.'''
        if not last:
            self._write(',')
        self._newline()

    def leave_object(self):
        '''Close an object: dedent and write ``}``.

        A newline is appended only when this closes the outermost level.
        '''
        self.level -= 1
        self._indent()
        self._write('}')
        if self.level <= 0:
            self._newline()

    def visitArray(self, node):
        '''Dump a list, one element per line, separated by commas.'''
        self.enter_array()
        last_index = len(node) - 1
        for i, value in enumerate(node):
            self._indent()
            self.visit(value)
            if i != last_index:
                self._write(',')
            self._newline()
        self.leave_array()

    def enter_array(self):
        '''Open an array: write ``[``, end the line and indent one level.'''
        self._write('[')
        self._newline()
        self.level += 1

    def leave_array(self):
        '''Close an array: dedent and write ``]`` (no trailing newline).'''
        self.level -= 1
        self._indent()
        self._write(']')

    def visitValue(self, node):
        '''Dump a scalar using its JSON encoding (NaN/Infinity allowed).'''
        self._write(json.dumps(node, allow_nan=True))


class Comparer(Visitor):
    '''Structural equality test between two decoded JSON values.

    ``visit(a, b)`` returns ``True`` when ``b`` matches the reference ``a``.
    Floating point values are compared with a relative tolerance.
    '''

    def __init__(self, ignore_added=False, tolerance=2.0 ** -24):
        '''
        Args:
            ignore_added: When true, object members present in ``b`` but
                absent from ``a`` do not count as a difference.
            tolerance: Exclusive upper bound on the relative error allowed
                between two numbers when at least one of them is a float
                (absolute error when the reference value is zero).
        '''
        self.ignore_added = ignore_added
        self.tolerance = tolerance

    def visitObject(self, a, b):
        '''Compare dict ``a`` against ``b`` member by member.'''
        if not isinstance(b, dict):
            return False
        # Unless additions are tolerated, both sides must have exactly the
        # same member names (this also implies equal sizes).
        if not self.ignore_added and set(a) != set(b):
            return False
        for k in sorted(a):
            if k not in b:
                # Only reachable with ignore_added: a member was removed.
                return False
            if not self.visit(a[k], b[k]):
                return False
        return True

    def visitArray(self, a, b):
        '''Compare list ``a`` against ``b`` element by element.'''
        if not isinstance(b, list):
            return False
        if len(a) != len(b):
            return False
        for ae, be in zip(a, b):
            if not self.visit(ae, be):
                return False
        return True

    def visitValue(self, a, b):
        '''Compare two scalars, using the tolerance when a float is involved.'''
        if isinstance(a, float) or isinstance(b, float):
            # A float can only match another number.  Differ passes None for
            # missing members, and strings may appear on either side; the
            # arithmetic below would raise TypeError on those.
            if not (isinstance(a, numbers.Real) and isinstance(b, numbers.Real)):
                return False
            # Exactly equal values always match; this also covers equal
            # infinities, whose relative error would evaluate to NaN.
            if a == b:
                return True
            if a == 0:
                return abs(b) < self.tolerance
            return abs((b - a) / a) < self.tolerance
        return a == b


class Differ(Visitor):
    '''Prints the differences between two decoded JSON values.

    ``visit(a, b)`` writes nothing when the Comparer considers the two
    values equal; otherwise it writes a partial dump in which every changed
    leaf is shown as ``old -> new``.
    '''

    # Text types accepted as "string" by replace()/isMultilineString():
    # basestring on Python 2, str on Python 3 (where basestring is gone).
    try:
        _string_types = (basestring,)
    except NameError:
        _string_types = (str,)

    def __init__(self, stream=sys.stdout, ignore_added=False):
        '''
        Args:
            stream: Object with a ``write(str)`` method for the report.
            ignore_added: Forwarded to Comparer; when true, members that
                exist only in the second value are not reported.
        '''
        self.dumper = Dumper(stream)
        self.comparer = Comparer(ignore_added=ignore_added)

    def visit(self, a, b):
        '''Report how ``b`` differs from ``a``; silent when they match.'''
        if self.comparer.visit(a, b):
            return
        Visitor.visit(self, a, b)

    def visitObject(self, a, b):
        '''Report the members of dict ``a`` that differ in ``b``.'''
        if not isinstance(b, dict):
            self.replace(a, b)
            return

        self.dumper.enter_object()
        names = set(a)
        if not self.comparer.ignore_added:
            names.update(b)

        # Select the differing members up front so that the "last member"
        # flag refers to the last line actually printed; deciding it from
        # the position among all names leaves a dangling comma whenever the
        # final name is unchanged.  A missing member reads as None, so it is
        # indistinguishable from an explicit null on the other side.
        changed = [
            name for name in sorted(names)
            if not self.comparer.visit(a.get(name), b.get(name))
        ]
        last_index = len(changed) - 1
        for i, name in enumerate(changed):
            self.dumper.enter_member(name)
            self.visit(a.get(name), b.get(name))
            self.dumper.leave_member(i == last_index)

        self.dumper.leave_object()

    def visitArray(self, a, b):
        '''Report list ``a`` against ``b`` position by position.

        Every position is printed: matching elements are dumped as they
        are, differing ones are diffed recursively.  Positions past the end
        of the shorter list read as ``None``.
        '''
        if not isinstance(b, list):
            self.replace(a, b)
            return

        self.dumper.enter_array()
        max_len = max(len(a), len(b))
        for i in range(max_len):
            ae = a[i] if i < len(a) else None
            be = b[i] if i < len(b) else None
            self.dumper._indent()
            if self.comparer.visit(ae, be):
                self.dumper.visit(ae)
            else:
                self.visit(ae, be)
            if i != max_len - 1:
                self.dumper._write(',')
            self.dumper._newline()

        self.dumper.leave_array()

    def visitValue(self, a, b):
        '''Report a changed scalar.'''
        if a != b:
            self.replace(a, b)

    def replace(self, a, b):
        '''Write the replacement of ``a`` by ``b``.

        When both are strings and either contains a newline, a line-by-line
        ``difflib.Differ`` comparison is written, one entry per output line,
        indented one level deeper.  Otherwise ``a -> b`` is written using
        the dumper.
        '''
        both_strings = (isinstance(a, self._string_types)
                        and isinstance(b, self._string_types))
        if both_strings and ('\n' in a or '\n' in b):
            result = difflib.Differ().compare(a.splitlines(), b.splitlines())
            self.dumper.level += 1
            for entry in result:
                self.dumper._newline()
                self.dumper._indent()
                # difflib entries start with a two-character tag.
                tag = entry[:2]
                text = entry[2:]
                if tag == '? ':
                    # Intraline hint: keep it aligned under the quoted text
                    # of the line it annotates and print it unquoted.
                    tag = '  '
                    prefix = ' '
                    text = text.rstrip()
                    suffix = ''
                else:
                    prefix = '"'
                    suffix = '\\n"'
                self.dumper._write(tag + prefix + text + suffix)
            self.dumper.level -= 1
            return
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)

    def isMultilineString(self, value):
        '''Return whether ``value`` is a string containing a newline.

        Not called by any code visible in this module.
        '''
        return isinstance(value, self._string_types) and '\n' in value

    def replaceMultilineString(self, a, b):
        '''Write ``a -> b`` using the dumper.

        Not called by any code visible in this module.
        '''
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)


#
# Unfortunately the JSON standard does not include comments, but they are a
# quite useful feature to have in regression tests.
#

292848b8605Smrg_token_res = [
293848b8605Smrg    r'//[^\r\n]*', # comment
294848b8605Smrg    r'"[^"\\]*(\\.[^"\\]*)*"', # string
295848b8605Smrg]
296848b8605Smrg
297848b8605Smrg_tokens_re = re.compile(r'|'.join(['(' + token_re + ')' for token_re in _token_res]), re.DOTALL)
298848b8605Smrg
299848b8605Smrg
300848b8605Smrgdef _strip_comment(mo):
301848b8605Smrg    if mo.group(1):
302848b8605Smrg        return ''
303848b8605Smrg    else:
304848b8605Smrg        return mo.group(0)
305848b8605Smrg
306848b8605Smrg
def _strip_comments(data):
    '''Strip (non-standard) ``//`` comments from JSON text.

    Args:
        data: The JSON document as a string.

    Returns:
        ``data`` with every ``//`` comment removed up to, but not including,
        the end of its line.  String literals are left untouched.
    '''
    return _tokens_re.sub(_strip_comment, data)


# Import-time self-check: a comment is removed (its line break stays), while
# a '//' sequence inside a string literal survives.
assert _strip_comments('''// a comment
"// a comment in a string
"''') == '''
"// a comment in a string
"'''


def load(stream, strip_images=True, strip_comments=True):
    '''Decode a JSON document from a readable text stream.

    Args:
        stream: File-like object; ``read()`` is called on it.
        strip_images: When true, decode with ``strip_object_hook`` so that
            ``__class__``-tagged objects and dunder members are dropped.
        strip_comments: When true, remove ``//`` comments before parsing.

    Returns:
        The decoded value.  Parsing is non-strict, so control characters
        are accepted inside strings.
    '''
    object_hook = strip_object_hook if strip_images else None
    if strip_comments:
        data = _strip_comments(stream.read())
        return json.loads(data, strict=False, object_hook=object_hook)
    return json.load(stream, strict=False, object_hook=object_hook)


def main():
    '''Command line entry point: diff two JSON files and print the result.

    Usage: ``%prog [options] <ref_json> <src_json>``.  Exits through
    ``optparse`` error handling unless exactly two file names are given.
    '''
    optparser = optparse.OptionParser(
        usage="\n\t%prog [options] <ref_json> <src_json>")
    optparser.add_option(
        '--keep-images',
        action="store_false", dest="strip_images", default=True,
        help="compare images")

    (options, args) = optparser.parse_args(sys.argv[1:])

    if len(args) != 2:
        optparser.error('incorrect number of arguments')

    # Use the parsed positional arguments rather than sys.argv[1] and
    # sys.argv[2]: those are wrong as soon as an option precedes the file
    # names.
    ref_path, src_path = args
    with open(ref_path, 'rt') as ref_file:
        a = load(ref_file, options.strip_images)
    with open(src_path, 'rt') as src_file:
        b = load(src_file, options.strip_images)

    differ = Differ()
    differ.visit(a, b)


# Run the command line tool only when executed as a script.
if __name__ == '__main__':
    main()