1b8e80941Smrg#!/usr/bin/env python2
2848b8605Smrg##########################################################################
3848b8605Smrg#
4848b8605Smrg# Copyright 2008 VMware, Inc.
5848b8605Smrg# All Rights Reserved.
6848b8605Smrg#
7848b8605Smrg# Permission is hereby granted, free of charge, to any person obtaining a
8848b8605Smrg# copy of this software and associated documentation files (the
9848b8605Smrg# "Software"), to deal in the Software without restriction, including
10848b8605Smrg# without limitation the rights to use, copy, modify, merge, publish,
11848b8605Smrg# distribute, sub license, and/or sell copies of the Software, and to
12848b8605Smrg# permit persons to whom the Software is furnished to do so, subject to
13848b8605Smrg# the following conditions:
14848b8605Smrg#
15848b8605Smrg# The above copyright notice and this permission notice (including the
16848b8605Smrg# next paragraph) shall be included in all copies or substantial portions
17848b8605Smrg# of the Software.
18848b8605Smrg#
19848b8605Smrg# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20848b8605Smrg# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21848b8605Smrg# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22848b8605Smrg# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23848b8605Smrg# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24848b8605Smrg# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25848b8605Smrg# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26848b8605Smrg#
27848b8605Smrg##########################################################################
28848b8605Smrg
29848b8605Smrg
30848b8605Smrgimport sys
31848b8605Smrgimport xml.parsers.expat
32848b8605Smrgimport optparse
33848b8605Smrg
34848b8605Smrgfrom model import *
35848b8605Smrg
36848b8605Smrg
# Kinds of token handed out by the tokenizer; XmlToken.type is one of these.
ELEMENT_START = 0
ELEMENT_END = 1
CHARACTER_DATA = 2
EOF = 3
38848b8605Smrg
39848b8605Smrg
class XmlToken:
    """A single token of the XML stream.

    type         -- one of ELEMENT_START, ELEMENT_END, CHARACTER_DATA or EOF
    name_or_data -- element name for element tokens, text for character data
    attrs        -- attributes attached to the token, if any
    line, column -- position attached to the token, if any
    """

    def __init__(self, type, name_or_data, attrs = None, line = None, column = None):
        assert type in (ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF)
        self.line = line
        self.column = column
        self.attrs = attrs
        self.name_or_data = name_or_data
        self.type = type

    def __str__(self):
        """Return a short human readable rendering of the token."""
        kind = self.type
        if kind == EOF:
            return 'end of file'
        if kind == CHARACTER_DATA:
            return self.name_or_data
        if kind == ELEMENT_END:
            return ''.join(('</', self.name_or_data, '>'))
        if kind == ELEMENT_START:
            return ''.join(('<', self.name_or_data, ' ...>'))
        # The constructor only lets the four kinds above through.
        assert 0
60848b8605Smrg
61848b8605Smrg
class XmlTokenizer:
    """Expat based XML tokenizer.

    The input is read in fixed size chunks and pushed through an expat
    parser.  The parser callbacks queue XmlToken objects, which next() hands
    out one at a time.
    """

    def __init__(self, fp, skip_ws = True):
        """Create a tokenizer reading from `fp`.

        fp      -- object whose read(size) method returns the XML data
        skip_ws -- when true, character data consisting only of whitespace
                   produces no token
        """
        self.fp = fp
        self.tokens = []
        self.index = 0
        self.final = False
        self.skip_ws = skip_ws

        # Text collected since the last element boundary, and where it began.
        self.character_pos = 0, 0
        self.character_data = ''

        self.parser = xml.parsers.expat.ParserCreate()
        self.parser.StartElementHandler  = self.handle_element_start
        self.parser.EndElementHandler    = self.handle_element_end
        self.parser.CharacterDataHandler = self.handle_character_data

    def handle_element_start(self, name, attributes):
        """Expat callback: queue an ELEMENT_START token (after any pending text)."""
        self.finish_character_data()
        line, column = self.pos()
        token = XmlToken(ELEMENT_START, name, attributes, line, column)
        self.tokens.append(token)

    def handle_element_end(self, name):
        """Expat callback: queue an ELEMENT_END token (after any pending text)."""
        self.finish_character_data()
        line, column = self.pos()
        token = XmlToken(ELEMENT_END, name, None, line, column)
        self.tokens.append(token)

    def handle_character_data(self, data):
        """Expat callback: accumulate text until the next element boundary."""
        if not self.character_data:
            # Remember where this run of text started.
            self.character_pos = self.pos()
        self.character_data += data

    def finish_character_data(self):
        """Turn the accumulated text, if any, into one CHARACTER_DATA token."""
        if self.character_data:
            if not self.skip_ws or not self.character_data.isspace():
                line, column = self.character_pos
                token = XmlToken(CHARACTER_DATA, self.character_data, None, line, column)
                self.tokens.append(token)
            self.character_data = ''

    def next(self):
        """Return the next token, or an EOF token once the input is exhausted."""
        size = 16*1024
        while self.index >= len(self.tokens) and not self.final:
            self.tokens = []
            self.index = 0
            data = self.fp.read(size)
            # Only an empty read marks the end of the input: pipes and other
            # streams may legitimately return fewer characters than requested.
            self.final = not data
            # Drop trailing NULs, using a pattern of the same type as the chunk.
            if isinstance(data, bytes):
                data = data.rstrip(b'\0')
            else:
                data = data.rstrip(u'\0')
            try:
                self.parser.Parse(data, self.final)
            except xml.parsers.expat.ExpatError as e:
                # 3 is the numeric expat code for XML_ERROR_NO_ELEMENTS ("no
                # element found").  It is ignored -- presumably so that empty
                # or truncated traces can still be read; anything else is a
                # genuine parse error.
                if e.code != 3:
                    raise
        if self.index >= len(self.tokens):
            line, column = self.pos()
            token = XmlToken(EOF, None, None, line, column)
        else:
            token = self.tokens[self.index]
            self.index += 1
        return token

    def pos(self):
        """Return the parser's current (line, column)."""
        return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber
131848b8605Smrg
132848b8605Smrg
class TokenMismatch(Exception):
    """Raised when the token found in the input is not the expected one.

    expected -- description of what was expected (anything str() accepts)
    found    -- the token actually found; its `line` and `column` attributes
                are used to prefix the message
    """

    def __init__(self, expected, found):
        # Hand both values to Exception so that args, repr() and pickling
        # see them.
        Exception.__init__(self, expected, found)
        self.expected = expected
        self.found = found

    def __str__(self):
        line = getattr(self.found, 'line', None)
        column = getattr(self.found, 'column', None)
        if line is None or column is None:
            # No position available: still produce a message rather than
            # failing while formatting the error itself.
            where = '?:?'
        else:
            where = '%u:%u' % (line, column)
        return '%s: %s expected, %s found' % (where, str(self.expected), str(self.found))
141848b8605Smrg
142848b8605Smrg
143848b8605Smrg
class XmlParser:
    """Base XML document parser.

    Keeps one token of lookahead in `self.token` and offers helpers to match
    and consume element boundaries and character data.
    """

    def __init__(self, fp):
        self.tokenizer = XmlTokenizer(fp)
        # Prime the lookahead token.
        self.consume()

    def consume(self):
        """Advance the lookahead to the next token."""
        self.token = self.tokenizer.next()

    def match_element_start(self, name):
        """Tell whether the lookahead opens element `name` (nothing is consumed)."""
        current = self.token
        return current.type == ELEMENT_START and current.name_or_data == name

    def match_element_end(self, name):
        """Tell whether the lookahead closes element `name` (nothing is consumed)."""
        current = self.token
        return current.type == ELEMENT_END and current.name_or_data == name

    def _skip_character_data(self):
        """Discard any character data tokens at the lookahead."""
        while self.token.type == CHARACTER_DATA:
            self.consume()

    def element_start(self, name):
        """Consume the opening tag of `name` and return its attributes.

        Character data in front of the tag is skipped.  Raises TokenMismatch
        when the next token is anything but that opening tag.
        """
        self._skip_character_data()
        current = self.token
        if current.type != ELEMENT_START or current.name_or_data != name:
            raise TokenMismatch(XmlToken(ELEMENT_START, name), current)
        self.consume()
        return current.attrs

    def element_end(self, name):
        """Consume the closing tag of `name`.

        Character data in front of the tag is skipped.  Raises TokenMismatch
        when the next token is anything but that closing tag.
        """
        self._skip_character_data()
        current = self.token
        if current.type != ELEMENT_END or current.name_or_data != name:
            raise TokenMismatch(XmlToken(ELEMENT_END, name), current)
        self.consume()

    def character_data(self, strip = True):
        """Consume consecutive character data tokens and return their text.

        The text is stripped of surrounding whitespace unless `strip` is false.
        """
        pieces = []
        while self.token.type == CHARACTER_DATA:
            pieces.append(self.token.name_or_data)
            self.consume()
        data = ''.join(pieces)
        return data.strip() if strip else data
188848b8605Smrg
189848b8605Smrg
class TraceParser(XmlParser):
    """Parser for XML traces: a <trace> element holding <call> elements.

    Each top level call is turned into a Call object and handed to
    handle_call(), which does nothing here and is meant to be overridden.
    """

    def __init__(self, fp):
        XmlParser.__init__(self, fp)
        # Number of the most recently started call; used to number calls
        # that carry no 'no' attribute.
        self.last_call_no = 0

    def parse(self):
        """Parse the trace, calling handle_call() once per top level call.

        The closing </trace> is optional: end of file is accepted in its place.
        """
        self.element_start('trace')
        while self.token.type not in (ELEMENT_END, EOF):
            call = self.parse_call()
            self.handle_call(call)
        if self.token.type != EOF:
            self.element_end('trace')

    def parse_call(self):
        """Parse one <call> element and return the corresponding Call.

        Accepted children are <arg>, <ret>, <time> and nested <call>
        elements; nested calls are parsed but their result is discarded.
        Raises TokenMismatch on any other child element.
        """
        attrs = self.element_start('call')
        try:
            no = int(attrs['no'])
        except KeyError:
            # Unnumbered call: continue counting from the previous one.
            self.last_call_no += 1
            no = self.last_call_no
        else:
            self.last_call_no = no
        klass = attrs['class']
        method = attrs['method']
        args = []
        ret = None
        time = None
        while self.token.type == ELEMENT_START:
            if self.token.name_or_data == 'arg':
                arg = self.parse_arg()
                args.append(arg)
            elif self.token.name_or_data == 'ret':
                ret = self.parse_ret()
            elif self.token.name_or_data == 'call':
                # ignore nested function calls
                self.parse_call()
            elif self.token.name_or_data == 'time':
                time = self.parse_time()
            else:
                raise TokenMismatch("<arg ...>, <ret ...>, <call ...> or <time ...>", self.token)
        self.element_end('call')

        return Call(no, klass, method, args, ret, time)

    def parse_arg(self):
        """Parse <arg name=...>value</arg> and return a (name, value) pair."""
        attrs = self.element_start('arg')
        name = attrs['name']
        value = self.parse_value()
        self.element_end('arg')

        return name, value

    def parse_ret(self):
        """Parse <ret>value</ret> and return the value."""
        self.element_start('ret')
        value = self.parse_value()
        self.element_end('ret')

        return value

    def parse_time(self):
        """Parse <time>value</time> and return the value."""
        self.element_start('time')
        time = self.parse_value()
        self.element_end('time')
        return time

    def parse_value(self):
        """Parse any value element by dispatching to the matching parse_<tag>().

        Raises TokenMismatch when the lookahead does not open a value element.
        """
        expected_tokens = ('null', 'bool', 'int', 'uint', 'float', 'string', 'enum', 'array', 'struct', 'ptr', 'bytes')
        if self.token.type == ELEMENT_START:
            if self.token.name_or_data in expected_tokens:
                method = getattr(self, 'parse_' +  self.token.name_or_data)
                return method()
        raise TokenMismatch(" or " .join(expected_tokens), self.token)

    def _parse_text_element(self, name, convert = None):
        """Parse <name>text</name> and return its stripped text.

        When `convert` is given it is applied to the text before the closing
        tag is checked, and its result is returned instead.
        """
        self.element_start(name)
        value = self.character_data()
        if convert is not None:
            value = convert(value)
        self.element_end(name)
        return value

    def parse_null(self):
        """Parse an empty <null/> element into Literal(None)."""
        self.element_start('null')
        self.element_end('null')
        return Literal(None)

    def parse_bool(self):
        """Parse <bool>; the text is converted with int()."""
        return Literal(self._parse_text_element('bool', int))

    def parse_int(self):
        """Parse <int> into an integer Literal."""
        return Literal(self._parse_text_element('int', int))

    def parse_uint(self):
        """Parse <uint> into an integer Literal."""
        return Literal(self._parse_text_element('uint', int))

    def parse_float(self):
        """Parse <float> into a float Literal."""
        return Literal(self._parse_text_element('float', float))

    def parse_enum(self):
        """Parse <enum> into a NamedConstant carrying the element text."""
        return NamedConstant(self._parse_text_element('enum'))

    def parse_string(self):
        """Parse <string> into a Literal carrying the (stripped) text."""
        return Literal(self._parse_text_element('string'))

    def parse_bytes(self):
        """Parse <bytes> into a Blob carrying the element text."""
        return Blob(self._parse_text_element('bytes'))

    def parse_array(self):
        """Parse <array> and its <elem> children into an Array."""
        self.element_start('array')
        elems = []
        while self.token.type != ELEMENT_END:
            elems.append(self.parse_elem())
        self.element_end('array')
        return Array(elems)

    def parse_elem(self):
        """Parse <elem>value</elem> and return the value."""
        self.element_start('elem')
        value = self.parse_value()
        self.element_end('elem')
        return value

    def parse_struct(self):
        """Parse <struct name=...> and its <member> children into a Struct."""
        attrs = self.element_start('struct')
        name = attrs['name']
        members = []
        while self.token.type != ELEMENT_END:
            members.append(self.parse_member())
        self.element_end('struct')
        return Struct(name, members)

    def parse_member(self):
        """Parse <member name=...>value</member> and return a (name, value) pair."""
        attrs = self.element_start('member')
        name = attrs['name']
        value = self.parse_value()
        self.element_end('member')

        return name, value

    def parse_ptr(self):
        """Parse <ptr> into a Pointer carrying the element text."""
        return Pointer(self._parse_text_element('ptr'))

    def handle_call(self, call):
        """Hook invoked by parse() for every top level call; does nothing here."""
        pass
351848b8605Smrg
352848b8605Smrg
class TraceDumper(TraceParser):
    """Trace parser that pretty prints every call it parses."""

    def __init__(self, fp, outStream = None):
        """Create a dumper reading the trace from `fp`.

        outStream -- stream handed to the formatter; defaults to sys.stdout,
                     looked up when the dumper is created so that a
                     redirected sys.stdout is honoured
        """
        TraceParser.__init__(self, fp)
        if outStream is None:
            outStream = sys.stdout
        # NOTE(review): `format` and `PrettyPrinter` are presumably provided
        # by the `from model import *` at the top of the file -- confirm.
        self.formatter = format.DefaultFormatter(outStream)
        self.pretty_printer = PrettyPrinter(self.formatter)

    def handle_call(self, call):
        """Pretty print `call`, followed by a newline."""
        call.visit(self.pretty_printer)
        self.formatter.newline()
363848b8605Smrg
364848b8605Smrg
class Main:
    '''Common main class for all retrace command line utilities.'''

    def __init__(self):
        pass

    def main(self):
        '''Parse the command line and process every trace file named on it.

        Each file is opened, passed to process_arg() together with the parsed
        options, and closed again -- even when processing raises.  Exits
        through optparser.error() when no file is given.
        '''
        optparser = self.get_optparser()
        (options, args) = optparser.parse_args(sys.argv[1:])

        if not args:
            optparser.error('insufficient number of arguments')

        for arg in args:
            stream = self._open_stream(arg)
            try:
                self.process_arg(stream, options)
            finally:
                # Do not leak one open file per trace.
                stream.close()

    def _open_stream(self, arg):
        '''Open trace file `arg`, choosing the reader from its extension.'''
        if arg.endswith('.gz'):
            from gzip import GzipFile
            return GzipFile(arg, 'rt')
        if arg.endswith('.bz2'):
            from bz2 import BZ2File
            return BZ2File(arg, 'rU')
        return open(arg, 'rt')

    def get_optparser(self):
        '''Return the option parser used by main().'''
        optparser = optparse.OptionParser(
            usage="\n\t%prog [options] TRACE  [...]")
        return optparser

    def process_arg(self, stream, options):
        '''Process one opened trace; by default, dump it with TraceDumper.'''
        parser = TraceDumper(stream)
        parser.parse()
397848b8605Smrg
398848b8605Smrg
# Script entry point: run the default command line tool.
if __name__ == "__main__":
    application = Main()
    application.main()
401