#!/usr/bin/env python2
##########################################################################
#
# Copyright 2008 VMware, Inc.
# All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sub license, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice (including the
# next paragraph) shall be included in all copies or substantial portions
# of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
##########################################################################


import sys
import xml.parsers.expat
import optparse

from model import *


# Token kinds produced by XmlTokenizer.
ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF = range(4)

# Numeric ExpatError.code for expat's XML_ERROR_NO_ELEMENTS.  The numeric
# value is compared directly instead of a name looked up in
# xml.parsers.expat.errors.
_XML_ERROR_NO_ELEMENTS = 3


class XmlToken:
    """One token of an XML stream.

    Attributes:
        type: one of ELEMENT_START, ELEMENT_END, CHARACTER_DATA or EOF.
        name_or_data: element name for start/end tokens, the text for
            CHARACTER_DATA tokens (the tokenizer passes None for EOF).
        attrs: attributes handed over by expat for ELEMENT_START tokens,
            otherwise None.
        line, column: parser position recorded when the token was created,
            or None when the token was built by hand (e.g. as the
            "expected" half of a TokenMismatch).
    """

    def __init__(self, type, name_or_data, attrs = None, line = None, column = None):
        assert type in (ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF)
        self.type = type
        self.name_or_data = name_or_data
        self.attrs = attrs
        self.line = line
        self.column = column

    def __str__(self):
        """Return a short human readable rendering, used in error messages."""
        if self.type == ELEMENT_START:
            return '<' + self.name_or_data + ' ...>'
        if self.type == ELEMENT_END:
            return '</' + self.name_or_data + '>'
        if self.type == CHARACTER_DATA:
            return self.name_or_data
        if self.type == EOF:
            return 'end of file'
        # __init__ only admits the four kinds handled above.
        assert 0


class XmlTokenizer:
    """Expat based XML tokenizer.

    Reads the input in fixed size chunks and turns expat callbacks into a
    queue of XmlToken objects that next() hands out one at a time.
    """

    def __init__(self, fp, skip_ws = True):
        """Create a tokenizer reading from *fp*.

        fp: object with a read(size) method.
        skip_ws: when true, character data consisting only of whitespace
            is not reported.
        """
        self.fp = fp
        self.tokens = []
        self.index = 0
        self.final = False
        self.skip_ws = skip_ws

        # Position and contents of the character data gathered so far.
        self.character_pos = 0, 0
        self.character_data = ''

        self.parser = xml.parsers.expat.ParserCreate()
        self.parser.StartElementHandler = self.handle_element_start
        self.parser.EndElementHandler = self.handle_element_end
        self.parser.CharacterDataHandler = self.handle_character_data

    def handle_element_start(self, name, attributes):
        """Expat callback: queue an ELEMENT_START token."""
        self.finish_character_data()
        line, column = self.pos()
        token = XmlToken(ELEMENT_START, name, attributes, line, column)
        self.tokens.append(token)

    def handle_element_end(self, name):
        """Expat callback: queue an ELEMENT_END token."""
        self.finish_character_data()
        line, column = self.pos()
        token = XmlToken(ELEMENT_END, name, None, line, column)
        self.tokens.append(token)

    def handle_character_data(self, data):
        """Expat callback: accumulate text until the next element boundary.

        The position of the first piece is remembered so the resulting
        token points at the start of the text.
        """
        if not self.character_data:
            self.character_pos = self.pos()
        self.character_data += data

    def finish_character_data(self):
        """Flush the accumulated text as a single CHARACTER_DATA token.

        Whitespace-only text is dropped when skip_ws is set.
        """
        if self.character_data:
            if not self.skip_ws or not self.character_data.isspace():
                line, column = self.character_pos
                token = XmlToken(CHARACTER_DATA, self.character_data, None, line, column)
                self.tokens.append(token)
            self.character_data = ''

    def next(self):
        """Return the next token, feeding more input to expat as needed.

        Returns an EOF token once the input is exhausted and every queued
        token has been handed out.

        Raises:
            xml.parsers.expat.ExpatError: for any parse error other than
                XML_ERROR_NO_ELEMENTS, which is ignored.
        """
        size = 16*1024
        while self.index >= len(self.tokens) and not self.final:
            self.tokens = []
            self.index = 0
            data = self.fp.read(size)
            # A short read is taken to mean the end of the input.
            self.final = len(data) < size
            # Drop trailing NUL characters before handing the chunk to expat.
            data = data.rstrip('\0')
            try:
                self.parser.Parse(data, self.final)
            except xml.parsers.expat.ExpatError as e:
                if e.code != _XML_ERROR_NO_ELEMENTS:
                    # Bare raise keeps the original traceback.
                    raise
        if self.index >= len(self.tokens):
            line, column = self.pos()
            token = XmlToken(EOF, None, None, line, column)
        else:
            token = self.tokens[self.index]
            self.index += 1
        return token

    def pos(self):
        """Return the parser's current (line, column)."""
        return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber


class TokenMismatch(Exception):
    """Raised when the current token is not what the grammar expects.

    expected: an XmlToken or a descriptive string.
    found: the XmlToken that was actually read; its line and column are
        used in the message.
    """

    def __init__(self, expected, found):
        self.expected = expected
        self.found = found

    def __str__(self):
        return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found))



class XmlParser:
    """Base XML document parser.

    Keeps one token of lookahead in self.token and offers helpers to match
    and consume elements and text.
    """

    def __init__(self, fp):
        self.tokenizer = XmlTokenizer(fp)
        self.consume()

    def consume(self):
        """Advance the lookahead to the next token."""
        self.token = self.tokenizer.next()

    def match_element_start(self, name):
        """Tell whether the lookahead is the start tag of element *name*."""
        return self.token.type == ELEMENT_START and self.token.name_or_data == name

    def match_element_end(self, name):
        """Tell whether the lookahead is the end tag of element *name*."""
        return self.token.type == ELEMENT_END and self.token.name_or_data == name

    def element_start(self, name):
        """Consume the start tag of element *name* and return its attributes.

        Any character data in front of the tag is skipped.

        Raises:
            TokenMismatch: if the next non-text token is not that start tag.
        """
        while self.token.type == CHARACTER_DATA:
            self.consume()
        if self.token.type != ELEMENT_START or self.token.name_or_data != name:
            raise TokenMismatch(XmlToken(ELEMENT_START, name), self.token)
        attrs = self.token.attrs
        self.consume()
        return attrs

    def element_end(self, name):
        """Consume the end tag of element *name*.

        Any character data in front of the tag is skipped.

        Raises:
            TokenMismatch: if the next non-text token is not that end tag.
        """
        while self.token.type == CHARACTER_DATA:
            self.consume()
        if self.token.type != ELEMENT_END or self.token.name_or_data != name:
            raise TokenMismatch(XmlToken(ELEMENT_END, name), self.token)
        self.consume()

    def character_data(self, strip = True):
        """Consume consecutive CHARACTER_DATA tokens and return their text.

        Returns an empty string when the lookahead is not character data.
        Leading and trailing whitespace is removed unless *strip* is false.
        """
        chunks = []
        while self.token.type == CHARACTER_DATA:
            chunks.append(self.token.name_or_data)
            self.consume()
        data = ''.join(chunks)
        if strip:
            data = data.strip()
        return data


class TraceParser(XmlParser):
    """Parser for a <trace> document made of <call> elements.

    Each top-level call is turned into a Call object and passed to
    handle_call(), which subclasses override.
    """

    def __init__(self, fp):
        XmlParser.__init__(self, fp)
        # Number of the most recent call, used when a call has no 'no'
        # attribute.
        self.last_call_no = 0

    def parse(self):
        """Parse the whole trace, dispatching every call to handle_call().

        A trace that reaches end of file without a closing </trace> tag is
        accepted.
        """
        self.element_start('trace')
        while self.token.type not in (ELEMENT_END, EOF):
            call = self.parse_call()
            self.handle_call(call)
        if self.token.type != EOF:
            self.element_end('trace')

    def parse_call(self):
        """Parse one <call> element and return it as a Call.

        Calls without a 'no' attribute are numbered one past the previous
        call.  Nested <call> elements are parsed and discarded.

        Raises:
            TokenMismatch: on an unexpected child element.
            KeyError: if the 'class' or 'method' attribute is missing.
        """
        attrs = self.element_start('call')
        try:
            no = int(attrs['no'])
        except KeyError:
            self.last_call_no += 1
            no = self.last_call_no
        else:
            self.last_call_no = no
        klass = attrs['class']
        method = attrs['method']
        args = []
        ret = None
        time = None
        while self.token.type == ELEMENT_START:
            if self.token.name_or_data == 'arg':
                arg = self.parse_arg()
                args.append(arg)
            elif self.token.name_or_data == 'ret':
                ret = self.parse_ret()
            elif self.token.name_or_data == 'call':
                # ignore nested function calls
                self.parse_call()
            elif self.token.name_or_data == 'time':
                time = self.parse_time()
            else:
                raise TokenMismatch("<arg ...> or <ret ...>", self.token)
        self.element_end('call')

        return Call(no, klass, method, args, ret, time)

    def parse_arg(self):
        """Parse an <arg> element and return a (name, value) pair."""
        attrs = self.element_start('arg')
        name = attrs['name']
        value = self.parse_value()
        self.element_end('arg')

        return name, value

    def parse_ret(self):
        """Parse a <ret> element and return the value it wraps."""
        self.element_start('ret')
        value = self.parse_value()
        self.element_end('ret')

        return value

    def parse_time(self):
        """Parse a <time> element and return the value it wraps."""
        self.element_start('time')
        time = self.parse_value()
        self.element_end('time')
        return time

    def parse_value(self):
        """Parse one value element by dispatching to parse_<element name>.

        Raises:
            TokenMismatch: if the lookahead is not the start tag of one of
                the known value elements.
        """
        expected_tokens = ('null', 'bool', 'int', 'uint', 'float', 'string', 'enum', 'array', 'struct', 'ptr', 'bytes')
        if self.token.type == ELEMENT_START:
            if self.token.name_or_data in expected_tokens:
                method = getattr(self, 'parse_' + self.token.name_or_data)
                return method()
        raise TokenMismatch(" or " .join(expected_tokens), self.token)

    def parse_null(self):
        """Parse <null/> into Literal(None)."""
        self.element_start('null')
        self.element_end('null')
        return Literal(None)

    def parse_bool(self):
        """Parse <bool>; the text is converted with int()."""
        self.element_start('bool')
        value = int(self.character_data())
        self.element_end('bool')
        return Literal(value)

    def parse_int(self):
        """Parse <int> into a Literal holding an int."""
        self.element_start('int')
        value = int(self.character_data())
        self.element_end('int')
        return Literal(value)

    def parse_uint(self):
        """Parse <uint> into a Literal holding an int."""
        self.element_start('uint')
        value = int(self.character_data())
        self.element_end('uint')
        return Literal(value)

    def parse_float(self):
        """Parse <float> into a Literal holding a float."""
        self.element_start('float')
        value = float(self.character_data())
        self.element_end('float')
        return Literal(value)

    def parse_enum(self):
        """Parse <enum> into a NamedConstant built from its text."""
        self.element_start('enum')
        name = self.character_data()
        self.element_end('enum')
        return NamedConstant(name)

    def parse_string(self):
        """Parse <string> into a Literal holding the stripped text."""
        self.element_start('string')
        value = self.character_data()
        self.element_end('string')
        return Literal(value)

    def parse_bytes(self):
        """Parse <bytes> into a Blob built from its text."""
        self.element_start('bytes')
        value = self.character_data()
        self.element_end('bytes')
        return Blob(value)

    def parse_array(self):
        """Parse <array> and its <elem> children into an Array."""
        self.element_start('array')
        elems = []
        while self.token.type != ELEMENT_END:
            elems.append(self.parse_elem())
        self.element_end('array')
        return Array(elems)

    def parse_elem(self):
        """Parse an <elem> element and return the value it wraps."""
        self.element_start('elem')
        value = self.parse_value()
        self.element_end('elem')
        return value

    def parse_struct(self):
        """Parse <struct name=...> and its <member> children into a Struct."""
        attrs = self.element_start('struct')
        name = attrs['name']
        members = []
        while self.token.type != ELEMENT_END:
            members.append(self.parse_member())
        self.element_end('struct')
        return Struct(name, members)

    def parse_member(self):
        """Parse a <member> element and return a (name, value) pair."""
        attrs = self.element_start('member')
        name = attrs['name']
        value = self.parse_value()
        self.element_end('member')

        return name, value

    def parse_ptr(self):
        """Parse <ptr> into a Pointer built from its text."""
        self.element_start('ptr')
        address = self.character_data()
        self.element_end('ptr')

        return Pointer(address)

    def handle_call(self, call):
        """Hook invoked for every top-level call; does nothing here."""
        pass


class TraceDumper(TraceParser):
    """TraceParser that pretty-prints every call to an output stream."""

    def __init__(self, fp, outStream = sys.stdout):
        TraceParser.__init__(self, fp)
        # NOTE(review): 'format' is not imported explicitly in this file;
        # presumably it arrives through 'from model import *' -- confirm.
        self.formatter = format.DefaultFormatter(outStream)
        self.pretty_printer = PrettyPrinter(self.formatter)

    def handle_call(self, call):
        """Print *call* followed by a newline."""
        call.visit(self.pretty_printer)
        self.formatter.newline()


class Main:
    """Common main class for all retrace command line utilities."""

    def __init__(self):
        pass

    def main(self):
        """Parse the command line and process every trace file named on it.

        Files ending in '.gz' or '.bz2' are opened through GzipFile or
        BZ2File respectively; everything else is opened as a plain file.
        Each stream is closed once process_arg() returns or raises.
        """
        optparser = self.get_optparser()
        (options, args) = optparser.parse_args(sys.argv[1:])

        if not args:
            optparser.error('insufficient number of arguments')

        for arg in args:
            if arg.endswith('.gz'):
                from gzip import GzipFile
                stream = GzipFile(arg, 'rt')
            elif arg.endswith('.bz2'):
                from bz2 import BZ2File
                stream = BZ2File(arg, 'rU')
            else:
                stream = open(arg, 'rt')
            try:
                self.process_arg(stream, options)
            finally:
                # Do not leak one open file per command line argument.
                stream.close()

    def get_optparser(self):
        """Return the OptionParser describing the command line."""
        optparser = optparse.OptionParser(
            usage="\n\t%prog [options] TRACE [...]")
        return optparser

    def process_arg(self, stream, options):
        """Process one opened trace; the default dumps it with TraceDumper."""
        parser = TraceDumper(stream)
        parser.parse()


if __name__ == '__main__':
    Main().main()