#!/usr/bin/env python3
#
# Copyright © 2020 Google LLC
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
247ec681f3Smrg
257ec681f3Smrgimport argparse
267ec681f3Smrgimport queue
277ec681f3Smrgimport re
287ec681f3Smrgfrom serial_buffer import SerialBuffer
297ec681f3Smrgimport sys
307ec681f3Smrgimport threading
317ec681f3Smrg
327ec681f3Smrg
class CrosServoRun:
    """Reboot a board through its EC console and watch both serial logs.

    Lines from the EC and CPU serial buffers are merged into one FIFO by two
    daemon threads; run() consumes that FIFO, requests a network boot and
    turns the log output into an exit status.
    """

    def __init__(self, cpu, ec):
        """Open both serial buffers and start the threads feeding the queue.

        Args:
            cpu: CPU serial device, handed to SerialBuffer.
            ec: EC serial device, handed to SerialBuffer.
        """
        # Merged FIFO for the two serial buffers, fed by threads.
        self.serial_queue = queue.Queue()
        # Unique end-of-input marker; each feeder thread queues it once.
        self.sentinel = object()
        self.threads_done = 0

        # NOTE(review): the 2nd/3rd SerialBuffer arguments look like a log
        # file path and a per-line print prefix -- confirm in serial_buffer.
        self.ec_ser = SerialBuffer(
            ec, "results/serial-ec.txt", "R SERIAL-EC> ")
        self.cpu_ser = SerialBuffer(
            cpu, "results/serial.txt", "R SERIAL-CPU> ")

        self.iter_feed_ec = threading.Thread(
            target=self.iter_feed_queue, daemon=True, args=(self.ec_ser.lines(),))
        self.iter_feed_ec.start()

        self.iter_feed_cpu = threading.Thread(
            target=self.iter_feed_queue, daemon=True, args=(self.cpu_ser.lines(),))
        self.iter_feed_cpu.start()

    def iter_feed_queue(self, it):
        """Copy every item of `it` into the merged queue, then mark the end.

        Runs as the body of a feeder thread.  The sentinel must be the
        instance attribute: a bare `sentinel` name does not exist and would
        raise NameError, so the end marker would never be queued.
        """
        for item in it:
            self.serial_queue.put(item)
        self.serial_queue.put(self.sentinel)

    def get_serial_queue_line(self):
        """Block for the next queue entry and return it.

        A sentinel entry is returned as-is after being counted; once both
        feeder threads have reported completion they are joined.
        """
        line = self.serial_queue.get()
        # Identity, not equality: the sentinel is a unique marker object.
        if line is self.sentinel:
            self.threads_done += 1
            if self.threads_done == 2:
                self.iter_feed_cpu.join()
                self.iter_feed_ec.join()
        return line

    def serial_queue_lines(self):
        """Return an iterator over queued lines.

        The iterator stops at the first sentinel it receives, i.e. as soon
        as one feeder thread has finished.
        """
        return iter(self.get_serial_queue_line, self.sentinel)

    def ec_write(self, s):
        """Echo `s` to stdout and send it, encoded, to the EC serial port."""
        print("W SERIAL-EC> %s" % s)
        self.ec_ser.serial.write(s.encode())

    def cpu_write(self, s):
        """Echo `s` to stdout and send it, encoded, to the CPU serial port."""
        print("W SERIAL-CPU> %s" % s)
        self.cpu_ser.serial.write(s.encode())

    def print_error(self, message):
        """Print `message` wrapped in ANSI red / reset escape codes."""
        RED = '\033[0;31m'
        NO_COLOR = '\033[0m'
        print(RED + message + NO_COLOR)

    def run(self):
        """Reboot the board, request netboot and scan the logs for a result.

        Returns:
            0 when the log reports "hwci: mesa: pass"; 1 on any other
            reported result, on a kernel panic, or when the log ends without
            a result; 2 when a known intermittent failure is detected.
        """
        # Flush any partial commands in the EC's prompt, then ask for a reboot.
        self.ec_write("\n")
        self.ec_write("reboot\n")

        # This is emitted right when the bootloader pauses to check for input.
        # Emit a ^N character to request network boot, because we don't have a
        # direct-to-netboot firmware on cheza.
        for line in self.serial_queue_lines():
            if re.search("load_archive: loading locale_en.bin", line):
                self.cpu_write("\016")
                break

            # The Cheza boards have issues with failing to bring up power to
            # the system sometimes, possibly dependent on ambient temperature
            # in the farm.
            if re.search("POWER_GOOD not seen in time", line):
                self.print_error("Detected intermittent poweron failure, restarting run...")
                return 2

        tftp_failures = 0
        for line in self.serial_queue_lines():
            if re.search("---. end Kernel panic", line):
                return 1

            # The Cheza firmware seems to occasionally get stuck looping in
            # this error state during TFTP booting, possibly based on amount of
            # network traffic around it, but it'll usually recover after a
            # reboot.
            if re.search("R8152: Bulk read error 0xffffffbf", line):
                tftp_failures += 1
                if tftp_failures >= 100:
                    self.print_error("Detected intermittent tftp failure, restarting run...")
                    return 2

            # There are very infrequent bus errors during power management transitions
            # on cheza, which we don't expect to be the case on future boards.
            if re.search("Kernel panic - not syncing: Asynchronous SError Interrupt", line):
                self.print_error("Detected cheza power management bus error, restarting run...")
                return 2

            # If the network device dies, it's probably not graphics's fault, just try again.
            if re.search("NETDEV WATCHDOG", line):
                self.print_error(
                    "Detected network device failure, restarting run...")
                return 2

            # These HFI response errors started appearing with the introduction
            # of piglit runs.  CosmicPenguin says:
            #
            # "message ID 106 isn't a thing, so likely what happened is that we
            # got confused when parsing the HFI queue.  If it happened on only
            # one run, then memory corruption could be a possible clue"
            #
            # Given that it seems to trigger randomly near a GPU fault and then
            # break many tests after that, just restart the whole run.
            # NOTE(review): the message below repeats the bus-error text used
            # above; it looks like a copy/paste slip but is left unchanged.
            if re.search("a6xx_hfi_send_msg.*Unexpected message id .* on the response queue", line):
                self.print_error("Detected cheza power management bus error, restarting run...")
                return 2

            if re.search("coreboot.*bootblock starting", line):
                self.print_error(
                    "Detected spontaneous reboot, restarting run...")
                return 2

            # Raw string: "\S" in a plain literal is an invalid escape sequence.
            result = re.search(r"hwci: mesa: (\S*)", line)
            if result:
                return 0 if result.group(1) == "pass" else 1

        self.print_error("Reached the end of the CPU serial log without finding a result")
        return 1
1627ec681f3Smrg
1637ec681f3Smrg
def main():
    """Parse the serial device options, run until a final result, power off.

    A run() status of 2 means "retry", so the run is repeated until any other
    status comes back; that status becomes the process exit code after the
    EC has been asked to power the device off.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--cpu', type=str, help='CPU Serial device', required=True)
    arg_parser.add_argument(
        '--ec', type=str, help='EC Serial device', required=True)
    options = arg_parser.parse_args()

    servo = CrosServoRun(options.cpu, options.ec)

    # Keep restarting for as long as the run asks for a retry.
    retval = servo.run()
    while retval == 2:
        retval = servo.run()

    # power down the CPU on the device
    servo.ec_write("power off\n")

    sys.exit(retval)
1837ec681f3Smrg
1847ec681f3Smrg
# Script entry point: only run when executed directly, not when imported.
if __name__ == "__main__":
    main()
187