Diffstat (limited to 'scripts')
183 files changed, 35258 insertions, 0 deletions
diff --git a/scripts/analyse-9p-simpletrace.py b/scripts/analyse-9p-simpletrace.py new file mode 100755 index 000000000..7dfcb6ba2 --- /dev/null +++ b/scripts/analyse-9p-simpletrace.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python3 +# Pretty print 9p simpletrace log +# Usage: ./analyse-9p-simpletrace <trace-events> <trace-pid> +# +# Author: Harsh Prateek Bora +import os +import simpletrace + +symbol_9p = { + 6 : 'TLERROR', + 7 : 'RLERROR', + 8 : 'TSTATFS', + 9 : 'RSTATFS', + 12 : 'TLOPEN', + 13 : 'RLOPEN', + 14 : 'TLCREATE', + 15 : 'RLCREATE', + 16 : 'TSYMLINK', + 17 : 'RSYMLINK', + 18 : 'TMKNOD', + 19 : 'RMKNOD', + 20 : 'TRENAME', + 21 : 'RRENAME', + 22 : 'TREADLINK', + 23 : 'RREADLINK', + 24 : 'TGETATTR', + 25 : 'RGETATTR', + 26 : 'TSETATTR', + 27 : 'RSETATTR', + 30 : 'TXATTRWALK', + 31 : 'RXATTRWALK', + 32 : 'TXATTRCREATE', + 33 : 'RXATTRCREATE', + 40 : 'TREADDIR', + 41 : 'RREADDIR', + 50 : 'TFSYNC', + 51 : 'RFSYNC', + 52 : 'TLOCK', + 53 : 'RLOCK', + 54 : 'TGETLOCK', + 55 : 'RGETLOCK', + 70 : 'TLINK', + 71 : 'RLINK', + 72 : 'TMKDIR', + 73 : 'RMKDIR', + 74 : 'TRENAMEAT', + 75 : 'RRENAMEAT', + 76 : 'TUNLINKAT', + 77 : 'RUNLINKAT', + 100 : 'TVERSION', + 101 : 'RVERSION', + 102 : 'TAUTH', + 103 : 'RAUTH', + 104 : 'TATTACH', + 105 : 'RATTACH', + 106 : 'TERROR', + 107 : 'RERROR', + 108 : 'TFLUSH', + 109 : 'RFLUSH', + 110 : 'TWALK', + 111 : 'RWALK', + 112 : 'TOPEN', + 113 : 'ROPEN', + 114 : 'TCREATE', + 115 : 'RCREATE', + 116 : 'TREAD', + 117 : 'RREAD', + 118 : 'TWRITE', + 119 : 'RWRITE', + 120 : 'TCLUNK', + 121 : 'RCLUNK', + 122 : 'TREMOVE', + 123 : 'RREMOVE', + 124 : 'TSTAT', + 125 : 'RSTAT', + 126 : 'TWSTAT', + 127 : 'RWSTAT' +} + +class VirtFSRequestTracker(simpletrace.Analyzer): + def begin(self): + print("Pretty printing 9p simpletrace log ...") + + def v9fs_rerror(self, tag, id, err): + print("RERROR (tag =", tag, ", id =", symbol_9p[id], ", err = \"", os.strerror(err), "\")") + + def v9fs_version(self, tag, id, msize, version): + print("TVERSION (tag =", tag, ", msize =", msize, ", version =", version, ")") + + def v9fs_version_return(self, tag, id, msize, version): + print("RVERSION (tag =", tag, ", msize =", msize, ", version =", version, ")") + + def v9fs_attach(self, tag, id, fid, afid, uname, aname): + print("TATTACH (tag =", tag, ", fid =", fid, ", afid =", afid, ", uname =", uname, ", aname =", aname, ")") + + def v9fs_attach_return(self, tag, id, type, version, path): + print("RATTACH (tag =", tag, ", qid={type =", type, ", version =", version, ", path =", path, "})") + + def v9fs_stat(self, tag, id, fid): + print("TSTAT (tag =", tag, ", fid =", fid, ")") + + def v9fs_stat_return(self, tag, id, mode, atime, mtime, length): + print("RSTAT (tag =", tag, ", mode =", mode, ", atime =", atime, ", mtime =", mtime, ", length =", length, ")") + + def v9fs_getattr(self, tag, id, fid, request_mask): + print("TGETATTR (tag =", tag, ", fid =", fid, ", request_mask =", hex(request_mask), ")") + + def v9fs_getattr_return(self, tag, id, result_mask, mode, uid, gid): + print("RGETATTR (tag =", tag, ", result_mask =", hex(result_mask), ", mode =", oct(mode), ", uid =", uid, ", gid =", gid, ")") + + def v9fs_walk(self, tag, id, fid, newfid, nwnames): + print("TWALK (tag =", tag, ", fid =", fid, ", newfid =", newfid, ", nwnames =", nwnames, ")") + + def v9fs_walk_return(self, tag, id, nwnames, qids): + print("RWALK (tag =", tag, ", nwnames =", nwnames, ", qids =", hex(qids), ")") + + def v9fs_open(self, tag, id, fid, mode): + print("TOPEN (tag =", tag, ", fid =", fid, ", mode =", oct(mode), ")") + + 
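The VirtFSRequestTracker methods follow simpletrace's dispatch convention: each record in the binary trace log is routed to the Analyzer method named after its trace event, with the event arguments passed in order. A minimal sketch of that contract using the same simpletrace module from this scripts/ directory; the VersionCounter class is purely illustrative, and the end() hook is an assumption about the Analyzer base class (begin() and run() appear in the script itself):

    import simpletrace

    class VersionCounter(simpletrace.Analyzer):
        """Hypothetical analyzer: count 9p TVERSION requests in a trace."""
        def begin(self):
            self.versions = 0

        def v9fs_version(self, tag, id, msize, version):
            self.versions += 1

        def end(self):
            print("TVERSION count:", self.versions)

    # invoked the same way as the pretty-printer:
    #   ./count-versions.py <trace-events> <trace-file>
    simpletrace.run(VersionCounter())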
def v9fs_open_return(self, tag, id, type, version, path, iounit): + print("ROPEN (tag =", tag, ", qid={type =", type, ", version =", version, ", path =", path, "}, iounit =", iounit, ")") + + def v9fs_lcreate(self, tag, id, dfid, flags, mode, gid): + print("TLCREATE (tag =", tag, ", dfid =", dfid, ", flags =", oct(flags), ", mode =", oct(mode), ", gid =", gid, ")") + + def v9fs_lcreate_return(self, tag, id, type, version, path, iounit): + print("RLCREATE (tag =", tag, ", qid={type =", type, ", version =", version, ", path =", path, "}, iounit =", iounit, ")") + + def v9fs_fsync(self, tag, id, fid, datasync): + print("TFSYNC (tag =", tag, ", fid =", fid, ", datasync =", datasync, ")") + + def v9fs_clunk(self, tag, id, fid): + print("TCLUNK (tag =", tag, ", fid =", fid, ")") + + def v9fs_read(self, tag, id, fid, off, max_count): + print("TREAD (tag =", tag, ", fid =", fid, ", off =", off, ", max_count =", max_count, ")") + + def v9fs_read_return(self, tag, id, count, err): + print("RREAD (tag =", tag, ", count =", count, ", err =", err, ")") + + def v9fs_readdir(self, tag, id, fid, offset, max_count): + print("TREADDIR (tag =", tag, ", fid =", fid, ", offset =", offset, ", max_count =", max_count, ")") + + def v9fs_readdir_return(self, tag, id, count, retval): + print("RREADDIR (tag =", tag, ", count =", count, ", retval =", retval, ")") + + def v9fs_write(self, tag, id, fid, off, count, cnt): + print("TWRITE (tag =", tag, ", fid =", fid, ", off =", off, ", count =", count, ", cnt =", cnt, ")") + + def v9fs_write_return(self, tag, id, total, err): + print("RWRITE (tag =", tag, ", total =", total, ", err =", err, ")") + + def v9fs_create(self, tag, id, fid, name, perm, mode): + print("TCREATE (tag =", tag, ", fid =", fid, ", perm =", oct(perm), ", name =", name, ", mode =", oct(mode), ")") + + def v9fs_create_return(self, tag, id, type, version, path, iounit): + print("RCREATE (tag =", tag, ", qid={type =", type, ", version =", version, ", path =", path, "}, iounit =", iounit, ")") + + def v9fs_symlink(self, tag, id, fid, name, symname, gid): + print("TSYMLINK (tag =", tag, ", fid =", fid, ", name =", name, ", symname =", symname, ", gid =", gid, ")") + + def v9fs_symlink_return(self, tag, id, type, version, path): + print("RSYMLINK (tag =", tag, ", qid={type =", type, ", version =", version, ", path =", path, "})") + + def v9fs_flush(self, tag, id, flush_tag): + print("TFLUSH (tag =", tag, ", flush_tag =", flush_tag, ")") + + def v9fs_link(self, tag, id, dfid, oldfid, name): + print("TLINK (tag =", tag, ", dfid =", dfid, ", oldfid =", oldfid, ", name =", name, ")") + + def v9fs_remove(self, tag, id, fid): + print("TREMOVE (tag =", tag, ", fid =", fid, ")") + + def v9fs_wstat(self, tag, id, fid, mode, atime, mtime): + print("TWSTAT (tag =", tag, ", fid =", fid, ", mode =", oct(mode), ", atime =", atime, "mtime =", mtime, ")") + + def v9fs_mknod(self, tag, id, fid, mode, major, minor): + print("TMKNOD (tag =", tag, ", fid =", fid, ", mode =", oct(mode), ", major =", major, ", minor =", minor, ")") + + def v9fs_lock(self, tag, id, fid, type, start, length): + print("TLOCK (tag =", tag, ", fid =", fid, "type =", type, ", start =", start, ", length =", length, ")") + + def v9fs_lock_return(self, tag, id, status): + print("RLOCK (tag =", tag, ", status =", status, ")") + + def v9fs_getlock(self, tag, id, fid, type, start, length): + print("TGETLOCK (tag =", tag, ", fid =", fid, "type =", type, ", start =", start, ", length =", length, ")") + + def v9fs_getlock_return(self, tag, id, type, start, 
length, proc_id): + print("RGETLOCK (tag =", tag, "type =", type, ", start =", start, ", length =", length, ", proc_id =", proc_id, ")") + + def v9fs_mkdir(self, tag, id, fid, name, mode, gid): + print("TMKDIR (tag =", tag, ", fid =", fid, ", name =", name, ", mode =", mode, ", gid =", gid, ")") + + def v9fs_mkdir_return(self, tag, id, type, version, path, err): + print("RMKDIR (tag =", tag, ", qid={type =", type, ", version =", version, ", path =", path, "}, err =", err, ")") + + def v9fs_xattrwalk(self, tag, id, fid, newfid, name): + print("TXATTRWALK (tag =", tag, ", fid =", fid, ", newfid =", newfid, ", xattr name =", name, ")") + + def v9fs_xattrwalk_return(self, tag, id, size): + print("RXATTRWALK (tag =", tag, ", xattrsize =", size, ")") + + def v9fs_xattrcreate(self, tag, id, fid, name, size, flags): + print("TXATTRCREATE (tag =", tag, ", fid =", fid, ", name =", name, ", xattrsize =", size, ", flags =", flags, ")") + + def v9fs_readlink(self, tag, id, fid): + print("TREADLINK (tag =", tag, ", fid =", fid, ")") + + def v9fs_readlink_return(self, tag, id, target): + print("RREADLINK (tag =", tag, ", target =", target, ")") + +simpletrace.run(VirtFSRequestTracker()) diff --git a/scripts/analyse-locks-simpletrace.py b/scripts/analyse-locks-simpletrace.py new file mode 100755 index 000000000..63c11f4fc --- /dev/null +++ b/scripts/analyse-locks-simpletrace.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Analyse lock events and compute statistics +# +# Author: Alex Bennée <alex.bennee@linaro.org> +# + +import simpletrace +import argparse +import numpy as np + +class MutexAnalyser(simpletrace.Analyzer): + "A simpletrace Analyser for checking locks." + + def __init__(self): + self.locks = 0 + self.locked = 0 + self.unlocks = 0 + self.mutex_records = {} + + def _get_mutex(self, mutex): + if not mutex in self.mutex_records: + self.mutex_records[mutex] = {"locks": 0, + "lock_time": 0, + "acquire_times": [], + "locked": 0, + "locked_time": 0, + "held_times": [], + "unlocked": 0} + + return self.mutex_records[mutex] + + def qemu_mutex_lock(self, timestamp, mutex, filename, line): + self.locks += 1 + rec = self._get_mutex(mutex) + rec["locks"] += 1 + rec["lock_time"] = timestamp[0] + rec["lock_loc"] = (filename, line) + + def qemu_mutex_locked(self, timestamp, mutex, filename, line): + self.locked += 1 + rec = self._get_mutex(mutex) + rec["locked"] += 1 + rec["locked_time"] = timestamp[0] + acquire_time = rec["locked_time"] - rec["lock_time"] + rec["locked_loc"] = (filename, line) + rec["acquire_times"].append(acquire_time) + + def qemu_mutex_unlock(self, timestamp, mutex, filename, line): + self.unlocks += 1 + rec = self._get_mutex(mutex) + rec["unlocked"] += 1 + held_time = timestamp[0] - rec["locked_time"] + rec["held_times"].append(held_time) + rec["unlock_loc"] = (filename, line) + + +def get_args(): + "Grab options" + parser = argparse.ArgumentParser() + parser.add_argument("--output", "-o", type=str, help="Render plot to file") + parser.add_argument("events", type=str, help='trace file read from') + parser.add_argument("tracefile", type=str, help='trace file read from') + return parser.parse_args() + +if __name__ == '__main__': + args = get_args() + + # Gather data from the trace + analyser = MutexAnalyser() + simpletrace.process(args.events, args.tracefile, analyser) + + print ("Total locks: %d, locked: %d, unlocked: %d" % + (analyser.locks, analyser.locked, analyser.unlocks)) + + # Now dump the individual lock stats + for key, val in 
sorted(analyser.mutex_records.items(),
+                           key=lambda k_v: k_v[1]["locks"]):
+        print ("Lock: %#x locks: %d, locked: %d, unlocked: %d" %
+               (key, val["locks"], val["locked"], val["unlocked"]))
+
+        acquire_times = np.array(val["acquire_times"])
+        if len(acquire_times) > 0:
+            print ("  Acquire Time: min:%d median:%d avg:%.2f max:%d" %
+                   (acquire_times.min(), np.median(acquire_times),
+                    acquire_times.mean(), acquire_times.max()))
+
+        held_times = np.array(val["held_times"])
+        if len(held_times) > 0:
+            print ("  Held Time: min:%d median:%d avg:%.2f max:%d" %
+                   (held_times.min(), np.median(held_times),
+                    held_times.mean(), held_times.max()))
+
+        # Check if any locks still held
+        if val["locks"] > val["locked"]:
+            print ("  LOCK HELD (%s:%s)" % (val["locked_loc"]))
+            print ("  BLOCKED (%s:%s)" % (val["lock_loc"]))
diff --git a/scripts/analyze-inclusions b/scripts/analyze-inclusions
new file mode 100644
index 000000000..14806e18c
--- /dev/null
+++ b/scripts/analyze-inclusions
@@ -0,0 +1,102 @@
+#! /bin/sh
+#
+# Copyright (C) 2016 Red Hat, Inc.
+#
+# Author: Paolo Bonzini <pbonzini@redhat.com>
+#
+# Print statistics about header file inclusions.
+#
+# The script has two modes of execution:
+#
+# 1) if invoked with a path on the command line (possibly
+#    preceded by a "--" argument), it will run the analysis on
+#    an existing build directory
+#
+# 2) otherwise, it will configure and build QEMU itself in a
+#    "+build" subdirectory which is left around when the script
+#    exits.  In this case the command line is passed directly to
+#    "make" (typically used for a "-j" argument suitable for your
+#    system).
+#
+# Inspired by a post by Markus Armbruster.
+
+case "x$1" in
+x--)
+  shift
+  cd "$1" || exit $?
+  ;;
+x-* | x)
+  mkdir -p +build
+  cd +build
+  test -f Makefile && make distclean
+  ../configure
+  make "$@"
+  ;;
+*)
+  cd "$1" || exit $?
+esac
+
+QEMU_CFLAGS=$(sed -n s/^QEMU_CFLAGS=//p config-host.mak)
+QEMU_INCLUDES=$(sed -n s/^QEMU_INCLUDES=//p config-host.mak | \
+               sed 's/$(SRC_PATH)/../g' )
+CFLAGS=$(sed -n s/^CFLAGS=//p config-host.mak)
+
+grep_include() {
+  find . -name "*.d" -exec grep -l "$@" {} + | wc -l
+}
+
+echo Found $(find .
-name "*.d" | wc -l) object files +echo $(grep_include -F 'include/qemu-common.h') files include qemu-common.h +echo $(grep_include -F 'hw/hw.h') files include hw/hw.h +echo $(grep_include 'target/[a-z0-9]*/cpu\.h') files include cpu.h +echo $(grep_include -F 'qapi-types.h') files include qapi-types.h +echo $(grep_include -F 'trace/generated-tracers.h') files include generated-tracers.h +echo $(grep_include -F 'qapi/error.h') files include qapi/error.h +echo $(grep_include -F 'qom/object.h') files include qom/object.h +echo $(grep_include -F 'block/aio.h') files include block/aio.h +echo $(grep_include -F 'exec/memory.h') files include exec/memory.h +echo $(grep_include -F 'fpu/softfloat.h') files include fpu/softfloat.h +echo $(grep_include -F 'qemu/bswap.h') files include qemu/bswap.h +echo + +awk1=' + /^# / { file = $3;next } + NR>1 { bytes[file]+=length()+1; lines[file]++ } + END { for(i in lines) print i,lines[i],bytes[i] }' + +awk2=' + {tot_l+=$2;tot_b+=$3;tot_f++} + /\/usr.*\/glib/ {glib_l+=$2;glib_b+=$3;glib_f++;next} + /\/usr/ {sys_l+=$2;sys_b+=$3;sys_f++;next} + {qemu_l+=$2;qemu_b+=$3;qemu_f++;next} + END { + printf "%s\t %s\t %s\t %s\n", "lines", "bytes", "files", "source" + printf "%s\t %s\t %s\t %s\n", qemu_l, qemu_b, qemu_f, "QEMU" + printf "%s\t %s\t %s\t %s\n", sys_l, sys_b, sys_f, "system" + printf "%s\t %s\t %s\t %s\n", glib_l, glib_b, glib_f, "glib" + printf "%s\t %s\t %s\t %s\n", tot_l, tot_b, tot_f, "total" + }' + +analyze() { + cc $QEMU_CFLAGS $QEMU_INCLUDES $CFLAGS -E -o - "$@" | \ + awk "$awk1" | awk "$awk2" + echo +} + +echo osdep.h: +analyze ../include/qemu/osdep.h + +echo qemu-common.h: +analyze -include ../include/qemu/osdep.h ../include/qemu-common.h + +echo hw/hw.h: +analyze -include ../include/qemu/osdep.h ../include/hw/hw.h + +echo trace/generated-tracers.h: +analyze -include ../include/qemu/osdep.h trace/generated-tracers.h + +echo target/i386/cpu.h: +analyze -DNEED_CPU_H -I../target/i386 -Ii386-softmmu -include ../include/qemu/osdep.h ../target/i386/cpu.h + +echo hw/hw.h + NEED_CPU_H: +analyze -DNEED_CPU_H -I../target/i386 -Ii386-softmmu -include ../include/qemu/osdep.h ../include/hw/hw.h diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py new file mode 100755 index 000000000..b82a1b0c5 --- /dev/null +++ b/scripts/analyze-migration.py @@ -0,0 +1,613 @@ +#!/usr/bin/env python3 +# +# Migration Stream Analyzer +# +# Copyright (c) 2015 Alexander Graf <agraf@suse.de> +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ +import json +import os +import argparse +import collections +import struct +import sys + + +def mkdir_p(path): + try: + os.makedirs(path) + except OSError: + pass + + +class MigrationFile(object): + def __init__(self, filename): + self.filename = filename + self.file = open(self.filename, "rb") + + def read64(self): + return int.from_bytes(self.file.read(8), byteorder='big', signed=True) + + def read32(self): + return int.from_bytes(self.file.read(4), byteorder='big', signed=True) + + def read16(self): + return int.from_bytes(self.file.read(2), byteorder='big', signed=True) + + def read8(self): + return int.from_bytes(self.file.read(1), byteorder='big', signed=True) + + def readstr(self, len = None): + return self.readvar(len).decode('utf-8') + + def readvar(self, size = None): + if size is None: + size = self.read8() + if size == 0: + return "" + value = self.file.read(size) + if len(value) != size: + raise Exception("Unexpected end of %s at 0x%x" % (self.filename, self.file.tell())) + return value + + def tell(self): + return self.file.tell() + + # The VMSD description is at the end of the file, after EOF. Look for + # the last NULL byte, then for the beginning brace of JSON. + def read_migration_debug_json(self): + QEMU_VM_VMDESCRIPTION = 0x06 + + # Remember the offset in the file when we started + entrypos = self.file.tell() + + # Read the last 10MB + self.file.seek(0, os.SEEK_END) + endpos = self.file.tell() + self.file.seek(max(-endpos, -10 * 1024 * 1024), os.SEEK_END) + datapos = self.file.tell() + data = self.file.read() + # The full file read closed the file as well, reopen it + self.file = open(self.filename, "rb") + + # Find the last NULL byte, then the first brace after that. This should + # be the beginning of our JSON data. + nulpos = data.rfind(b'\0') + jsonpos = data.find(b'{', nulpos) + + # Check backwards from there and see whether we guessed right + self.file.seek(datapos + jsonpos - 5, 0) + if self.read8() != QEMU_VM_VMDESCRIPTION: + raise Exception("No Debug Migration device found") + + jsonlen = self.read32() + + # Seek back to where we were at the beginning + self.file.seek(entrypos, 0) + + # explicit decode() needed for Python 3.5 compatibility + return data[jsonpos:jsonpos + jsonlen].decode("utf-8") + + def close(self): + self.file.close() + +class RamSection(object): + RAM_SAVE_FLAG_COMPRESS = 0x02 + RAM_SAVE_FLAG_MEM_SIZE = 0x04 + RAM_SAVE_FLAG_PAGE = 0x08 + RAM_SAVE_FLAG_EOS = 0x10 + RAM_SAVE_FLAG_CONTINUE = 0x20 + RAM_SAVE_FLAG_XBZRLE = 0x40 + RAM_SAVE_FLAG_HOOK = 0x80 + + def __init__(self, file, version_id, ramargs, section_key): + if version_id != 4: + raise Exception("Unknown RAM version %d" % version_id) + + self.file = file + self.section_key = section_key + self.TARGET_PAGE_SIZE = ramargs['page_size'] + self.dump_memory = ramargs['dump_memory'] + self.write_memory = ramargs['write_memory'] + self.sizeinfo = collections.OrderedDict() + self.data = collections.OrderedDict() + self.data['section sizes'] = self.sizeinfo + self.name = '' + if self.write_memory: + self.files = { } + if self.dump_memory: + self.memory = collections.OrderedDict() + self.data['memory'] = self.memory + + def __repr__(self): + return self.data.__repr__() + + def __str__(self): + return self.data.__str__() + + def getDict(self): + return self.data + + def read(self): + # Read all RAM sections + while True: + addr = self.file.read64() + flags = addr & (self.TARGET_PAGE_SIZE - 1) + addr &= ~(self.TARGET_PAGE_SIZE - 1) + + if flags & self.RAM_SAVE_FLAG_MEM_SIZE: + while True: + 
namelen = self.file.read8() + # We assume that no RAM chunk is big enough to ever + # hit the first byte of the address, so when we see + # a zero here we know it has to be an address, not the + # length of the next block. + if namelen == 0: + self.file.file.seek(-1, 1) + break + self.name = self.file.readstr(len = namelen) + len = self.file.read64() + self.sizeinfo[self.name] = '0x%016x' % len + if self.write_memory: + print(self.name) + mkdir_p('./' + os.path.dirname(self.name)) + f = open('./' + self.name, "wb") + f.truncate(0) + f.truncate(len) + self.files[self.name] = f + flags &= ~self.RAM_SAVE_FLAG_MEM_SIZE + + if flags & self.RAM_SAVE_FLAG_COMPRESS: + if flags & self.RAM_SAVE_FLAG_CONTINUE: + flags &= ~self.RAM_SAVE_FLAG_CONTINUE + else: + self.name = self.file.readstr() + fill_char = self.file.read8() + # The page in question is filled with fill_char now + if self.write_memory and fill_char != 0: + self.files[self.name].seek(addr, os.SEEK_SET) + self.files[self.name].write(chr(fill_char) * self.TARGET_PAGE_SIZE) + if self.dump_memory: + self.memory['%s (0x%016x)' % (self.name, addr)] = 'Filled with 0x%02x' % fill_char + flags &= ~self.RAM_SAVE_FLAG_COMPRESS + elif flags & self.RAM_SAVE_FLAG_PAGE: + if flags & self.RAM_SAVE_FLAG_CONTINUE: + flags &= ~self.RAM_SAVE_FLAG_CONTINUE + else: + self.name = self.file.readstr() + + if self.write_memory or self.dump_memory: + data = self.file.readvar(size = self.TARGET_PAGE_SIZE) + else: # Just skip RAM data + self.file.file.seek(self.TARGET_PAGE_SIZE, 1) + + if self.write_memory: + self.files[self.name].seek(addr, os.SEEK_SET) + self.files[self.name].write(data) + if self.dump_memory: + hexdata = " ".join("{0:02x}".format(ord(c)) for c in data) + self.memory['%s (0x%016x)' % (self.name, addr)] = hexdata + + flags &= ~self.RAM_SAVE_FLAG_PAGE + elif flags & self.RAM_SAVE_FLAG_XBZRLE: + raise Exception("XBZRLE RAM compression is not supported yet") + elif flags & self.RAM_SAVE_FLAG_HOOK: + raise Exception("RAM hooks don't make sense with files") + + # End of RAM section + if flags & self.RAM_SAVE_FLAG_EOS: + break + + if flags != 0: + raise Exception("Unknown RAM flags: %x" % flags) + + def __del__(self): + if self.write_memory: + for key in self.files: + self.files[key].close() + + +class HTABSection(object): + HASH_PTE_SIZE_64 = 16 + + def __init__(self, file, version_id, device, section_key): + if version_id != 1: + raise Exception("Unknown HTAB version %d" % version_id) + + self.file = file + self.section_key = section_key + + def read(self): + + header = self.file.read32() + + if (header == -1): + # "no HPT" encoding + return + + if (header > 0): + # First section, just the hash shift + return + + # Read until end marker + while True: + index = self.file.read32() + n_valid = self.file.read16() + n_invalid = self.file.read16() + + if index == 0 and n_valid == 0 and n_invalid == 0: + break + + self.file.readvar(n_valid * self.HASH_PTE_SIZE_64) + + def getDict(self): + return "" + + +class ConfigurationSection(object): + def __init__(self, file): + self.file = file + + def read(self): + name_len = self.file.read32() + name = self.file.readstr(len = name_len) + +class VMSDFieldGeneric(object): + def __init__(self, desc, file): + self.file = file + self.desc = desc + self.data = "" + + def __repr__(self): + return str(self.__str__()) + + def __str__(self): + return " ".join("{0:02x}".format(c) for c in self.data) + + def getDict(self): + return self.__str__() + + def read(self): + size = int(self.desc['size']) + self.data = 
self.file.readvar(size) + return self.data + +class VMSDFieldInt(VMSDFieldGeneric): + def __init__(self, desc, file): + super(VMSDFieldInt, self).__init__(desc, file) + self.size = int(desc['size']) + self.format = '0x%%0%dx' % (self.size * 2) + self.sdtype = '>i%d' % self.size + self.udtype = '>u%d' % self.size + + def __repr__(self): + if self.data < 0: + return ('%s (%d)' % ((self.format % self.udata), self.data)) + else: + return self.format % self.data + + def __str__(self): + return self.__repr__() + + def getDict(self): + return self.__str__() + + def read(self): + super(VMSDFieldInt, self).read() + self.sdata = int.from_bytes(self.data, byteorder='big', signed=True) + self.udata = int.from_bytes(self.data, byteorder='big', signed=False) + self.data = self.sdata + return self.data + +class VMSDFieldUInt(VMSDFieldInt): + def __init__(self, desc, file): + super(VMSDFieldUInt, self).__init__(desc, file) + + def read(self): + super(VMSDFieldUInt, self).read() + self.data = self.udata + return self.data + +class VMSDFieldIntLE(VMSDFieldInt): + def __init__(self, desc, file): + super(VMSDFieldIntLE, self).__init__(desc, file) + self.dtype = '<i%d' % self.size + +class VMSDFieldBool(VMSDFieldGeneric): + def __init__(self, desc, file): + super(VMSDFieldBool, self).__init__(desc, file) + + def __repr__(self): + return self.data.__repr__() + + def __str__(self): + return self.data.__str__() + + def getDict(self): + return self.data + + def read(self): + super(VMSDFieldBool, self).read() + if self.data[0] == 0: + self.data = False + else: + self.data = True + return self.data + +class VMSDFieldStruct(VMSDFieldGeneric): + QEMU_VM_SUBSECTION = 0x05 + + def __init__(self, desc, file): + super(VMSDFieldStruct, self).__init__(desc, file) + self.data = collections.OrderedDict() + + # When we see compressed array elements, unfold them here + new_fields = [] + for field in self.desc['struct']['fields']: + if not 'array_len' in field: + new_fields.append(field) + continue + array_len = field.pop('array_len') + field['index'] = 0 + new_fields.append(field) + for i in range(1, array_len): + c = field.copy() + c['index'] = i + new_fields.append(c) + + self.desc['struct']['fields'] = new_fields + + def __repr__(self): + return self.data.__repr__() + + def __str__(self): + return self.data.__str__() + + def read(self): + for field in self.desc['struct']['fields']: + try: + reader = vmsd_field_readers[field['type']] + except: + reader = VMSDFieldGeneric + + field['data'] = reader(field, self.file) + field['data'].read() + + if 'index' in field: + if field['name'] not in self.data: + self.data[field['name']] = [] + a = self.data[field['name']] + if len(a) != int(field['index']): + raise Exception("internal index of data field unmatched (%d/%d)" % (len(a), int(field['index']))) + a.append(field['data']) + else: + self.data[field['name']] = field['data'] + + if 'subsections' in self.desc['struct']: + for subsection in self.desc['struct']['subsections']: + if self.file.read8() != self.QEMU_VM_SUBSECTION: + raise Exception("Subsection %s not found at offset %x" % ( subsection['vmsd_name'], self.file.tell())) + name = self.file.readstr() + version_id = self.file.read32() + self.data[name] = VMSDSection(self.file, version_id, subsection, (name, 0)) + self.data[name].read() + + def getDictItem(self, value): + # Strings would fall into the array category, treat + # them specially + if value.__class__ is ''.__class__: + return value + + try: + return self.getDictOrderedDict(value) + except: + try: + return 
self.getDictArray(value) + except: + try: + return value.getDict() + except: + return value + + def getDictArray(self, array): + r = [] + for value in array: + r.append(self.getDictItem(value)) + return r + + def getDictOrderedDict(self, dict): + r = collections.OrderedDict() + for (key, value) in dict.items(): + r[key] = self.getDictItem(value) + return r + + def getDict(self): + return self.getDictOrderedDict(self.data) + +vmsd_field_readers = { + "bool" : VMSDFieldBool, + "int8" : VMSDFieldInt, + "int16" : VMSDFieldInt, + "int32" : VMSDFieldInt, + "int32 equal" : VMSDFieldInt, + "int32 le" : VMSDFieldIntLE, + "int64" : VMSDFieldInt, + "uint8" : VMSDFieldUInt, + "uint16" : VMSDFieldUInt, + "uint32" : VMSDFieldUInt, + "uint32 equal" : VMSDFieldUInt, + "uint64" : VMSDFieldUInt, + "int64 equal" : VMSDFieldInt, + "uint8 equal" : VMSDFieldInt, + "uint16 equal" : VMSDFieldInt, + "float64" : VMSDFieldGeneric, + "timer" : VMSDFieldGeneric, + "buffer" : VMSDFieldGeneric, + "unused_buffer" : VMSDFieldGeneric, + "bitmap" : VMSDFieldGeneric, + "struct" : VMSDFieldStruct, + "unknown" : VMSDFieldGeneric, +} + +class VMSDSection(VMSDFieldStruct): + def __init__(self, file, version_id, device, section_key): + self.file = file + self.data = "" + self.vmsd_name = "" + self.section_key = section_key + desc = device + if 'vmsd_name' in device: + self.vmsd_name = device['vmsd_name'] + + # A section really is nothing but a FieldStruct :) + super(VMSDSection, self).__init__({ 'struct' : desc }, file) + +############################################################################### + +class MigrationDump(object): + QEMU_VM_FILE_MAGIC = 0x5145564d + QEMU_VM_FILE_VERSION = 0x00000003 + QEMU_VM_EOF = 0x00 + QEMU_VM_SECTION_START = 0x01 + QEMU_VM_SECTION_PART = 0x02 + QEMU_VM_SECTION_END = 0x03 + QEMU_VM_SECTION_FULL = 0x04 + QEMU_VM_SUBSECTION = 0x05 + QEMU_VM_VMDESCRIPTION = 0x06 + QEMU_VM_CONFIGURATION = 0x07 + QEMU_VM_SECTION_FOOTER= 0x7e + + def __init__(self, filename): + self.section_classes = { ( 'ram', 0 ) : [ RamSection, None ], + ( 'spapr/htab', 0) : ( HTABSection, None ) } + self.filename = filename + self.vmsd_desc = None + + def read(self, desc_only = False, dump_memory = False, write_memory = False): + # Read in the whole file + file = MigrationFile(self.filename) + + # File magic + data = file.read32() + if data != self.QEMU_VM_FILE_MAGIC: + raise Exception("Invalid file magic %x" % data) + + # Version (has to be v3) + data = file.read32() + if data != self.QEMU_VM_FILE_VERSION: + raise Exception("Invalid version number %d" % data) + + self.load_vmsd_json(file) + + # Read sections + self.sections = collections.OrderedDict() + + if desc_only: + return + + ramargs = {} + ramargs['page_size'] = self.vmsd_desc['page_size'] + ramargs['dump_memory'] = dump_memory + ramargs['write_memory'] = write_memory + self.section_classes[('ram',0)][1] = ramargs + + while True: + section_type = file.read8() + if section_type == self.QEMU_VM_EOF: + break + elif section_type == self.QEMU_VM_CONFIGURATION: + section = ConfigurationSection(file) + section.read() + elif section_type == self.QEMU_VM_SECTION_START or section_type == self.QEMU_VM_SECTION_FULL: + section_id = file.read32() + name = file.readstr() + instance_id = file.read32() + version_id = file.read32() + section_key = (name, instance_id) + classdesc = self.section_classes[section_key] + section = classdesc[0](file, version_id, classdesc[1], section_key) + self.sections[section_id] = section + section.read() + elif section_type == self.QEMU_VM_SECTION_PART 
or section_type == self.QEMU_VM_SECTION_END: + section_id = file.read32() + self.sections[section_id].read() + elif section_type == self.QEMU_VM_SECTION_FOOTER: + read_section_id = file.read32() + if read_section_id != section_id: + raise Exception("Mismatched section footer: %x vs %x" % (read_section_id, section_id)) + else: + raise Exception("Unknown section type: %d" % section_type) + file.close() + + def load_vmsd_json(self, file): + vmsd_json = file.read_migration_debug_json() + self.vmsd_desc = json.loads(vmsd_json, object_pairs_hook=collections.OrderedDict) + for device in self.vmsd_desc['devices']: + key = (device['name'], device['instance_id']) + value = ( VMSDSection, device ) + self.section_classes[key] = value + + def getDict(self): + r = collections.OrderedDict() + for (key, value) in self.sections.items(): + key = "%s (%d)" % ( value.section_key[0], key ) + r[key] = value.getDict() + return r + +############################################################################### + +class JSONEncoder(json.JSONEncoder): + def default(self, o): + if isinstance(o, VMSDFieldGeneric): + return str(o) + return json.JSONEncoder.default(self, o) + +parser = argparse.ArgumentParser() +parser.add_argument("-f", "--file", help='migration dump to read from', required=True) +parser.add_argument("-m", "--memory", help='dump RAM contents as well', action='store_true') +parser.add_argument("-d", "--dump", help='what to dump ("state" or "desc")', default='state') +parser.add_argument("-x", "--extract", help='extract contents into individual files', action='store_true') +args = parser.parse_args() + +jsonenc = JSONEncoder(indent=4, separators=(',', ': ')) + +if args.extract: + dump = MigrationDump(args.file) + + dump.read(desc_only = True) + print("desc.json") + f = open("desc.json", "w") + f.truncate() + f.write(jsonenc.encode(dump.vmsd_desc)) + f.close() + + dump.read(write_memory = True) + dict = dump.getDict() + print("state.json") + f = open("state.json", "w") + f.truncate() + f.write(jsonenc.encode(dict)) + f.close() +elif args.dump == "state": + dump = MigrationDump(args.file) + dump.read(dump_memory = args.memory) + dict = dump.getDict() + print(jsonenc.encode(dict)) +elif args.dump == "desc": + dump = MigrationDump(args.file) + dump.read(desc_only = True) + print(jsonenc.encode(dump.vmsd_desc)) +else: + raise Exception("Please specify either -x, -d state or -d desc") diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh new file mode 100755 index 000000000..c6169db69 --- /dev/null +++ b/scripts/archive-source.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# +# Author: Fam Zheng <famz@redhat.com> +# +# Archive source tree, including submodules. This is created for test code to +# export the source files, in order to be built in a different environment, +# such as in a docker instance or VM. +# +# This code is licensed under the GPL version 2 or later. See +# the COPYING file in the top-level directory. + +error() { + printf %s\\n "$*" >&2 + exit 1 +} + +if test $# -lt 1; then + error "Usage: $0 <output tarball>" +fi + +tar_file=$(realpath "$1") +sub_tdir=$(mktemp -d "${tar_file%.tar}.sub.XXXXXXXX") +sub_file="${sub_tdir}/submodule.tar" + +# We want a predictable list of submodules for builds, that is +# independent of what the developer currently has initialized +# in their checkout, because the build environment is completely +# different to the host OS. 
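The update loop further below keys off the one-character prefix that `git submodule status` prints for each submodule. For reference, a sketch of that classification in Python; the hash in the sample line is made up:

    def classify_submodule(status_line):
        # ' ' = checked out and in sync, '-' = not initialized,
        # '+' = checked out at a different commit (out of sync)
        states = {' ': 'in-sync', '-': 'uninitialized', '+': 'out-of-sync'}
        return states.get(status_line[0], 'unknown')

    assert classify_submodule('-c24306f8 dtc') == 'uninitialized'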
+submodules="dtc slirp meson ui/keycodemapdb" +submodules="$submodules tests/fp/berkeley-softfloat-3 tests/fp/berkeley-testfloat-3" +sub_deinit="" + +function cleanup() { + local status=$? + rm -rf "$sub_tdir" + if test "$sub_deinit" != ""; then + git submodule deinit $sub_deinit + fi + exit $status +} +trap "cleanup" 0 1 2 3 15 + +function tree_ish() { + local retval='HEAD' + if ! git diff-index --quiet --ignore-submodules=all HEAD -- &>/dev/null + then + retval=$(git stash create) + fi + echo "$retval" +} + +git archive --format tar "$(tree_ish)" > "$tar_file" +test $? -ne 0 && error "failed to archive qemu" +for sm in $submodules; do + status="$(git submodule status "$sm")" + smhash="${status#[ +-]}" + smhash="${smhash%% *}" + case "$status" in + -*) + sub_deinit="$sub_deinit $sm" + git submodule update --init "$sm" + test $? -ne 0 && error "failed to update submodule $sm" + ;; + +*) + echo "WARNING: submodule $sm is out of sync" + ;; + esac + (cd $sm; git archive --format tar --prefix "$sm/" $(tree_ish)) > "$sub_file" + test $? -ne 0 && error "failed to archive submodule $sm ($smhash)" + tar --concatenate --file "$tar_file" "$sub_file" + test $? -ne 0 && error "failed append submodule $sm to $tar_file" +done +exit 0 diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py new file mode 100644 index 000000000..08be81340 --- /dev/null +++ b/scripts/block-coroutine-wrapper.py @@ -0,0 +1,176 @@ +#! /usr/bin/env python3 +"""Generate coroutine wrappers for block subsystem. + +The program parses one or several concatenated c files from stdin, +searches for functions with the 'generated_co_wrapper' specifier +and generates corresponding wrappers on stdout. + +Usage: block-coroutine-wrapper.py generated-file.c FILE.[ch]... + +Copyright (c) 2020 Virtuozzo International GmbH. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
+""" + +import sys +import re +from typing import Iterator + + +def gen_header(): + copyright = re.sub('^.*Copyright', 'Copyright', __doc__, flags=re.DOTALL) + copyright = re.sub('^(?=.)', ' * ', copyright.strip(), flags=re.MULTILINE) + copyright = re.sub('^$', ' *', copyright, flags=re.MULTILINE) + return f"""\ +/* + * File is generated by scripts/block-coroutine-wrapper.py + * +{copyright} + */ + +#include "qemu/osdep.h" +#include "block/coroutines.h" +#include "block/block-gen.h" +#include "block/block_int.h"\ +""" + + +class ParamDecl: + param_re = re.compile(r'(?P<decl>' + r'(?P<type>.*[ *])' + r'(?P<name>[a-z][a-z0-9_]*)' + r')') + + def __init__(self, param_decl: str) -> None: + m = self.param_re.match(param_decl.strip()) + if m is None: + raise ValueError(f'Wrong parameter declaration: "{param_decl}"') + self.decl = m.group('decl') + self.type = m.group('type') + self.name = m.group('name') + + +class FuncDecl: + def __init__(self, return_type: str, name: str, args: str) -> None: + self.return_type = return_type.strip() + self.name = name.strip() + self.args = [ParamDecl(arg.strip()) for arg in args.split(',')] + + def gen_list(self, format: str) -> str: + return ', '.join(format.format_map(arg.__dict__) for arg in self.args) + + def gen_block(self, format: str) -> str: + return '\n'.join(format.format_map(arg.__dict__) for arg in self.args) + + +# Match wrappers declared with a generated_co_wrapper mark +func_decl_re = re.compile(r'^int\s*generated_co_wrapper\s*' + r'(?P<wrapper_name>[a-z][a-z0-9_]*)' + r'\((?P<args>[^)]*)\);$', re.MULTILINE) + + +def func_decl_iter(text: str) -> Iterator: + for m in func_decl_re.finditer(text): + yield FuncDecl(return_type='int', + name=m.group('wrapper_name'), + args=m.group('args')) + + +def snake_to_camel(func_name: str) -> str: + """ + Convert underscore names like 'some_function_name' to camel-case like + 'SomeFunctionName' + """ + words = func_name.split('_') + words = [w[0].upper() + w[1:] for w in words] + return ''.join(words) + + +def gen_wrapper(func: FuncDecl) -> str: + assert not '_co_' in func.name + assert func.return_type == 'int' + assert func.args[0].type in ['BlockDriverState *', 'BdrvChild *', + 'BlockBackend *'] + + subsystem, subname = func.name.split('_', 1) + + name = f'{subsystem}_co_{subname}' + + t = func.args[0].type + if t == 'BlockDriverState *': + bs = 'bs' + elif t == 'BdrvChild *': + bs = 'child->bs' + else: + bs = 'blk_bs(blk)' + struct_name = snake_to_camel(name) + + return f"""\ +/* + * Wrappers for {name} + */ + +typedef struct {struct_name} {{ + BdrvPollCo poll_state; +{ func.gen_block(' {decl};') } +}} {struct_name}; + +static void coroutine_fn {name}_entry(void *opaque) +{{ + {struct_name} *s = opaque; + + s->poll_state.ret = {name}({ func.gen_list('s->{name}') }); + s->poll_state.in_progress = false; + + aio_wait_kick(); +}} + +int {func.name}({ func.gen_list('{decl}') }) +{{ + if (qemu_in_coroutine()) {{ + return {name}({ func.gen_list('{name}') }); + }} else {{ + {struct_name} s = {{ + .poll_state.bs = {bs}, + .poll_state.in_progress = true, + +{ func.gen_block(' .{name} = {name},') } + }}; + + s.poll_state.co = qemu_coroutine_create({name}_entry, &s); + + return bdrv_poll_co(&s.poll_state); + }} +}}""" + + +def gen_wrappers(input_code: str) -> str: + res = '' + for func in func_decl_iter(input_code): + res += '\n\n\n' + res += gen_wrapper(func) + + return res + + +if __name__ == '__main__': + if len(sys.argv) < 3: + exit(f'Usage: {sys.argv[0]} OUT_FILE.c IN_FILE.[ch]...') + + with open(sys.argv[1], 'w', 
encoding='utf-8') as f_out: + f_out.write(gen_header()) + for fname in sys.argv[2:]: + with open(fname, encoding='utf-8') as f_in: + f_out.write(gen_wrappers(f_in.read())) + f_out.write('\n') diff --git a/scripts/check_sparse.py b/scripts/check_sparse.py new file mode 100644 index 000000000..295612444 --- /dev/null +++ b/scripts/check_sparse.py @@ -0,0 +1,59 @@ +#! /usr/bin/env python3 + +# Invoke sparse based on the contents of compile_commands.json, +# also working around several deficiencies in cgcc's command line +# parsing + +import json +import subprocess +import os +import sys +import shlex + +def cmdline_for_sparse(sparse, cmdline): + # Do not include the C compiler executable + skip = True + arg = False + out = sparse + ['-no-compile'] + for x in cmdline: + if arg: + out.append(x) + arg = False + continue + if skip: + skip = False + continue + # prevent sparse from treating output files as inputs + if x == '-MF' or x == '-MQ' or x == '-o': + skip = True + continue + # cgcc ignores -no-compile if it sees -M or -MM? + if x.startswith('-M'): + continue + # sparse does not understand these! + if x == '-iquote' or x == '-isystem': + x = '-I' + if x == '-I': + arg = True + out.append(x) + return out + +root_path = os.getenv('MESON_BUILD_ROOT') +def build_path(s): + return s if not root_path else os.path.join(root_path, s) + +ccjson_path = build_path(sys.argv[1]) +with open(ccjson_path, 'r') as fd: + compile_commands = json.load(fd) + +sparse = sys.argv[2:] +sparse_env = os.environ.copy() +for cmd in compile_commands: + cmdline = shlex.split(cmd['command']) + cmd = cmdline_for_sparse(sparse, cmdline) + print('REAL_CC=%s' % shlex.quote(cmdline[0]), + ' '.join((shlex.quote(x) for x in cmd))) + sparse_env['REAL_CC'] = cmdline[0] + r = subprocess.run(cmd, env=sparse_env, cwd=root_path) + if r.returncode != 0: + sys.exit(r.returncode) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl new file mode 100755 index 000000000..cb8eff233 --- /dev/null +++ b/scripts/checkpatch.pl @@ -0,0 +1,3053 @@ +#!/usr/bin/env perl +# (c) 2001, Dave Jones. (the file handling bit) +# (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit) +# (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite) +# (c) 2008-2010 Andy Whitcroft <apw@canonical.com> +# Licensed under the terms of the GNU GPL License version 2 + +use strict; +use warnings; +use Term::ANSIColor qw(:constants); + +my $P = $0; +$P =~ s@.*/@@g; + +our $SrcFile = qr{\.(?:(h|c)(\.inc)?|cpp|s|S|pl|py|sh)$}; + +my $V = '0.31'; + +use Getopt::Long qw(:config no_auto_abbrev); + +my $quiet = 0; +my $tree = 1; +my $chk_signoff = 1; +my $chk_patch = undef; +my $chk_branch = undef; +my $tst_only; +my $emacs = 0; +my $terse = 0; +my $file = undef; +my $color = "auto"; +my $no_warnings = 0; +my $summary = 1; +my $mailback = 0; +my $summary_file = 0; +my $root; +my %debug; +my $help = 0; + +sub help { + my ($exitcode) = @_; + + print << "EOM"; +Usage: + + $P [OPTION]... [FILE]... + $P [OPTION]... 
[GIT-REV-LIST] + +Version: $V + +Options: + -q, --quiet quiet + --no-tree run without a qemu tree + --no-signoff do not check for 'Signed-off-by' line + --patch treat FILE as patchfile + --branch treat args as GIT revision list + --emacs emacs compile window format + --terse one line per report + -f, --file treat FILE as regular source file + --strict fail if only warnings are found + --root=PATH PATH to the qemu tree root + --no-summary suppress the per-file summary + --mailback only produce a report in case of warnings/errors + --summary-file include the filename in summary + --debug KEY=[0|1] turn on/off debugging of KEY, where KEY is one of + 'values', 'possible', 'type', and 'attr' (default + is all off) + --test-only=WORD report only warnings/errors containing WORD + literally + --color[=WHEN] Use colors 'always', 'never', or only when output + is a terminal ('auto'). Default is 'auto'. + -h, --help, --version display this help and exit + +When FILE is - read standard input. +EOM + + exit($exitcode); +} + +# Perl's Getopt::Long allows options to take optional arguments after a space. +# Prevent --color by itself from consuming other arguments +foreach (@ARGV) { + if ($_ eq "--color" || $_ eq "-color") { + $_ = "--color=$color"; + } +} + +GetOptions( + 'q|quiet+' => \$quiet, + 'tree!' => \$tree, + 'signoff!' => \$chk_signoff, + 'patch!' => \$chk_patch, + 'branch!' => \$chk_branch, + 'emacs!' => \$emacs, + 'terse!' => \$terse, + 'f|file!' => \$file, + 'strict!' => \$no_warnings, + 'root=s' => \$root, + 'summary!' => \$summary, + 'mailback!' => \$mailback, + 'summary-file!' => \$summary_file, + + 'debug=s' => \%debug, + 'test-only=s' => \$tst_only, + 'color=s' => \$color, + 'no-color' => sub { $color = 'never'; }, + 'h|help' => \$help, + 'version' => \$help +) or help(1); + +help(0) if ($help); + +my $exit = 0; + +if ($#ARGV < 0) { + print "$P: no input files\n"; + exit(1); +} + +if (!defined $chk_branch && !defined $chk_patch && !defined $file) { + $chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0; + $file = $ARGV[0] =~ /$SrcFile/ ? 1 : 0; + $chk_patch = $chk_branch || $file ? 0 : 1; +} elsif (!defined $chk_branch && !defined $chk_patch) { + if ($file) { + $chk_branch = $chk_patch = 0; + } else { + $chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0; + $chk_patch = $chk_branch ? 0 : 1; + } +} elsif (!defined $chk_branch && !defined $file) { + if ($chk_patch) { + $chk_branch = $file = 0; + } else { + $chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0; + $file = $chk_branch ? 0 : 1; + } +} elsif (!defined $chk_patch && !defined $file) { + if ($chk_branch) { + $chk_patch = $file = 0; + } else { + $file = $ARGV[0] =~ /$SrcFile/ ? 1 : 0; + $chk_patch = $file ? 0 : 1; + } +} elsif (!defined $chk_branch) { + $chk_branch = $chk_patch || $file ? 0 : 1; +} elsif (!defined $chk_patch) { + $chk_patch = $chk_branch || $file ? 0 : 1; +} elsif (!defined $file) { + $file = $chk_patch || $chk_branch ? 
0 : 1; +} + +if (($chk_patch && $chk_branch) || + ($chk_patch && $file) || + ($chk_branch && $file)) { + die "Only one of --file, --branch, --patch is permitted\n"; +} +if (!$chk_patch && !$chk_branch && !$file) { + die "One of --file, --branch, --patch is required\n"; +} + +if ($color =~ /^always$/i) { + $color = 1; +} elsif ($color =~ /^never$/i) { + $color = 0; +} elsif ($color =~ /^auto$/i) { + $color = (-t STDOUT); +} else { + die "Invalid color mode: $color\n"; +} + +my $dbg_values = 0; +my $dbg_possible = 0; +my $dbg_type = 0; +my $dbg_attr = 0; +my $dbg_adv_dcs = 0; +my $dbg_adv_checking = 0; +my $dbg_adv_apw = 0; +for my $key (keys %debug) { + ## no critic + eval "\${dbg_$key} = '$debug{$key}';"; + die "$@" if ($@); +} + +my $rpt_cleaners = 0; + +if ($terse) { + $emacs = 1; + $quiet++; +} + +if ($tree) { + if (defined $root) { + if (!top_of_kernel_tree($root)) { + die "$P: $root: --root does not point at a valid tree\n"; + } + } else { + if (top_of_kernel_tree('.')) { + $root = '.'; + } elsif ($0 =~ m@(.*)/scripts/[^/]*$@ && + top_of_kernel_tree($1)) { + $root = $1; + } + } + + if (!defined $root) { + print "Must be run from the top-level dir. of a qemu tree\n"; + exit(2); + } +} + +my $emitted_corrupt = 0; + +our $Ident = qr{ + [A-Za-z_][A-Za-z\d_]* + (?:\s*\#\#\s*[A-Za-z_][A-Za-z\d_]*)* + }x; +our $Storage = qr{extern|static|asmlinkage}; +our $Sparse = qr{ + __force + }x; + +# Notes to $Attribute: +our $Attribute = qr{ + const| + volatile| + QEMU_NORETURN| + QEMU_WARN_UNUSED_RESULT| + QEMU_SENTINEL| + QEMU_PACKED| + GCC_FMT_ATTR + }x; +our $Modifier; +our $Inline = qr{inline}; +our $Member = qr{->$Ident|\.$Ident|\[[^]]*\]}; +our $Lval = qr{$Ident(?:$Member)*}; + +our $Constant = qr{(?:[0-9]+|0x[0-9a-fA-F]+)[UL]*}; +our $Assignment = qr{(?:\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=)}; +our $Compare = qr{<=|>=|==|!=|<|>}; +our $Operators = qr{ + <=|>=|==|!=| + =>|->|<<|>>|<|>|!|~| + &&|\|\||,|\^|\+\+|--|&|\||\+|-|\*|\/|% + }x; + +our $NonptrType; +our $Type; +our $Declare; + +our $NON_ASCII_UTF8 = qr{ + [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte + | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs + | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte + | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates + | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 + | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 + | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 +}x; + +our $UTF8 = qr{ + [\x09\x0A\x0D\x20-\x7E] # ASCII + | $NON_ASCII_UTF8 +}x; + +# some readers default to ISO-8859-1 when showing email source. detect +# when UTF-8 is incorrectly interpreted as ISO-8859-1 and reencoded back. +# False positives are possible but very unlikely. +our $UTF8_MOJIBAKE = qr{ + \xC3[\x82-\x9F] \xC2[\x80-\xBF] # c2-df 80-bf + | \xC3\xA0 \xC2[\xA0-\xBF] \xC2[\x80-\xBF] # e0 a0-bf 80-bf + | \xC3[\xA1-\xAC\xAE\xAF] (?: \xC2[\x80-\xBF]){2} # e1-ec/ee/ef 80-bf 80-bf + | \xC3\xAD \xC2[\x80-\x9F] \xC2[\x80-\xBF] # ed 80-9f 80-bf + | \xC3\xB0 \xC2[\x90-\xBF] (?: \xC2[\x80-\xBF]){2} # f0 90-bf 80-bf 80-bf + | \xC3[\xB1-\xB3] (?: \xC2[\x80-\xBF]){3} # f1-f3 80-bf 80-bf 80-bf + | \xC3\xB4 \xC2[\x80-\x8F] (?: \xC2[\x80-\xBF]){2} # f4 80-b8 80-bf 80-bf +}x; + +# There are still some false positives, but this catches most +# common cases. 
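A sketch of what that means in practice, transcribed into an anchored Python check: the camelcase pattern of $typeTypedefs (below) deliberately skips IEC binary prefixes and hits real typedefs, but it also matches any CamelCase word, which is the false-positive class the comment refers to. GuestPhysBlock and NotAType are illustrative choices, and the $ anchors are added here for a word-level test:

    import re

    camelcase = re.compile(r'(?!(?:[KMGTPE]iB)$)'   # IEC binary prefixes excluded
                           r'[A-Z][A-Z\d_]*[a-z][A-Za-z\d_]*$')

    assert camelcase.match('GuestPhysBlock')   # a QEMU typedef: intended hit
    assert not camelcase.match('KiB')          # IEC unit, deliberately skipped
    assert camelcase.match('NotAType')         # still matches: a false positive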
+our $typeTypedefs = qr{(?x: + (?![KMGTPE]iB) # IEC binary prefix (do not match) + [A-Z][A-Z\d_]*[a-z][A-Za-z\d_]* # camelcase + | [A-Z][A-Z\d_]*AIOCB # all uppercase + | [A-Z][A-Z\d_]*CPU # all uppercase + | QEMUBH # all uppercase +)}; + +our @typeList = ( + qr{void}, + qr{(?:unsigned\s+)?char}, + qr{(?:unsigned\s+)?short}, + qr{(?:unsigned\s+)?int}, + qr{(?:unsigned\s+)?long}, + qr{(?:unsigned\s+)?long\s+int}, + qr{(?:unsigned\s+)?long\s+long}, + qr{(?:unsigned\s+)?long\s+long\s+int}, + qr{unsigned}, + qr{float}, + qr{double}, + qr{bool}, + qr{struct\s+$Ident}, + qr{union\s+$Ident}, + qr{enum\s+$Ident}, + qr{${Ident}_t}, + qr{${Ident}_handler}, + qr{${Ident}_handler_fn}, + qr{target_(?:u)?long}, + qr{hwaddr}, + # external libraries + qr{xml${Ident}}, + qr{xen\w+_handle}, + # Glib definitions + qr{gchar}, + qr{gshort}, + qr{glong}, + qr{gint}, + qr{gboolean}, + qr{guchar}, + qr{gushort}, + qr{gulong}, + qr{guint}, + qr{gfloat}, + qr{gdouble}, + qr{gpointer}, + qr{gconstpointer}, + qr{gint8}, + qr{guint8}, + qr{gint16}, + qr{guint16}, + qr{gint32}, + qr{guint32}, + qr{gint64}, + qr{guint64}, + qr{gsize}, + qr{gssize}, + qr{goffset}, + qr{gintptr}, + qr{guintptr}, +); + +# This can be modified by sub possible. Since it can be empty, be careful +# about regexes that always match, because they can cause infinite loops. +our @modifierList = ( +); + +sub build_types { + my $all = "(?x: \n" . join("|\n ", @typeList) . "\n)"; + if (@modifierList > 0) { + my $mods = "(?x: \n" . join("|\n ", @modifierList) . "\n)"; + $Modifier = qr{(?:$Attribute|$Sparse|$mods)}; + } else { + $Modifier = qr{(?:$Attribute|$Sparse)}; + } + $NonptrType = qr{ + (?:$Modifier\s+|const\s+)* + (?: + (?:typeof|__typeof__)\s*\(\s*\**\s*$Ident\s*\)| + (?:$typeTypedefs\b)| + (?:${all}\b) + ) + (?:\s+$Modifier|\s+const)* + }x; + $Type = qr{ + $NonptrType + (?:[\s\*]+\s*const|[\s\*]+|(?:\s*\[\s*\])+)? + (?:\s+$Inline|\s+$Modifier)* + }x; + $Declare = qr{(?:$Storage\s+)?$Type}; +} +build_types(); + +$chk_signoff = 0 if ($file); + +my @rawlines = (); +my @lines = (); +my $vname; +if ($chk_branch) { + my @patches; + my %git_commits = (); + my $HASH; + open($HASH, "-|", "git", "log", "--reverse", "--no-merges", "--format=%H %s", $ARGV[0]) || + die "$P: git log --reverse --no-merges --format='%H %s' $ARGV[0] failed - $!\n"; + + for my $line (<$HASH>) { + $line =~ /^([0-9a-fA-F]{40,40}) (.*)$/; + next if (!defined($1) || !defined($2)); + my $sha1 = $1; + my $subject = $2; + push(@patches, $sha1); + $git_commits{$sha1} = $subject; + } + + close $HASH; + + die "$P: no revisions returned for revlist '$ARGV[0]'\n" + unless @patches; + + my $i = 1; + my $num_patches = @patches; + for my $hash (@patches) { + my $FILE; + open($FILE, '-|', "git", + "-c", "diff.renamelimit=0", + "-c", "diff.renames=True", + "-c", "diff.algorithm=histogram", + "show", + "--patch-with-stat", $hash) || + die "$P: git show $hash - $!\n"; + while (<$FILE>) { + chomp; + push(@rawlines, $_); + } + close($FILE); + $vname = substr($hash, 0, 12) . ' (' . $git_commits{$hash} . ')'; + if ($num_patches > 1 && $quiet == 0) { + my $prefix = "$i/$num_patches"; + $prefix = BLUE . BOLD . $prefix . RESET if $color; + print "$prefix Checking commit $vname\n"; + $vname = "Patch $i/$num_patches"; + } else { + $vname = "Commit " . 
$vname; + } + if (!process($hash)) { + $exit = 1; + print "\n" if ($num_patches > 1 && $quiet == 0); + } + @rawlines = (); + @lines = (); + $i++; + } +} else { + for my $filename (@ARGV) { + my $FILE; + if ($file) { + open($FILE, '-|', "diff -u /dev/null $filename") || + die "$P: $filename: diff failed - $!\n"; + } elsif ($filename eq '-') { + open($FILE, '<&STDIN'); + } else { + open($FILE, '<', "$filename") || + die "$P: $filename: open failed - $!\n"; + } + if ($filename eq '-') { + $vname = 'Your patch'; + } else { + $vname = $filename; + } + print "Checking $filename...\n" if @ARGV > 1 && $quiet == 0; + while (<$FILE>) { + chomp; + push(@rawlines, $_); + } + close($FILE); + if (!process($filename)) { + $exit = 1; + } + @rawlines = (); + @lines = (); + } +} + +exit($exit); + +sub top_of_kernel_tree { + my ($root) = @_; + + my @tree_check = ( + "COPYING", "MAINTAINERS", "Makefile", + "README.rst", "docs", "VERSION", + "linux-user", "softmmu" + ); + + foreach my $check (@tree_check) { + if (! -e $root . '/' . $check) { + return 0; + } + } + return 1; +} + +sub expand_tabs { + my ($str) = @_; + + my $res = ''; + my $n = 0; + for my $c (split(//, $str)) { + if ($c eq "\t") { + $res .= ' '; + $n++; + for (; ($n % 8) != 0; $n++) { + $res .= ' '; + } + next; + } + $res .= $c; + $n++; + } + + return $res; +} +sub copy_spacing { + (my $res = shift) =~ tr/\t/ /c; + return $res; +} + +sub line_stats { + my ($line) = @_; + + # Drop the diff line leader and expand tabs + $line =~ s/^.//; + $line = expand_tabs($line); + + # Pick the indent from the front of the line. + my ($white) = ($line =~ /^(\s*)/); + + return (length($line), length($white)); +} + +my $sanitise_quote = ''; + +sub sanitise_line_reset { + my ($in_comment) = @_; + + if ($in_comment) { + $sanitise_quote = '*/'; + } else { + $sanitise_quote = ''; + } +} +sub sanitise_line { + my ($line) = @_; + + my $res = ''; + my $l = ''; + + my $qlen = 0; + my $off = 0; + my $c; + + # Always copy over the diff marker. + $res = substr($line, 0, 1); + + for ($off = 1; $off < length($line); $off++) { + $c = substr($line, $off, 1); + + # Comments we are wacking completely including the begin + # and end, all to $;. + if ($sanitise_quote eq '' && substr($line, $off, 2) eq '/*') { + $sanitise_quote = '*/'; + + substr($res, $off, 2, "$;$;"); + $off++; + next; + } + if ($sanitise_quote eq '*/' && substr($line, $off, 2) eq '*/') { + $sanitise_quote = ''; + substr($res, $off, 2, "$;$;"); + $off++; + next; + } + if ($sanitise_quote eq '' && substr($line, $off, 2) eq '//') { + $sanitise_quote = '//'; + + substr($res, $off, 2, $sanitise_quote); + $off++; + next; + } + + # A \ in a string means ignore the next character. + if (($sanitise_quote eq "'" || $sanitise_quote eq '"') && + $c eq "\\") { + substr($res, $off, 2, 'XX'); + $off++; + next; + } + # Regular quotes. + if ($c eq "'" || $c eq '"') { + if ($sanitise_quote eq '') { + $sanitise_quote = $c; + + substr($res, $off, 1, $c); + next; + } elsif ($sanitise_quote eq $c) { + $sanitise_quote = ''; + } + } + + #print "c<$c> SQ<$sanitise_quote>\n"; + if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") { + substr($res, $off, 1, $;); + } elsif ($off != 0 && $sanitise_quote eq '//' && $c ne "\t") { + substr($res, $off, 1, $;); + } elsif ($off != 0 && $sanitise_quote && $c ne "\t") { + substr($res, $off, 1, 'X'); + } else { + substr($res, $off, 1, $c); + } + } + + if ($sanitise_quote eq '//') { + $sanitise_quote = ''; + } + + # The pathname on a #include may be surrounded by '<' and '>'. 
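What that branch produces, sketched in Python: the include pathname is blanked out with 'X' placeholders so its characters cannot trip later string checks. The header name is an arbitrary example:

    line = '+#include <qemu/osdep.h>'
    path = 'qemu/osdep.h'
    sanitised = line.replace(path, 'X' * len(path))
    assert sanitised == '+#include <XXXXXXXXXXXX>'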
+ if ($res =~ /^.\s*\#\s*include\s+\<(.*)\>/) { + my $clean = 'X' x length($1); + $res =~ s@\<.*\>@<$clean>@; + + # The whole of a #error is a string. + } elsif ($res =~ /^.\s*\#\s*(?:error|warning)\s+(.*)\b/) { + my $clean = 'X' x length($1); + $res =~ s@(\#\s*(?:error|warning)\s+).*@$1$clean@; + } + + return $res; +} + +sub ctx_statement_block { + my ($linenr, $remain, $off) = @_; + my $line = $linenr - 1; + my $blk = ''; + my $soff = $off; + my $coff = $off - 1; + my $coff_set = 0; + + my $loff = 0; + + my $type = ''; + my $level = 0; + my @stack = (); + my $p; + my $c; + my $len = 0; + + my $remainder; + while (1) { + @stack = (['', 0]) if ($#stack == -1); + + #warn "CSB: blk<$blk> remain<$remain>\n"; + # If we are about to drop off the end, pull in more + # context. + if ($off >= $len) { + for (; $remain > 0; $line++) { + last if (!defined $lines[$line]); + next if ($lines[$line] =~ /^-/); + $remain--; + $loff = $len; + $blk .= $lines[$line] . "\n"; + $len = length($blk); + $line++; + last; + } + # Bail if there is no further context. + #warn "CSB: blk<$blk> off<$off> len<$len>\n"; + if ($off >= $len) { + last; + } + } + $p = $c; + $c = substr($blk, $off, 1); + $remainder = substr($blk, $off); + + #warn "CSB: c<$c> type<$type> level<$level> remainder<$remainder> coff_set<$coff_set>\n"; + + # Handle nested #if/#else. + if ($remainder =~ /^#\s*(?:ifndef|ifdef|if)\s/) { + push(@stack, [ $type, $level ]); + } elsif ($remainder =~ /^#\s*(?:else|elif)\b/) { + ($type, $level) = @{$stack[$#stack - 1]}; + } elsif ($remainder =~ /^#\s*endif\b/) { + ($type, $level) = @{pop(@stack)}; + } + + # Statement ends at the ';' or a close '}' at the + # outermost level. + if ($level == 0 && $c eq ';') { + last; + } + + # An else is really a conditional as long as its not else if + if ($level == 0 && $coff_set == 0 && + (!defined($p) || $p =~ /(?:\s|\}|\+)/) && + $remainder =~ /^(else)(?:\s|{)/ && + $remainder !~ /^else\s+if\b/) { + $coff = $off + length($1) - 1; + $coff_set = 1; + #warn "CSB: mark coff<$coff> soff<$soff> 1<$1>\n"; + #warn "[" . substr($blk, $soff, $coff - $soff + 1) . "]\n"; + } + + if (($type eq '' || $type eq '(') && $c eq '(') { + $level++; + $type = '('; + } + if ($type eq '(' && $c eq ')') { + $level--; + $type = ($level != 0)? '(' : ''; + + if ($level == 0 && $coff < $soff) { + $coff = $off; + $coff_set = 1; + #warn "CSB: mark coff<$coff>\n"; + } + } + if (($type eq '' || $type eq '{') && $c eq '{') { + $level++; + $type = '{'; + } + if ($type eq '{' && $c eq '}') { + $level--; + $type = ($level != 0)? '{' : ''; + + if ($level == 0) { + if (substr($blk, $off + 1, 1) eq ';') { + $off++; + } + last; + } + } + $off++; + } + # We are truly at the end, so shuffle to the next line. + if ($off == $len) { + $loff = $len + 1; + $line++; + $remain--; + } + + my $statement = substr($blk, $soff, $off - $soff + 1); + my $condition = substr($blk, $soff, $coff - $soff + 1); + + #warn "STATEMENT<$statement>\n"; + #warn "CONDITION<$condition>\n"; + + #print "coff<$coff> soff<$off> loff<$loff>\n"; + + return ($statement, $condition, + $line, $remain + 1, $off - $loff + 1, $level); +} + +sub statement_lines { + my ($stmt) = @_; + + # Strip the diff line prefixes and rip blank lines at start and end. 
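The substitution below is checkpatch's recurring idiom for dropping the one-character diff marker ('+', '-', ' ') from every line of a gathered statement. A Python transliteration of just that step, on a made-up three-line statement:

    import re

    stmt = '+if (x) {\n+    y();\n+}'
    stripped = re.sub(r'(^|\n).', r'\1', stmt)   # eat one char at start of each line
    assert stripped == 'if (x) {\n    y();\n}'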
+ $stmt =~ s/(^|\n)./$1/g; + $stmt =~ s/^\s*//; + $stmt =~ s/\s*$//; + + my @stmt_lines = ($stmt =~ /\n/g); + + return $#stmt_lines + 2; +} + +sub statement_rawlines { + my ($stmt) = @_; + + my @stmt_lines = ($stmt =~ /\n/g); + + return $#stmt_lines + 2; +} + +sub statement_block_size { + my ($stmt) = @_; + + $stmt =~ s/(^|\n)./$1/g; + $stmt =~ s/^\s*\{//; + $stmt =~ s/}\s*$//; + $stmt =~ s/^\s*//; + $stmt =~ s/\s*$//; + + my @stmt_lines = ($stmt =~ /\n/g); + my @stmt_statements = ($stmt =~ /;/g); + + my $stmt_lines = $#stmt_lines + 2; + my $stmt_statements = $#stmt_statements + 1; + + if ($stmt_lines > $stmt_statements) { + return $stmt_lines; + } else { + return $stmt_statements; + } +} + +sub ctx_statement_full { + my ($linenr, $remain, $off) = @_; + my ($statement, $condition, $level); + + my (@chunks); + + # Grab the first conditional/block pair. + ($statement, $condition, $linenr, $remain, $off, $level) = + ctx_statement_block($linenr, $remain, $off); + #print "F: c<$condition> s<$statement> remain<$remain>\n"; + push(@chunks, [ $condition, $statement ]); + if (!($remain > 0 && $condition =~ /^\s*(?:\n[+-])?\s*(?:if|else|do)\b/s)) { + return ($level, $linenr, @chunks); + } + + # Pull in the following conditional/block pairs and see if they + # could continue the statement. + for (;;) { + ($statement, $condition, $linenr, $remain, $off, $level) = + ctx_statement_block($linenr, $remain, $off); + #print "C: c<$condition> s<$statement> remain<$remain>\n"; + last if (!($remain > 0 && $condition =~ /^(?:\s*\n[+-])*\s*(?:else|do)\b/s)); + #print "C: push\n"; + push(@chunks, [ $condition, $statement ]); + } + + return ($level, $linenr, @chunks); +} + +sub ctx_block_get { + my ($linenr, $remain, $outer, $open, $close, $off) = @_; + my $line; + my $start = $linenr - 1; + my $blk = ''; + my @o; + my @c; + my @res = (); + + my $level = 0; + my @stack = ($level); + for ($line = $start; $remain > 0; $line++) { + next if ($rawlines[$line] =~ /^-/); + $remain--; + + $blk .= $rawlines[$line]; + + # Handle nested #if/#else. + if ($lines[$line] =~ /^.\s*#\s*(?:ifndef|ifdef|if)\s/) { + push(@stack, $level); + } elsif ($lines[$line] =~ /^.\s*#\s*(?:else|elif)\b/) { + $level = $stack[$#stack - 1]; + } elsif ($lines[$line] =~ /^.\s*#\s*endif\b/) { + $level = pop(@stack); + } + + foreach my $c (split(//, $lines[$line])) { + ##print "C<$c>L<$level><$open$close>O<$off>\n"; + if ($off > 0) { + $off--; + next; + } + + if ($c eq $close && $level > 0) { + $level--; + last if ($level == 0); + } elsif ($c eq $open) { + $level++; + } + } + + if (!$outer || $level <= 1) { + push(@res, $rawlines[$line]); + } + + last if ($level == 0); + } + + return ($level, @res); +} +sub ctx_block_outer { + my ($linenr, $remain) = @_; + + my ($level, @r) = ctx_block_get($linenr, $remain, 1, '{', '}', 0); + return @r; +} +sub ctx_block { + my ($linenr, $remain) = @_; + + my ($level, @r) = ctx_block_get($linenr, $remain, 0, '{', '}', 0); + return @r; +} +sub ctx_statement { + my ($linenr, $remain, $off) = @_; + + my ($level, @r) = ctx_block_get($linenr, $remain, 0, '(', ')', $off); + return @r; +} +sub ctx_block_level { + my ($linenr, $remain) = @_; + + return ctx_block_get($linenr, $remain, 0, '{', '}', 0); +} +sub ctx_statement_level { + my ($linenr, $remain, $off) = @_; + + return ctx_block_get($linenr, $remain, 0, '(', ')', $off); +} + +sub ctx_locate_comment { + my ($first_line, $end_line) = @_; + + # Catch a comment on the end of the line itself. 
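+ # e.g. for "foo(); /* guarded by lock */" the trailing /* ... */ is
+ # returned directly, with no need to scan the earlier context
+ # (illustrative line).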
+ my ($current_comment) = ($rawlines[$end_line - 1] =~ m@.*(/\*.*\*/)\s*(?:\\\s*)?$@); + return $current_comment if (defined $current_comment); + + # Look through the context and try and figure out if there is a + # comment. + my $in_comment = 0; + $current_comment = ''; + for (my $linenr = $first_line; $linenr < $end_line; $linenr++) { + my $line = $rawlines[$linenr - 1]; + #warn " $line\n"; + if ($linenr == $first_line and $line =~ m@^.\s*\*@) { + $in_comment = 1; + } + if ($line =~ m@/\*@) { + $in_comment = 1; + } + if (!$in_comment && $current_comment ne '') { + $current_comment = ''; + } + $current_comment .= $line . "\n" if ($in_comment); + if ($line =~ m@\*/@) { + $in_comment = 0; + } + } + + chomp($current_comment); + return($current_comment); +} +sub ctx_has_comment { + my ($first_line, $end_line) = @_; + my $cmt = ctx_locate_comment($first_line, $end_line); + + ##print "LINE: $rawlines[$end_line - 1 ]\n"; + ##print "CMMT: $cmt\n"; + + return ($cmt ne ''); +} + +sub raw_line { + my ($linenr, $cnt) = @_; + + my $offset = $linenr - 1; + $cnt++; + + my $line; + while ($cnt) { + $line = $rawlines[$offset++]; + next if (defined($line) && $line =~ /^-/); + $cnt--; + } + + return $line; +} + +sub cat_vet { + my ($vet) = @_; + my ($res, $coded); + + $res = ''; + while ($vet =~ /([^[:cntrl:]]*)([[:cntrl:]]|$)/g) { + $res .= $1; + if ($2 ne '') { + $coded = sprintf("^%c", unpack('C', $2) + 64); + $res .= $coded; + } + } + $res =~ s/$/\$/; + + return $res; +} + +my $av_preprocessor = 0; +my $av_pending; +my @av_paren_type; +my $av_pend_colon; + +sub annotate_reset { + $av_preprocessor = 0; + $av_pending = '_'; + @av_paren_type = ('E'); + $av_pend_colon = 'O'; +} + +sub annotate_values { + my ($stream, $type) = @_; + + my $res; + my $var = '_' x length($stream); + my $cur = $stream; + + print "$stream\n" if ($dbg_values > 1); + + while (length($cur)) { + @av_paren_type = ('E') if ($#av_paren_type < 0); + print " <" . join('', @av_paren_type) . 
+ "> <$type> <$av_pending>" if ($dbg_values > 1); + if ($cur =~ /^(\s+)/o) { + print "WS($1)\n" if ($dbg_values > 1); + if ($1 =~ /\n/ && $av_preprocessor) { + $type = pop(@av_paren_type); + $av_preprocessor = 0; + } + + } elsif ($cur =~ /^(\(\s*$Type\s*)\)/ && $av_pending eq '_') { + print "CAST($1)\n" if ($dbg_values > 1); + push(@av_paren_type, $type); + $type = 'C'; + + } elsif ($cur =~ /^($Type)\s*(?:$Ident|,|\)|\(|\s*$)/) { + print "DECLARE($1)\n" if ($dbg_values > 1); + $type = 'T'; + + } elsif ($cur =~ /^($Modifier)\s*/) { + print "MODIFIER($1)\n" if ($dbg_values > 1); + $type = 'T'; + + } elsif ($cur =~ /^(\#\s*define\s*$Ident)(\(?)/o) { + print "DEFINE($1,$2)\n" if ($dbg_values > 1); + $av_preprocessor = 1; + push(@av_paren_type, $type); + if ($2 ne '') { + $av_pending = 'N'; + } + $type = 'E'; + + } elsif ($cur =~ /^(\#\s*(?:undef\s*$Ident|include\b))/o) { + print "UNDEF($1)\n" if ($dbg_values > 1); + $av_preprocessor = 1; + push(@av_paren_type, $type); + + } elsif ($cur =~ /^(\#\s*(?:ifdef|ifndef|if))/o) { + print "PRE_START($1)\n" if ($dbg_values > 1); + $av_preprocessor = 1; + + push(@av_paren_type, $type); + push(@av_paren_type, $type); + $type = 'E'; + + } elsif ($cur =~ /^(\#\s*(?:else|elif))/o) { + print "PRE_RESTART($1)\n" if ($dbg_values > 1); + $av_preprocessor = 1; + + push(@av_paren_type, $av_paren_type[$#av_paren_type]); + + $type = 'E'; + + } elsif ($cur =~ /^(\#\s*(?:endif))/o) { + print "PRE_END($1)\n" if ($dbg_values > 1); + + $av_preprocessor = 1; + + # Assume all arms of the conditional end as this + # one does, and continue as if the #endif was not here. + pop(@av_paren_type); + push(@av_paren_type, $type); + $type = 'E'; + + } elsif ($cur =~ /^(\\\n)/o) { + print "PRECONT($1)\n" if ($dbg_values > 1); + + } elsif ($cur =~ /^(__attribute__)\s*\(?/o) { + print "ATTR($1)\n" if ($dbg_values > 1); + $av_pending = $type; + $type = 'N'; + + } elsif ($cur =~ /^(sizeof)\s*(\()?/o) { + print "SIZEOF($1)\n" if ($dbg_values > 1); + if (defined $2) { + $av_pending = 'V'; + } + $type = 'N'; + + } elsif ($cur =~ /^(if|while|for)\b/o) { + print "COND($1)\n" if ($dbg_values > 1); + $av_pending = 'E'; + $type = 'N'; + + } elsif ($cur =~/^(case)/o) { + print "CASE($1)\n" if ($dbg_values > 1); + $av_pend_colon = 'C'; + $type = 'N'; + + } elsif ($cur =~/^(return|else|goto|typeof|__typeof__)\b/o) { + print "KEYWORD($1)\n" if ($dbg_values > 1); + $type = 'N'; + + } elsif ($cur =~ /^(\()/o) { + print "PAREN('$1')\n" if ($dbg_values > 1); + push(@av_paren_type, $av_pending); + $av_pending = '_'; + $type = 'N'; + + } elsif ($cur =~ /^(\))/o) { + my $new_type = pop(@av_paren_type); + if ($new_type ne '_') { + $type = $new_type; + print "PAREN('$1') -> $type\n" + if ($dbg_values > 1); + } else { + print "PAREN('$1')\n" if ($dbg_values > 1); + } + + } elsif ($cur =~ /^($Ident)\s*\(/o) { + print "FUNC($1)\n" if ($dbg_values > 1); + $type = 'V'; + $av_pending = 'V'; + + } elsif ($cur =~ /^($Ident\s*):(?:\s*\d+\s*(,|=|;))?/) { + if (defined $2 && $type eq 'C' || $type eq 'T') { + $av_pend_colon = 'B'; + } elsif ($type eq 'E') { + $av_pend_colon = 'L'; + } + print "IDENT_COLON($1,$type>$av_pend_colon)\n" if ($dbg_values > 1); + $type = 'V'; + + } elsif ($cur =~ /^($Ident|$Constant)/o) { + print "IDENT($1)\n" if ($dbg_values > 1); + $type = 'V'; + + } elsif ($cur =~ /^($Assignment)/o) { + print "ASSIGN($1)\n" if ($dbg_values > 1); + $type = 'N'; + + } elsif ($cur =~/^(;|{|})/) { + print "END($1)\n" if ($dbg_values > 1); + $type = 'E'; + $av_pend_colon = 'O'; + + } elsif ($cur =~/^(,)/) { + 
print "COMMA($1)\n" if ($dbg_values > 1); + $type = 'C'; + + } elsif ($cur =~ /^(\?)/o) { + print "QUESTION($1)\n" if ($dbg_values > 1); + $type = 'N'; + + } elsif ($cur =~ /^(:)/o) { + print "COLON($1,$av_pend_colon)\n" if ($dbg_values > 1); + + substr($var, length($res), 1, $av_pend_colon); + if ($av_pend_colon eq 'C' || $av_pend_colon eq 'L') { + $type = 'E'; + } else { + $type = 'N'; + } + $av_pend_colon = 'O'; + + } elsif ($cur =~ /^(\[)/o) { + print "CLOSE($1)\n" if ($dbg_values > 1); + $type = 'N'; + + } elsif ($cur =~ /^(-(?![->])|\+(?!\+)|\*|\&\&|\&)/o) { + my $variant; + + print "OPV($1)\n" if ($dbg_values > 1); + if ($type eq 'V') { + $variant = 'B'; + } else { + $variant = 'U'; + } + + substr($var, length($res), 1, $variant); + $type = 'N'; + + } elsif ($cur =~ /^($Operators)/o) { + print "OP($1)\n" if ($dbg_values > 1); + if ($1 ne '++' && $1 ne '--') { + $type = 'N'; + } + + } elsif ($cur =~ /(^.)/o) { + print "C($1)\n" if ($dbg_values > 1); + } + if (defined $1) { + $cur = substr($cur, length($1)); + $res .= $type x length($1); + } + } + + return ($res, $var); +} + +sub possible { + my ($possible, $line) = @_; + my $notPermitted = qr{(?: + ^(?: + $Modifier| + $Storage| + $Type| + DEFINE_\S+ + )$| + ^(?: + goto| + return| + case| + else| + asm|__asm__| + do + )(?:\s|$)| + ^(?:typedef|struct|enum)\b| + ^\# + )}x; + warn "CHECK<$possible> ($line)\n" if ($dbg_possible > 2); + if ($possible !~ $notPermitted) { + # Check for modifiers. + $possible =~ s/\s*$Storage\s*//g; + $possible =~ s/\s*$Sparse\s*//g; + if ($possible =~ /^\s*$/) { + + } elsif ($possible =~ /\s/) { + $possible =~ s/\s*(?:$Type|\#\#)\s*//g; + for my $modifier (split(' ', $possible)) { + if ($modifier !~ $notPermitted) { + warn "MODIFIER: $modifier ($possible) ($line)\n" if ($dbg_possible); + push(@modifierList, $modifier); + } + } + + } else { + warn "POSSIBLE: $possible ($line)\n" if ($dbg_possible); + push(@typeList, $possible); + } + build_types(); + } else { + warn "NOTPOSS: $possible ($line)\n" if ($dbg_possible > 1); + } +} + +my $prefix = ''; + +sub report { + my ($level, $msg) = @_; + if (defined $tst_only && $msg !~ /\Q$tst_only\E/) { + return 0; + } + + my $output = ''; + $output .= BOLD if $color; + $output .= $prefix; + $output .= RED if $color && $level eq 'ERROR'; + $output .= MAGENTA if $color && $level eq 'WARNING'; + $output .= $level . ':'; + $output .= RESET if $color; + $output .= ' ' . $msg . "\n"; + + $output = (split('\n', $output))[0] . "\n" if ($terse); + + push(our @report, $output); + + return 1; +} +sub report_dump { + our @report; +} +sub ERROR { + if (report("ERROR", $_[0])) { + our $clean = 0; + our $cnt_error++; + } +} +sub WARN { + if (report("WARNING", $_[0])) { + our $clean = 0; + our $cnt_warn++; + } +} + +# According to tests/qtest/bios-tables-test.c: do not +# change expected file in the same commit with adding test +sub checkfilename { + my ($name, $acpi_testexpected, $acpi_nontestexpected) = @_; + + # Note: shell script that rebuilds the expected files is in the same + # directory as files themselves. + # Note: allowed diff list can be changed both when changing expected + # files and when changing tests. + if ($name =~ m#^tests/data/acpi/# and not $name =~ m#^\.sh$#) { + $$acpi_testexpected = $name; + } elsif ($name !~ m#^tests/qtest/bios-tables-test-allowed-diff.h$#) { + $$acpi_nontestexpected = $name; + } + if (defined $$acpi_testexpected and defined $$acpi_nontestexpected) { + ERROR("Do not add expected files together with tests, " . + "follow instructions in " . 
+ "tests/qtest/bios-tables-test.c: both " . + $$acpi_testexpected . " and " . + $$acpi_nontestexpected . " found\n"); + } +} + +sub process { + my $filename = shift; + + my $linenr=0; + my $prevline=""; + my $prevrawline=""; + my $stashline=""; + my $stashrawline=""; + + my $length; + my $indent; + my $previndent=0; + my $stashindent=0; + + our $clean = 1; + my $signoff = 0; + my $is_patch = 0; + + my $in_header_lines = $file ? 0 : 1; + my $in_commit_log = 0; #Scanning lines before patch + my $reported_maintainer_file = 0; + my $non_utf8_charset = 0; + + our @report = (); + our $cnt_lines = 0; + our $cnt_error = 0; + our $cnt_warn = 0; + our $cnt_chk = 0; + + # Trace the real file/line as we go. + my $realfile = ''; + my $realline = 0; + my $realcnt = 0; + my $here = ''; + my $in_comment = 0; + my $comment_edge = 0; + my $first_line = 0; + my $p1_prefix = ''; + + my $prev_values = 'E'; + + # suppression flags + my %suppress_ifbraces; + my %suppress_whiletrailers; + my %suppress_export; + + my $acpi_testexpected; + my $acpi_nontestexpected; + + # Pre-scan the patch sanitizing the lines. + + sanitise_line_reset(); + my $line; + foreach my $rawline (@rawlines) { + $linenr++; + $line = $rawline; + + if ($rawline=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) { + $realline=$1-1; + if (defined $2) { + $realcnt=$3+1; + } else { + $realcnt=1+1; + } + $in_comment = 0; + + # Guestimate if this is a continuing comment. Run + # the context looking for a comment "edge". If this + # edge is a close comment then we must be in a comment + # at context start. + my $edge; + my $cnt = $realcnt; + for (my $ln = $linenr + 1; $cnt > 0; $ln++) { + next if (defined $rawlines[$ln - 1] && + $rawlines[$ln - 1] =~ /^-/); + $cnt--; + #print "RAW<$rawlines[$ln - 1]>\n"; + last if (!defined $rawlines[$ln - 1]); + if ($rawlines[$ln - 1] =~ m@(/\*|\*/)@ && + $rawlines[$ln - 1] !~ m@"[^"]*(?:/\*|\*/)[^"]*"@) { + ($edge) = $1; + last; + } + } + if (defined $edge && $edge eq '*/') { + $in_comment = 1; + } + + # Guestimate if this is a continuing comment. If this + # is the start of a diff block and this line starts + # ' *' then it is very likely a comment. + if (!defined $edge && + $rawlines[$linenr] =~ m@^.\s*(?:\*\*+| \*)(?:\s|$)@) + { + $in_comment = 1; + } + + ##print "COMMENT:$in_comment edge<$edge> $rawline\n"; + sanitise_line_reset($in_comment); + + } elsif ($realcnt && $rawline =~ /^(?:\+| |$)/) { + # Standardise the strings and chars within the input to + # simplify matching -- only bother with positive lines. + $line = sanitise_line($rawline); + } + push(@lines, $line); + + if ($realcnt > 1) { + $realcnt-- if ($line =~ /^(?:\+| |$)/); + } else { + $realcnt = 0; + } + + #print "==>$rawline\n"; + #print "-->$line\n"; + } + + $prefix = ''; + + $realcnt = 0; + $linenr = 0; + foreach my $line (@lines) { + $linenr++; + + my $rawline = $rawlines[$linenr - 1]; + +#extract the line range in the file after the patch is applied + if ($line=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) { + $is_patch = 1; + $first_line = $linenr + 1; + $realline=$1-1; + if (defined $2) { + $realcnt=$3+1; + } else { + $realcnt=1+1; + } + annotate_reset(); + $prev_values = 'E'; + + %suppress_ifbraces = (); + %suppress_whiletrailers = (); + %suppress_export = (); + next; + +# track the line number as we move through the hunk, note that +# new versions of GNU diff omit the leading space on completely +# blank context lines so we need to count that too. 
+ } elsif ($line =~ /^( |\+|$)/) { + $realline++; + $realcnt-- if ($realcnt != 0); + + # Measure the line length and indent. + ($length, $indent) = line_stats($rawline); + + # Track the previous line. + ($prevline, $stashline) = ($stashline, $line); + ($previndent, $stashindent) = ($stashindent, $indent); + ($prevrawline, $stashrawline) = ($stashrawline, $rawline); + + #warn "line<$line>\n"; + + } elsif ($realcnt == 1) { + $realcnt--; + } + + my $hunk_line = ($realcnt != 0); + +#make up the handle for any error we report on this line + $prefix = "$filename:$realline: " if ($emacs && $file); + $prefix = "$filename:$linenr: " if ($emacs && !$file); + + $here = "#$linenr: " if (!$file); + $here = "#$realline: " if ($file); + + # extract the filename as it passes + if ($line =~ /^diff --git.*?(\S+)$/) { + $realfile = $1; + $realfile =~ s@^([^/]*)/@@ if (!$file); + checkfilename($realfile, \$acpi_testexpected, \$acpi_nontestexpected); + } elsif ($line =~ /^\+\+\+\s+(\S+)/) { + $realfile = $1; + $realfile =~ s@^([^/]*)/@@ if (!$file); + checkfilename($realfile, \$acpi_testexpected, \$acpi_nontestexpected); + + $p1_prefix = $1; + if (!$file && $tree && $p1_prefix ne '' && + -e "$root/$p1_prefix") { + WARN("patch prefix '$p1_prefix' exists, appears to be a -p0 patch\n"); + } + + next; + } + + $here .= "FILE: $realfile:$realline:" if ($realcnt != 0); + + my $hereline = "$here\n$rawline\n"; + my $herecurr = "$here\n$rawline\n"; + my $hereprev = "$here\n$prevrawline\n$rawline\n"; + + $cnt_lines++ if ($realcnt != 0); + +# Check for incorrect file permissions + if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) { + my $permhere = $here . "FILE: $realfile\n"; + if ($realfile =~ /(\bMakefile(?:\.objs)?|\.c|\.cc|\.cpp|\.h|\.mak|\.[sS])$/) { + ERROR("do not set execute permissions for source files\n" . $permhere); + } + } + +# Only allow Python 3 interpreter + if ($realline == 1 && + $line =~ /^\+#!\ *\/usr\/bin\/(?:env )?python$/) { + ERROR("please use python3 interpreter\n" . $herecurr); + } + +# Accept git diff extended headers as valid patches + if ($line =~ /^(?:rename|copy) (?:from|to) [\w\/\.\-]+\s*$/) { + $is_patch = 1; + } + + if ($line =~ /^(Author|From): .* via .*<qemu-devel\@nongnu.org>/) { + ERROR("Author email address is mangled by the mailing list\n" . $herecurr); + } + +#check the patch for a signoff: + if ($line =~ /^\s*signed-off-by:/i) { + # This is a signoff, if ugly, so do not double report. + $signoff++; + $in_commit_log = 0; + + if (!($line =~ /^\s*Signed-off-by:/)) { + ERROR("The correct form is \"Signed-off-by\"\n" . + $herecurr); + } + if ($line =~ /^\s*signed-off-by:\S/i) { + ERROR("space required after Signed-off-by:\n" . + $herecurr); + } + } + +# Check if MAINTAINERS is being updated. If so, there's probably no need to +# emit the "does MAINTAINERS need updating?" message on file add/move/delete + if ($line =~ /^\s*MAINTAINERS\s*\|/) { + $reported_maintainer_file = 1; + } + +# Check for added, moved or deleted files + if (!$reported_maintainer_file && !$in_commit_log && + ($line =~ /^(?:new|deleted) file mode\s*\d+\s*$/ || + $line =~ /^rename (?:from|to) [\w\/\.\-]+\s*$/ || + ($line =~ /\{\s*([\w\/\.\-]*)\s*\=\>\s*([\w\/\.\-]*)\s*\}/ && + (defined($1) || defined($2)))) && + !(($realfile ne '') && + defined($acpi_testexpected) && + ($realfile eq $acpi_testexpected))) { + $reported_maintainer_file = 1; + WARN("added, moved or deleted file(s), does MAINTAINERS need updating?\n" . 
$herecurr); + } + +# Check for wrappage within a valid hunk of the file + if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) { + ERROR("patch seems to be corrupt (line wrapped?)\n" . + $herecurr) if (!$emitted_corrupt++); + } + +# UTF-8 regex found at http://www.w3.org/International/questions/qa-forms-utf-8.en.php + if (($realfile =~ /^$/ || $line =~ /^\+/) && + $rawline !~ m/^$UTF8*$/) { + my ($utf8_prefix) = ($rawline =~ /^($UTF8*)/); + + my $blank = copy_spacing($rawline); + my $ptr = substr($blank, 0, length($utf8_prefix)) . "^"; + my $hereptr = "$hereline$ptr\n"; + + ERROR("Invalid UTF-8, patch and commit message should be encoded in UTF-8\n" . $hereptr); + } + + if ($rawline =~ m/$UTF8_MOJIBAKE/) { + ERROR("Doubly-encoded UTF-8\n" . $herecurr); + } +# Check if it's the start of a commit log +# (not a header line and we haven't seen the patch filename) + if ($in_header_lines && $realfile =~ /^$/ && + !($rawline =~ /^\s+\S/ || + $rawline =~ /^(commit\b|from\b|[\w-]+:).*$/i)) { + $in_header_lines = 0; + $in_commit_log = 1; + } + +# Check if there is UTF-8 in a commit log when a mail header has explicitly +# declined it, i.e defined some charset where it is missing. + if ($in_header_lines && + $rawline =~ /^Content-Type:.+charset="(.+)".*$/ && + $1 !~ /utf-8/i) { + $non_utf8_charset = 1; + } + + if ($in_commit_log && $non_utf8_charset && $realfile =~ /^$/ && + $rawline =~ /$NON_ASCII_UTF8/) { + WARN("8-bit UTF-8 used in possible commit log\n" . $herecurr); + } + +# ignore non-hunk lines and lines being removed + next if (!$hunk_line || $line =~ /^-/); + +# ignore files that are being periodically imported from Linux + next if ($realfile =~ /^(linux-headers|include\/standard-headers)\//); + +#trailing whitespace + if ($line =~ /^\+.*\015/) { + my $herevet = "$here\n" . cat_vet($rawline) . "\n"; + ERROR("DOS line endings\n" . $herevet); + + } elsif ($realfile =~ /^docs\/.+\.txt/ || + $realfile =~ /^docs\/.+\.md/) { + if ($rawline =~ /^\+\s+$/ && $rawline !~ /^\+ {4}$/) { + # TODO: properly check we're in a code block + # (surrounding text is 4-column aligned) + my $herevet = "$here\n" . cat_vet($rawline) . "\n"; + ERROR("code blocks in documentation should have " . + "empty lines with exactly 4 columns of " . + "whitespace\n" . $herevet); + } + } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) { + my $herevet = "$here\n" . cat_vet($rawline) . "\n"; + ERROR("trailing whitespace\n" . $herevet); + $rpt_cleaners = 1; + } + +# checks for trace-events files + if ($realfile =~ /trace-events$/ && $line =~ /^\+/) { + if ($rawline =~ /%[-+ 0]*#/) { + ERROR("Don't use '#' flag of printf format ('%#') in " . + "trace-events, use '0x' prefix instead\n" . $herecurr); + } else { + my $hex = + qr/%[-+ *.0-9]*([hljztL]|ll|hh)?(x|X|"\s*PRI[xX][^"]*"?)/; + + # don't consider groups splitted by [.:/ ], like 2A.20:12ab + my $tmpline = $rawline; + $tmpline =~ s/($hex[.:\/ ])+$hex//g; + + if ($tmpline =~ /(?<!0x)$hex/) { + ERROR("Hex numbers must be prefixed with '0x'\n" . + $herecurr); + } + } + } + +# check we are in a valid source file if not then ignore this hunk + next if ($realfile !~ /$SrcFile/); + +#90 column limit; exempt URLs, if no other words on line + if ($line =~ /^\+/ && + !($line =~ /^\+\s*"[^"]*"\s*(?:\s*|,|\)\s*;)\s*$/) && + !($rawline =~ /^[^[:alnum:]]*https?:\S*$/) && + $length > 80) + { + if ($length > 90) { + ERROR("line over 90 characters\n" . $herecurr); + } else { + WARN("line over 80 characters\n" . 
$herecurr); + } + } + +# check for spaces before a quoted newline + if ($rawline =~ /^.*\".*\s\\n/) { + ERROR("unnecessary whitespace before a quoted newline\n" . $herecurr); + } + +# check for adding lines without a newline. + if ($line =~ /^\+/ && defined $lines[$linenr] && $lines[$linenr] =~ /^\\ No newline at end of file/) { + ERROR("adding a line without newline at end of file\n" . $herecurr); + } + +# check for RCS/CVS revision markers + if ($rawline =~ /^\+.*\$(Revision|Log|Id)(?:\$|\b)/) { + ERROR("CVS style keyword markers, these will _not_ be updated\n". $herecurr); + } + +# tabs are only allowed in assembly source code, and in +# some scripts we imported from other projects. + next if ($realfile =~ /\.(s|S)$/); + next if ($realfile =~ /(checkpatch|get_maintainer)\.pl$/); + + if ($rawline =~ /^\+.*\t/) { + my $herevet = "$here\n" . cat_vet($rawline) . "\n"; + ERROR("code indent should never use tabs\n" . $herevet); + $rpt_cleaners = 1; + } + +# check we are in a valid C source file if not then ignore this hunk + next if ($realfile !~ /\.((h|c)(\.inc)?|cpp)$/); + +# Block comment styles + + # Block comments use /* on a line of its own + if ($rawline !~ m@^\+.*/\*.*\*/[ \t)}]*$@ && #inline /*...*/ + $rawline =~ m@^\+.*/\*\*?+[ \t]*[^ \t]@) { # /* or /** non-blank + WARN("Block comments use a leading /* on a separate line\n" . $herecurr); + } + +# Block comments use * on subsequent lines + if ($prevline =~ /$;[ \t]*$/ && #ends in comment + $prevrawline =~ /^\+.*?\/\*/ && #starting /* + $prevrawline !~ /\*\/[ \t]*$/ && #no trailing */ + $rawline =~ /^\+/ && #line is new + $rawline !~ /^\+[ \t]*\*/) { #no leading * + WARN("Block comments use * on subsequent lines\n" . $hereprev); + } + +# Block comments use */ on trailing lines + if ($rawline !~ m@^\+[ \t]*\*/[ \t]*$@ && #trailing */ + $rawline !~ m@^\+.*/\*.*\*/[ \t]*$@ && #inline /*...*/ + $rawline !~ m@^\+.*\*{2,}/[ \t]*$@ && #trailing **/ + $rawline =~ m@^\+[ \t]*.+\*\/[ \t]*$@) { #non blank */ + WARN("Block comments use a trailing */ on a separate line\n" . $herecurr); + } + +# Block comment * alignment + if ($prevline =~ /$;[ \t]*$/ && #ends in comment + $line =~ /^\+[ \t]*$;/ && #leading comment + $rawline =~ /^\+[ \t]*\*/ && #leading * + (($prevrawline =~ /^\+.*?\/\*/ && #leading /* + $prevrawline !~ /\*\/[ \t]*$/) || #no trailing */ + $prevrawline =~ /^\+[ \t]*\*/)) { #leading * + my $oldindent; + $prevrawline =~ m@^\+([ \t]*/?)\*@; + if (defined($1)) { + $oldindent = expand_tabs($1); + } else { + $prevrawline =~ m@^\+(.*/?)\*@; + $oldindent = expand_tabs($1); + } + $rawline =~ m@^\+([ \t]*)\*@; + my $newindent = $1; + $newindent = expand_tabs($newindent); + if (length($oldindent) ne length($newindent)) { + WARN("Block comments should align the * on each line\n" . $hereprev); + } + } + +# Check for potential 'bare' types + my ($stat, $cond, $line_nr_next, $remain_next, $off_next, + $realline_next); + if ($realcnt && $line =~ /.\s*\S/) { + ($stat, $cond, $line_nr_next, $remain_next, $off_next) = + ctx_statement_block($linenr, $realcnt, 0); + $stat =~ s/\n./\n /g; + $cond =~ s/\n./\n /g; + + # Find the real next line. + $realline_next = $line_nr_next; + if (defined $realline_next && + (!defined $lines[$realline_next - 1] || + substr($lines[$realline_next - 1], $off_next) =~ /^\s*$/)) { + $realline_next++; + } + + my $s = $stat; + $s =~ s/{.*$//s; + + # Ignore goto labels. 
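+ # e.g. a bare "out:" label is skipped here rather than being
+ # recorded as a candidate type (assumed label name).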
+ if ($s =~ /$Ident:\*$/s) { + + # Ignore functions being called + } elsif ($s =~ /^.\s*$Ident\s*\(/s) { + + } elsif ($s =~ /^.\s*else\b/s) { + + # declarations always start with types + } elsif ($prev_values eq 'E' && $s =~ /^.\s*(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?((?:\s*$Ident)+?)\b(?:\s+$Sparse)?\s*\**\s*(?:$Ident|\(\*[^\)]*\))(?:\s*$Modifier)?\s*(?:;|=|,|\()/s) { + my $type = $1; + $type =~ s/\s+/ /g; + possible($type, "A:" . $s); + + # definitions in global scope can only start with types + } elsif ($s =~ /^.(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?($Ident)\b\s*(?!:)/s) { + possible($1, "B:" . $s); + } + + # any (foo ... *) is a pointer cast, and foo is a type + while ($s =~ /\(($Ident)(?:\s+$Sparse)*[\s\*]+\s*\)/sg) { + possible($1, "C:" . $s); + } + + # Check for any sort of function declaration. + # int foo(something bar, other baz); + # void (*store_gdt)(x86_descr_ptr *); + if ($prev_values eq 'E' && $s =~ /^(.(?:typedef\s*)?(?:(?:$Storage|$Inline)\s*)*\s*$Type\s*(?:\b$Ident|\(\*\s*$Ident\))\s*)\(/s) { + my ($name_len) = length($1); + + my $ctx = $s; + substr($ctx, 0, $name_len + 1, ''); + $ctx =~ s/\)[^\)]*$//; + + for my $arg (split(/\s*,\s*/, $ctx)) { + if ($arg =~ /^(?:const\s+)?($Ident)(?:\s+$Sparse)*\s*\**\s*(:?\b$Ident)?$/s || $arg =~ /^($Ident)$/s) { + + possible($1, "D:" . $s); + } + } + } + + } + +# +# Checks which may be anchored in the context. +# + +# Check for switch () and associated case and default +# statements should be at the same indent. + if ($line=~/\bswitch\s*\(.*\)/) { + my $err = ''; + my $sep = ''; + my @ctx = ctx_block_outer($linenr, $realcnt); + shift(@ctx); + for my $ctx (@ctx) { + my ($clen, $cindent) = line_stats($ctx); + if ($ctx =~ /^\+\s*(case\s+|default:)/ && + $indent != $cindent) { + $err .= "$sep$ctx\n"; + $sep = ''; + } else { + $sep = "[...]\n"; + } + } + if ($err ne '') { + ERROR("switch and case should be at the same indent\n$hereline$err"); + } + } + +# if/while/etc brace do not go on next line, unless defining a do while loop, +# or if that brace on the next line is for something else + if ($line =~ /(.*)\b((?:if|while|for|switch)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) { + my $pre_ctx = "$1$2"; + + my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, 0); + my $ctx_cnt = $realcnt - $#ctx - 1; + my $ctx = join("\n", @ctx); + + my $ctx_ln = $linenr; + my $ctx_skip = $realcnt; + + while ($ctx_skip > $ctx_cnt || ($ctx_skip == $ctx_cnt && + defined $lines[$ctx_ln - 1] && + $lines[$ctx_ln - 1] =~ /^-/)) { + ##print "SKIP<$ctx_skip> CNT<$ctx_cnt>\n"; + $ctx_skip-- if (!defined $lines[$ctx_ln - 1] || $lines[$ctx_ln - 1] !~ /^-/); + $ctx_ln++; + } + + #print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n"; + #print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n"; + + # The length of the "previous line" is checked against 80 because it + # includes the + at the beginning of the line (if the actual line has + # 79 or 80 characters, it is no longer possible to add a space and an + # opening brace there) + if ($#ctx == 0 && $ctx !~ /{\s*/ && + defined($lines[$ctx_ln - 1]) && $lines[$ctx_ln - 1] =~ /^\+\s*\{/ && + defined($lines[$ctx_ln - 2]) && length($lines[$ctx_ln - 2]) < 80) { + ERROR("that open brace { should be on the previous line\n" . 
+ "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n"); + } + if ($level == 0 && $pre_ctx !~ /}\s*while\s*\($/ && + $ctx =~ /\)\s*\;\s*$/ && + defined $lines[$ctx_ln - 1]) + { + my ($nlength, $nindent) = line_stats($lines[$ctx_ln - 1]); + if ($nindent > $indent) { + ERROR("trailing semicolon indicates no statements, indent implies otherwise\n" . + "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n"); + } + } + } + +# 'do ... while (0/false)' only makes sense in macros, without trailing ';' + if ($line =~ /while\s*\((0|false)\);/) { + ERROR("suspicious ; after while (0)\n" . $herecurr); + } + +# Check superfluous trailing ';' + if ($line =~ /;;$/) { + ERROR("superfluous trailing semicolon\n" . $herecurr); + } + +# Check relative indent for conditionals and blocks. + if ($line =~ /\b(?:(?:if|while|for)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) { + my ($s, $c) = ($stat, $cond); + + substr($s, 0, length($c), ''); + + # Make sure we remove the line prefixes as we have + # none on the first line, and are going to re-add them + # where necessary. + $s =~ s/\n./\n/gs; + + # Find out how long the conditional actually is. + my @newlines = ($c =~ /\n/gs); + my $cond_lines = 1 + $#newlines; + + # We want to check the first line inside the block + # starting at the end of the conditional, so remove: + # 1) any blank line termination + # 2) any opening brace { on end of the line + # 3) any do (...) { + my $continuation = 0; + my $check = 0; + $s =~ s/^.*\bdo\b//; + $s =~ s/^\s*\{//; + if ($s =~ s/^\s*\\//) { + $continuation = 1; + } + if ($s =~ s/^\s*?\n//) { + $check = 1; + $cond_lines++; + } + + # Also ignore a loop construct at the end of a + # preprocessor statement. + if (($prevline =~ /^.\s*#\s*define\s/ || + $prevline =~ /\\\s*$/) && $continuation == 0) { + $check = 0; + } + + my $cond_ptr = -1; + $continuation = 0; + while ($cond_ptr != $cond_lines) { + $cond_ptr = $cond_lines; + + # If we see an #else/#elif then the code + # is not linear. + if ($s =~ /^\s*\#\s*(?:else|elif)/) { + $check = 0; + } + + # Ignore: + # 1) blank lines, they should be at 0, + # 2) preprocessor lines, and + # 3) labels. + if ($continuation || + $s =~ /^\s*?\n/ || + $s =~ /^\s*#\s*?/ || + $s =~ /^\s*$Ident\s*:/) { + $continuation = ($s =~ /^.*?\\\n/) ? 1 : 0; + if ($s =~ s/^.*?\n//) { + $cond_lines++; + } + } + } + + my (undef, $sindent) = line_stats("+" . $s); + my $stat_real = raw_line($linenr, $cond_lines); + + # Check if either of these lines are modified, else + # this is not this patch's fault. + if (!defined($stat_real) || + $stat !~ /^\+/ && $stat_real !~ /^\+/) { + $check = 0; + } + if (defined($stat_real) && $cond_lines > 1) { + $stat_real = "[...]\n$stat_real"; + } + + #print "line<$line> prevline<$prevline> indent<$indent> sindent<$sindent> check<$check> continuation<$continuation> s<$s> cond_lines<$cond_lines> stat_real<$stat_real> stat<$stat>\n"; + + if ($check && (($sindent % 4) != 0 || + ($sindent <= $indent && $s ne ''))) { + ERROR("suspect code indent for conditional statements ($indent, $sindent)\n" . $herecurr . "$stat_real\n"); + } + } + + # Track the 'values' across context and added lines. + my $opline = $line; $opline =~ s/^./ /; + my ($curr_values, $curr_vars) = + annotate_values($opline . "\n", $prev_values); + $curr_values = $prev_values . 
$curr_values; + if ($dbg_values) { + my $outline = $opline; $outline =~ s/\t/ /g; + print "$linenr > .$outline\n"; + print "$linenr > $curr_values\n"; + print "$linenr > $curr_vars\n"; + } + $prev_values = substr($curr_values, -1); + +#ignore lines not being added + if ($line=~/^[^\+]/) {next;} + +# TEST: allow direct testing of the type matcher. + if ($dbg_type) { + if ($line =~ /^.\s*$Declare\s*$/) { + ERROR("TEST: is type\n" . $herecurr); + } elsif ($dbg_type > 1 && $line =~ /^.+($Declare)/) { + ERROR("TEST: is not type ($1 is)\n". $herecurr); + } + next; + } +# TEST: allow direct testing of the attribute matcher. + if ($dbg_attr) { + if ($line =~ /^.\s*$Modifier\s*$/) { + ERROR("TEST: is attr\n" . $herecurr); + } elsif ($dbg_attr > 1 && $line =~ /^.+($Modifier)/) { + ERROR("TEST: is not attr ($1 is)\n". $herecurr); + } + next; + } + +# check for initialisation to aggregates open brace on the next line + if ($line =~ /^.\s*\{/ && + $prevline =~ /(?:^|[^=])=\s*$/) { + ERROR("that open brace { should be on the previous line\n" . $hereprev); + } + +# +# Checks which are anchored on the added line. +# + +# check for malformed paths in #include statements (uses RAW line) + if ($rawline =~ m{^.\s*\#\s*include\s+[<"](.*)[">]}) { + my $path = $1; + if ($path =~ m{//}) { + ERROR("malformed #include filename\n" . + $herecurr); + } + } + +# no C99 // comments + if ($line =~ m{//} && + $rawline !~ m{// SPDX-License-Identifier: }) { + ERROR("do not use C99 // comments\n" . $herecurr); + } + # Remove C99 comments. + $line =~ s@//.*@@; + $opline =~ s@//.*@@; + +# check for global initialisers. + if ($line =~ /^.$Type\s*$Ident\s*(?:\s+$Modifier)*\s*=\s*(0|NULL|false)\s*;/) { + ERROR("do not initialise globals to 0 or NULL\n" . + $herecurr); + } +# check for static initialisers. + if ($line =~ /\bstatic\s.*=\s*(0|NULL|false)\s*;/) { + ERROR("do not initialise statics to 0 or NULL\n" . + $herecurr); + } + +# * goes on variable not on type + # (char*[ const]) + if ($line =~ m{\($NonptrType(\s*(?:$Modifier\b\s*|\*\s*)+)\)}) { + my ($from, $to) = ($1, $1); + + # Should start with a space. + $to =~ s/^(\S)/ $1/; + # Should not end with a space. + $to =~ s/\s+$//; + # '*'s should not have spaces between. + while ($to =~ s/\*\s+\*/\*\*/) { + } + + #print "from<$from> to<$to>\n"; + if ($from ne $to) { + ERROR("\"(foo$from)\" should be \"(foo$to)\"\n" . $herecurr); + } + } elsif ($line =~ m{\b$NonptrType(\s*(?:$Modifier\b\s*|\*\s*)+)($Ident)}) { + my ($from, $to, $ident) = ($1, $1, $2); + + # Should start with a space. + $to =~ s/^(\S)/ $1/; + # Should not end with a space. + $to =~ s/\s+$//; + # '*'s should not have spaces between. + while ($to =~ s/\*\s+\*/\*\*/) { + } + # Modifiers should have spaces. + $to =~ s/(\b$Modifier$)/$1 /; + + #print "from<$from> to<$to> ident<$ident>\n"; + if ($from ne $to && $ident !~ /^$Modifier$/) { + ERROR("\"foo${from}bar\" should be \"foo${to}bar\"\n" . $herecurr); + } + } + +# function brace can't be on same line, except for #defines of do while, +# or if closed on same line + if (($line=~/$Type\s*$Ident\(.*\).*\s\{/) and + !($line=~/\#\s*define.*do\s\{/) and !($line=~/}/)) { + ERROR("open brace '{' following function declarations go on the next line\n" . $herecurr); + } + +# open braces for enum, union and struct go on the same line. + if ($line =~ /^.\s*\{/ && + $prevline =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?\s*$/) { + ERROR("open brace '{' following $1 go on the same line\n" . 
$hereprev); + } + +# missing space after union, struct or enum definition + if ($line =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?(?:\s+$Ident)?[=\{]/) { + ERROR("missing space after $1 definition\n" . $herecurr); + } + +# check for spacing round square brackets; allowed: +# 1. with a type on the left -- int [] a; +# 2. at the beginning of a line for slice initialisers -- [0...10] = 5, +# 3. inside a curly brace -- = { [0...10] = 5 } +# 4. after a comma -- [1] = 5, [2] = 6 +# 5. in a macro definition -- #define abc(x) [x] = y + while ($line =~ /(.*?\s)\[/g) { + my ($where, $prefix) = ($-[1], $1); + if ($prefix !~ /$Type\s+$/ && + ($where != 0 || $prefix !~ /^.\s+$/) && + $prefix !~ /\#\s*define[^(]*\([^)]*\)\s+$/ && + $prefix !~ /[,{:]\s+$/) { + ERROR("space prohibited before open square bracket '['\n" . $herecurr); + } + } + +# check for spaces between functions and their parentheses. + while ($line =~ /($Ident)\s+\(/g) { + my $name = $1; + my $ctx_before = substr($line, 0, $-[1]); + my $ctx = "$ctx_before$name"; + + # Ignore those directives where spaces _are_ permitted. + if ($name =~ /^(?: + if|for|while|switch|return|case| + volatile|__volatile__|coroutine_fn| + __attribute__|format|__extension__| + asm|__asm__)$/x) + { + + # Ignore 'catch (...)' in C++ + } elsif ($name =~ /^catch$/ && $realfile =~ /(\.cpp|\.h)$/) { + + # cpp #define statements have non-optional spaces, ie + # if there is a space between the name and the open + # parenthesis it is simply not a parameter group. + } elsif ($ctx_before =~ /^.\s*\#\s*define\s*$/) { + + # cpp #elif statement condition may start with a ( + } elsif ($ctx =~ /^.\s*\#\s*elif\s*$/) { + + # If this whole things ends with a type its most + # likely a typedef for a function. + } elsif ($ctx =~ /$Type$/) { + + } else { + ERROR("space prohibited between function name and open parenthesis '('\n" . $herecurr); + } + } +# Check operator spacing. + if (!($line=~/\#\s*include/)) { + my $ops = qr{ + <<=|>>=|<=|>=|==|!=| + \+=|-=|\*=|\/=|%=|\^=|\|=|&=| + =>|->|<<|>>|<|>|=|!|~| + &&|\|\||,|\^|\+\+|--|&|\||\+|-|\*|\/|%| + \?|::|: + }x; + my @elements = split(/($ops|;)/, $opline); + my $off = 0; + + my $blank = copy_spacing($opline); + + for (my $n = 0; $n < $#elements; $n += 2) { + $off += length($elements[$n]); + + # Pick up the preceding and succeeding characters. + my $ca = substr($opline, 0, $off); + my $cc = ''; + if (length($opline) >= ($off + length($elements[$n + 1]))) { + $cc = substr($opline, $off + length($elements[$n + 1])); + } + my $cb = "$ca$;$cc"; + + my $a = ''; + $a = 'V' if ($elements[$n] ne ''); + $a = 'W' if ($elements[$n] =~ /\s$/); + $a = 'C' if ($elements[$n] =~ /$;$/); + $a = 'B' if ($elements[$n] =~ /(\[|\()$/); + $a = 'O' if ($elements[$n] eq ''); + $a = 'E' if ($ca =~ /^\s*$/); + + my $op = $elements[$n + 1]; + + my $c = ''; + if (defined $elements[$n + 2]) { + $c = 'V' if ($elements[$n + 2] ne ''); + $c = 'W' if ($elements[$n + 2] =~ /^\s/); + $c = 'C' if ($elements[$n + 2] =~ /^$;/); + $c = 'B' if ($elements[$n + 2] =~ /^(\)|\]|;)/); + $c = 'O' if ($elements[$n + 2] eq ''); + $c = 'E' if ($elements[$n + 2] =~ /^\s*\\$/); + } else { + $c = 'E'; + } + + my $ctx = "${a}x${c}"; + + my $at = "(ctx:$ctx)"; + + my $ptr = substr($blank, 0, $off) . "^"; + my $hereptr = "$hereline$ptr\n"; + + # Pull out the value of this operator. + my $op_type = substr($curr_values, $off + 1, 1); + + # Get the full operator variant. + my $opv = $op . substr($curr_vars, $off, 1); + + # Ignore operators passed as parameters. 
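+ # e.g. the bare '+' in a macro call such as "GEN_OP(x, +, y)" draws
+ # no spacing complaint (GEN_OP is an assumed, illustrative macro).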
+ if ($op_type ne 'V' && + $ca =~ /\s$/ && $cc =~ /^\s*,/) { + +# # Ignore comments +# } elsif ($op =~ /^$;+$/) { + + # ; should have either the end of line or a space or \ after it + } elsif ($op eq ';') { + if ($ctx !~ /.x[WEBC]/ && + $cc !~ /^\\/ && $cc !~ /^;/) { + ERROR("space required after that '$op' $at\n" . $hereptr); + } + + # // is a comment + } elsif ($op eq '//') { + + # Ignore : used in class declaration in C++ + } elsif ($opv eq ':B' && $ctx =~ /Wx[WE]/ && + $line =~ /class/ && $realfile =~ /(\.cpp|\.h)$/) { + + # No spaces for: + # -> + # : when part of a bitfield + } elsif ($op eq '->' || $opv eq ':B') { + if ($ctx =~ /Wx.|.xW/) { + ERROR("spaces prohibited around that '$op' $at\n" . $hereptr); + } + + # , must have a space on the right. + # not required when having a single },{ on one line + } elsif ($op eq ',') { + if ($ctx !~ /.x[WEC]/ && $cc !~ /^}/ && + ($elements[$n] . $elements[$n + 2]) !~ " *}\\{") { + ERROR("space required after that '$op' $at\n" . $hereptr); + } + + # '*' as part of a type definition -- reported already. + } elsif ($opv eq '*_') { + #warn "'*' is part of type\n"; + + # unary operators should have a space before and + # none after. May be left adjacent to another + # unary operator, or a cast + } elsif ($op eq '!' || $op eq '~' || + $opv eq '*U' || $opv eq '-U' || + $opv eq '&U' || $opv eq '&&U') { + if ($op eq '~' && $ca =~ /::$/ && $realfile =~ /(\.cpp|\.h)$/) { + # '~' used as a name of Destructor + + } elsif ($ctx !~ /[WEBC]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) { + ERROR("space required before that '$op' $at\n" . $hereptr); + } + if ($op eq '*' && $cc =~/\s*$Modifier\b/) { + # A unary '*' may be const + + } elsif ($ctx =~ /.xW/) { + ERROR("space prohibited after that '$op' $at\n" . $hereptr); + } + + # unary ++ and unary -- are allowed no space on one side. + } elsif ($op eq '++' or $op eq '--') { + if ($ctx !~ /[WEOBC]x[^W]/ && $ctx !~ /[^W]x[WOBEC]/) { + ERROR("space required one side of that '$op' $at\n" . $hereptr); + } + if ($ctx =~ /Wx[BE]/ || + ($ctx =~ /Wx./ && $cc =~ /^;/)) { + ERROR("space prohibited before that '$op' $at\n" . $hereptr); + } + if ($ctx =~ /ExW/) { + ERROR("space prohibited after that '$op' $at\n" . $hereptr); + } + + # A colon needs no spaces before when it is + # terminating a case value or a label. + } elsif ($opv eq ':C' || $opv eq ':L') { + if ($ctx =~ /Wx./) { + ERROR("space prohibited before that '$op' $at\n" . $hereptr); + } + + # All the others need spaces both sides. + } elsif ($ctx !~ /[EWC]x[CWE]/) { + my $ok = 0; + + if ($realfile =~ /\.cpp|\.h$/) { + # Ignore template arguments <...> in C++ + if (($op eq '<' || $op eq '>') && $line =~ /<.*>/) { + $ok = 1; + } + + # Ignore :: in C++ + if ($op eq '::') { + $ok = 1; + } + } + + # Ignore email addresses <foo@bar> + if (($op eq '<' && + $cc =~ /^\S+\@\S+>/) || + ($op eq '>' && + $ca =~ /<\S+\@\S+$/)) + { + $ok = 1; + } + + # Ignore ?: + if (($opv eq ':O' && $ca =~ /\?$/) || + ($op eq '?' && $cc =~ /^:/)) { + $ok = 1; + } + + if ($ok == 0) { + ERROR("spaces required around that '$op' $at\n" . $hereptr); + } + } + $off += length($elements[$n + 1]); + } + } + +#need space before brace following if, while, etc + if (($line =~ /\(.*\)\{/ && $line !~ /\($Type\)\{/) || + $line =~ /do\{/) { + ERROR("space required before the open brace '{'\n" . $herecurr); + } + +# closing brace should have a space following it when it has anything +# on the line + if ($line =~ /}(?!(?:,|;|\)))\S/) { + ERROR("space required after that close brace '}'\n" . 
$herecurr); + } + +# check spacing on square brackets + if ($line =~ /\[\s/ && $line !~ /\[\s*$/) { + ERROR("space prohibited after that open square bracket '['\n" . $herecurr); + } + if ($line =~ /\s\]/) { + ERROR("space prohibited before that close square bracket ']'\n" . $herecurr); + } + +# check spacing on parentheses + if ($line =~ /\(\s/ && $line !~ /\(\s*(?:\\)?$/ && + $line !~ /for\s*\(\s+;/) { + ERROR("space prohibited after that open parenthesis '('\n" . $herecurr); + } + if ($line =~ /(\s+)\)/ && $line !~ /^.\s*\)/ && + $line !~ /for\s*\(.*;\s+\)/ && + $line !~ /:\s+\)/) { + ERROR("space prohibited before that close parenthesis ')'\n" . $herecurr); + } + +# Return is not a function. + if (defined($stat) && $stat =~ /^.\s*return(\s*)(\(.*);/s) { + my $spacing = $1; + my $value = $2; + + # Flatten any parentheses + $value =~ s/\(/ \(/g; + $value =~ s/\)/\) /g; + while ($value =~ s/\[[^\{\}]*\]/1/ || + $value !~ /(?:$Ident|-?$Constant)\s* + $Compare\s* + (?:$Ident|-?$Constant)/x && + $value =~ s/\([^\(\)]*\)/1/) { + } +#print "value<$value>\n"; + if ($value =~ /^\s*(?:$Ident|-?$Constant)\s*$/ && + $line =~ /;$/) { + ERROR("return is not a function, parentheses are not required\n" . $herecurr); + + } elsif ($spacing !~ /\s+/) { + ERROR("space required before the open parenthesis '('\n" . $herecurr); + } + } +# Return of what appears to be an errno should normally be -'ve + if ($line =~ /^.\s*return\s*(E[A-Z]*)\s*;/) { + my $name = $1; + if ($name ne 'EOF' && $name ne 'ERROR') { + ERROR("return of an errno should typically be -ve (return -$1)\n" . $herecurr); + } + } + + if ($line =~ /^.\s*(Q(?:S?LIST|SIMPLEQ|TAILQ)_HEAD)\s*\(\s*[^,]/ && + $line !~ /^.typedef/) { + ERROR("named $1 should be typedefed separately\n" . $herecurr); + } + +# Need a space before open parenthesis after if, while etc + if ($line=~/\b(if|while|for|switch)\(/) { + ERROR("space required before the open parenthesis '('\n" . $herecurr); + } + +# Check for illegal assignment in if conditional -- and check for trailing +# statements after the conditional. + if ($line =~ /do\s*(?!{)/) { + my ($stat_next) = ctx_statement_block($line_nr_next, + $remain_next, $off_next); + $stat_next =~ s/\n./\n /g; + ##print "stat<$stat> stat_next<$stat_next>\n"; + + if ($stat_next =~ /^\s*while\b/) { + # If the statement carries leading newlines, + # then count those as offsets. + my ($whitespace) = + ($stat_next =~ /^((?:\s*\n[+-])*\s*)/s); + my $offset = + statement_rawlines($whitespace) - 1; + + $suppress_whiletrailers{$line_nr_next + + $offset} = 1; + } + } + if (!defined $suppress_whiletrailers{$linenr} && + $line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) { + my ($s, $c) = ($stat, $cond); + + if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/s) { + ERROR("do not use assignment in if condition\n" . $herecurr); + } + + # Find out what is on the end of the line after the + # conditional. + substr($s, 0, length($c), ''); + $s =~ s/\n.*//g; + $s =~ s/$;//g; # Remove any comments + if (length($c) && $s !~ /^\s*{?\s*\\*\s*$/ && + $c !~ /}\s*while\s*/) + { + # Find out how long the conditional actually is. + my @newlines = ($c =~ /\n/gs); + my $cond_lines = 1 + $#newlines; + my $stat_real = ''; + + $stat_real = raw_line($linenr, $cond_lines) + . "\n" if ($cond_lines); + if (defined($stat_real) && $cond_lines > 1) { + $stat_real = "[...]\n$stat_real"; + } + + ERROR("trailing statements should be on next line\n" . $herecurr . 
$stat_real); + } + } + +# Check for bitwise tests written as boolean + if ($line =~ / + (?: + (?:\[|\(|\&\&|\|\|) + \s*0[xX][0-9]+\s* + (?:\&\&|\|\|) + | + (?:\&\&|\|\|) + \s*0[xX][0-9]+\s* + (?:\&\&|\|\||\)|\]) + )/x) + { + ERROR("boolean test with hexadecimal, perhaps just 1 \& or \|?\n" . $herecurr); + } + +# if and else should not have general statements after it + if ($line =~ /^.\s*(?:}\s*)?else\b(.*)/) { + my $s = $1; + $s =~ s/$;//g; # Remove any comments + if ($s !~ /^\s*(?:\sif|(?:{|)\s*\\?\s*$)/) { + ERROR("trailing statements should be on next line\n" . $herecurr); + } + } +# if should not continue a brace + if ($line =~ /}\s*if\b/) { + ERROR("trailing statements should be on next line\n" . + $herecurr); + } +# case and default should not have general statements after them + if ($line =~ /^.\s*(?:case\s*.*|default\s*):/g && + $line !~ /\G(?: + (?:\s*$;*)(?:\s*{)?(?:\s*$;*)(?:\s*\\)?\s*$| + \s*return\s+ + )/xg) + { + ERROR("trailing statements should be on next line\n" . $herecurr); + } + + # Check for }<nl>else {, these must be at the same + # indent level to be relevant to each other. + if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ and + $previndent == $indent) { + ERROR("else should follow close brace '}'\n" . $hereprev); + } + + if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ and + $previndent == $indent) { + my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0); + + # Find out what is on the end of the line after the + # conditional. + substr($s, 0, length($c), ''); + $s =~ s/\n.*//g; + + if ($s =~ /^\s*;/) { + ERROR("while should follow close brace '}'\n" . $hereprev); + } + } + +#studly caps, commented out until figure out how to distinguish between use of existing and adding new +# if (($line=~/[\w_][a-z\d]+[A-Z]/) and !($line=~/print/)) { +# print "No studly caps, use _\n"; +# print "$herecurr"; +# $clean = 0; +# } + +#no spaces allowed after \ in define + if ($line=~/\#\s*define.*\\\s$/) { + ERROR("Whitespace after \\ makes next lines useless\n" . $herecurr); + } + +# multi-statement macros should be enclosed in a do while loop, grab the +# first statement and ensure its the whole macro if its not enclosed +# in a known good container + if ($realfile !~ m@/vmlinux.lds.h$@ && + $line =~ /^.\s*\#\s*define\s*$Ident(\()?/) { + my $ln = $linenr; + my $cnt = $realcnt; + my ($off, $dstat, $dcond, $rest); + my $ctx = ''; + + my $args = defined($1); + + # Find the end of the macro and limit our statement + # search to that. + while ($cnt > 0 && defined $lines[$ln - 1] && + $lines[$ln - 1] =~ /^(?:-|..*\\$)/) + { + $ctx .= $rawlines[$ln - 1] . "\n"; + $cnt-- if ($lines[$ln - 1] !~ /^-/); + $ln++; + } + $ctx .= $rawlines[$ln - 1]; + + ($dstat, $dcond, $ln, $cnt, $off) = + ctx_statement_block($linenr, $ln - $linenr + 1, 0); + #print "dstat<$dstat> dcond<$dcond> cnt<$cnt> off<$off>\n"; + #print "LINE<$lines[$ln-1]> len<" . length($lines[$ln-1]) . "\n"; + + # Extract the remainder of the define (if any) and + # rip off surrounding spaces, and trailing \'s. + $rest = ''; + while ($off != 0 || ($cnt > 0 && $rest =~ /\\\s*$/)) { + #print "ADDING cnt<$cnt> $off <" . substr($lines[$ln - 1], $off) . "> rest<$rest>\n"; + if ($off != 0 || $lines[$ln - 1] !~ /^-/) { + $rest .= substr($lines[$ln - 1], $off) . "\n"; + $cnt--; + } + $ln++; + $off = 0; + } + $rest =~ s/\\\n.//g; + $rest =~ s/^\s*//s; + $rest =~ s/\s*$//s; + + # Clean up the original statement. 
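+ # i.e. strip the leading "#define NAME" (or "#define NAME(args)")
+ # so only the macro body is left in $dstat for the tests below.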
+ if ($args) { + substr($dstat, 0, length($dcond), ''); + } else { + $dstat =~ s/^.\s*\#\s*define\s+$Ident\s*//; + } + $dstat =~ s/$;//g; + $dstat =~ s/\\\n.//g; + $dstat =~ s/^\s*//s; + $dstat =~ s/\s*$//s; + + # Flatten any parentheses and braces + while ($dstat =~ s/\([^\(\)]*\)/1/ || + $dstat =~ s/\{[^\{\}]*\}/1/ || + $dstat =~ s/\[[^\{\}]*\]/1/) + { + } + + my $exceptions = qr{ + $Declare| + module_param_named| + MODULE_PARAM_DESC| + DECLARE_PER_CPU| + DEFINE_PER_CPU| + __typeof__\(| + union| + struct| + \.$Ident\s*=\s*| + ^\"|\"$ + }x; + #print "REST<$rest> dstat<$dstat> ctx<$ctx>\n"; + if ($rest ne '' && $rest ne ',') { + if ($rest !~ /while\s*\(/ && + $dstat !~ /$exceptions/) + { + ERROR("Macros with multiple statements should be enclosed in a do - while loop\n" . "$here\n$ctx\n"); + } + + } elsif ($ctx !~ /;/) { + if ($dstat ne '' && + $dstat !~ /^(?:$Ident|-?$Constant)$/ && + $dstat !~ /$exceptions/ && + $dstat !~ /^\.$Ident\s*=/ && + $dstat =~ /$Operators/) + { + ERROR("Macros with complex values should be enclosed in parenthesis\n" . "$here\n$ctx\n"); + } + } + } + +# check for missing bracing around if etc + if ($line =~ /(^.*)\b(?:if|while|for)\b/ && + $line !~ /\#\s*if/) { + my $allowed = 0; + + # Check the pre-context. + if ($line =~ /(\}.*?)$/) { + my $pre = $1; + + if ($line !~ /else/) { + print "APW: ALLOWED: pre<$pre> line<$line>\n" + if $dbg_adv_apw; + $allowed = 1; + } + } + my ($level, $endln, @chunks) = + ctx_statement_full($linenr, $realcnt, 1); + if ($dbg_adv_apw) { + print "APW: chunks<$#chunks> linenr<$linenr> endln<$endln> level<$level>\n"; + print "APW: <<$chunks[1][0]>><<$chunks[1][1]>>\n" + if $#chunks >= 1; + } + if ($#chunks >= 0 && $level == 0) { + my $seen = 0; + my $herectx = $here . "\n"; + my $ln = $linenr - 1; + for my $chunk (@chunks) { + my ($cond, $block) = @{$chunk}; + + # If the condition carries leading newlines, then count those as offsets. + my ($whitespace) = ($cond =~ /^((?:\s*\n[+-])*\s*)/s); + my $offset = statement_rawlines($whitespace) - 1; + + #print "COND<$cond> whitespace<$whitespace> offset<$offset>\n"; + + # We have looked at and allowed this specific line. + $suppress_ifbraces{$ln + $offset} = 1; + + $herectx .= "$rawlines[$ln + $offset]\n[...]\n"; + $ln += statement_rawlines($block) - 1; + + substr($block, 0, length($cond), ''); + + my $spaced_block = $block; + $spaced_block =~ s/\n\+/ /g; + + $seen++ if ($spaced_block =~ /^\s*\{/); + + print "APW: cond<$cond> block<$block> allowed<$allowed>\n" + if $dbg_adv_apw; + if (statement_lines($cond) > 1) { + print "APW: ALLOWED: cond<$cond>\n" + if $dbg_adv_apw; + $allowed = 1; + } + if ($block =~/\b(?:if|for|while)\b/) { + print "APW: ALLOWED: block<$block>\n" + if $dbg_adv_apw; + $allowed = 1; + } + if (statement_block_size($block) > 1) { + print "APW: ALLOWED: lines block<$block>\n" + if $dbg_adv_apw; + $allowed = 1; + } + } + if ($seen != ($#chunks + 1) && !$allowed) { + ERROR("braces {} are necessary for all arms of this statement\n" . $herectx); + } + } + } + if (!defined $suppress_ifbraces{$linenr - 1} && + $line =~ /\b(if|while|for|else)\b/ && + $line !~ /\#\s*if/ && + $line !~ /\#\s*else/) { + my $allowed = 0; + + # Check the pre-context. + if (substr($line, 0, $-[0]) =~ /(\}\s*)$/) { + my $pre = $1; + + if ($line !~ /else/) { + print "APW: ALLOWED: pre<$pre> line<$line>\n" + if $dbg_adv_apw; + $allowed = 1; + } + } + + my ($level, $endln, @chunks) = + ctx_statement_full($linenr, $realcnt, $-[0]); + + # Check the condition. 
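+ # ($chunks[0] is the conditional/block pair found at the keyword
+ # matched above: $cond holds e.g. "if (...)", $block its statement)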
+ my ($cond, $block) = @{$chunks[0]}; + print "CHECKING<$linenr> cond<$cond> block<$block>\n" + if $dbg_adv_checking; + if (defined $cond) { + substr($block, 0, length($cond), ''); + } + if (statement_lines($cond) > 1) { + print "APW: ALLOWED: cond<$cond>\n" + if $dbg_adv_apw; + $allowed = 1; + } + if ($block =~/\b(?:if|for|while)\b/) { + print "APW: ALLOWED: block<$block>\n" + if $dbg_adv_apw; + $allowed = 1; + } + if (statement_block_size($block) > 1) { + print "APW: ALLOWED: lines block<$block>\n" + if $dbg_adv_apw; + $allowed = 1; + } + # Check the post-context. + if (defined $chunks[1]) { + my ($cond, $block) = @{$chunks[1]}; + if (defined $cond) { + substr($block, 0, length($cond), ''); + } + if ($block =~ /^\s*\{/) { + print "APW: ALLOWED: chunk-1 block<$block>\n" + if $dbg_adv_apw; + $allowed = 1; + } + } + print "DCS: level=$level block<$block> allowed=$allowed\n" + if $dbg_adv_dcs; + if ($level == 0 && $block !~ /^\s*\{/ && !$allowed) { + my $herectx = $here . "\n";; + my $cnt = statement_rawlines($block); + + for (my $n = 0; $n < $cnt; $n++) { + $herectx .= raw_line($linenr, $n) . "\n";; + } + + ERROR("braces {} are necessary even for single statement blocks\n" . $herectx); + } + } + +# no volatiles please + my $asm_volatile = qr{\b(__asm__|asm)\s+(__volatile__|volatile)\b}; + if ($line =~ /\bvolatile\b/ && $line !~ /$asm_volatile/ && + $line !~ /sig_atomic_t/ && + !ctx_has_comment($first_line, $linenr)) { + my $msg = "Use of volatile is usually wrong, please add a comment\n" . $herecurr; + ERROR($msg); + } + +# warn about #if 0 + if ($line =~ /^.\s*\#\s*if\s+0\b/) { + ERROR("if this code is redundant consider removing it\n" . + $herecurr); + } + +# check for needless g_free() checks + if ($prevline =~ /\bif\s*\(([^\)]*)\)/) { + my $expr = $1; + if ($line =~ /\bg_free\(\Q$expr\E\);/) { + ERROR("g_free(NULL) is safe this check is probably not required\n" . $hereprev); + } + } + +# warn about #ifdefs in C files +# if ($line =~ /^.\s*\#\s*if(|n)def/ && ($realfile =~ /\.c$/)) { +# print "#ifdef in C files should be avoided\n"; +# print "$herecurr"; +# $clean = 0; +# } + +# warn about spacing in #ifdefs + if ($line =~ /^.\s*\#\s*(ifdef|ifndef|elif)\s\s+/) { + ERROR("exactly one space required after that #$1\n" . $herecurr); + } +# check for memory barriers without a comment. + if ($line =~ /\b(smp_mb|smp_rmb|smp_wmb|smp_read_barrier_depends)\(/) { + if (!ctx_has_comment($first_line, $linenr)) { + ERROR("memory barrier without comment\n" . $herecurr); + } + } +# check of hardware specific defines +# we have e.g. CONFIG_LINUX and CONFIG_WIN32 for common cases +# where they might be necessary. + if ($line =~ m@^.\s*\#\s*if.*\b__@) { + WARN("architecture specific defines should be avoided\n" . $herecurr); + } + +# Check that the storage class is at the beginning of a declaration + if ($line =~ /\b$Storage\b/ && $line !~ /^.\s*$Storage\b/) { + ERROR("storage class should be at the beginning of the declaration\n" . $herecurr) + } + +# check the location of the inline attribute, that it is between +# storage class and type. + if ($line =~ /\b$Type\s+$Inline\b/ || + $line =~ /\b$Inline\s+$Storage\b/) { + ERROR("inline keyword should sit between storage class and type\n" . $herecurr); + } + +# check for sizeof(&) + if ($line =~ /\bsizeof\s*\(\s*\&/) { + ERROR("sizeof(& should be avoided\n" . $herecurr); + } + +# check for new externs in .c files. 
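+ # e.g. an added "int helper(void);" at file scope in a .c file is
+ # flagged -- such declarations belong in a header (assumed example).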
+    if ($realfile =~ /\.c$/ && defined $stat &&
+        $stat =~ /^.\s*(?:extern\s+)?$Type\s+($Ident)(\s*)\(/s)
+    {
+        my $function_name = $1;
+        my $paren_space = $2;
+
+        my $s = $stat;
+        if (defined $cond) {
+            substr($s, 0, length($cond), '');
+        }
+        if ($s =~ /^\s*;/ &&
+            $function_name ne 'uninitialized_var')
+        {
+            ERROR("externs should be avoided in .c files\n" . $herecurr);
+        }
+
+        if ($paren_space =~ /\n/) {
+            ERROR("arguments for function declarations should follow identifier\n" . $herecurr);
+        }
+
+    } elsif ($realfile =~ /\.c$/ && defined $stat &&
+        $stat =~ /^.\s*extern\s+/)
+    {
+        ERROR("externs should be avoided in .c files\n" . $herecurr);
+    }
+
+# check for pointless casting of g_malloc return
+    if ($line =~ /\*\s*\)\s*g_(try)?(m|re)alloc(0?)(_n)?\b/) {
+        if ($2 eq 'm') {
+            ERROR("unnecessary cast may hide bugs, use g_$1new$3 instead\n" . $herecurr);
+        } else {
+            ERROR("unnecessary cast may hide bugs, use g_$1renew$3 instead\n" . $herecurr);
+        }
+    }
+
+# check for gcc specific __FUNCTION__
+    if ($line =~ /__FUNCTION__/) {
+        ERROR("__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr);
+    }
+
+# recommend g_path_get_* over g_strdup(basename/dirname(...))
+    if ($line =~ /\bg_strdup\s*\(\s*(basename|dirname)\s*\(/) {
+        WARN("consider using g_path_get_$1() in preference to g_strdup($1())\n" . $herecurr);
+    }
+
+# recommend qemu_strto* over strto* for numeric conversions
+    if ($line =~ /\b(strto[^kd].*?)\s*\(/) {
+        ERROR("consider using qemu_$1 in preference to $1\n" . $herecurr);
+    }
+
+# recommend sigaction over signal for portability, when establishing a handler
+    if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) {
+        ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr);
+    }
+
+# check for module_init(), use category-specific init macros explicitly please
+    if ($line =~ /^module_init\s*\(/) {
+        ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr);
+    }
+
+# check for various ops structs, ensure they are const.
+    my $struct_ops = qr{AIOCBInfo|
+                BdrvActionOps|
+                BlockDevOps|
+                BlockJobDriver|
+                DisplayChangeListenerOps|
+                GraphicHwOps|
+                IDEDMAOps|
+                KVMCapabilityInfo|
+                MemoryRegionIOMMUOps|
+                MemoryRegionOps|
+                MemoryRegionPortio|
+                QEMUFileOps|
+                SCSIBusInfo|
+                SCSIReqOps|
+                Spice[A-Z][a-zA-Z0-9]*Interface|
+                USBDesc[A-Z][a-zA-Z0-9]*|
+                VhostOps|
+                VMStateDescription|
+                VMStateInfo}x;
+    if ($line !~ /\bconst\b/ &&
+        $line =~ /\b($struct_ops)\b.*=/) {
+        ERROR("initializer for struct $1 should normally be const\n" .
+              $herecurr);
+    }
+
+# format strings checks
+    my $string;
+    while ($line =~ /(?:^|")([X\t]*)(?:"|$)/g) {
+        $string = substr($rawline, $-[1], $+[1] - $-[1]);
+        $string =~ s/%%/__/g;
+        # check for %L{u,d,i} in strings
+        if ($string =~ /(?<!%)%L[udi]/) {
+            ERROR("\%Ld/%Lu are non-standard C, use %lld/%llu\n" . $herecurr);
+        }
+        # check for %# or %0# in printf-style format strings
+        if ($string =~ /(?<!%)%0?#/) {
+            ERROR("Don't use '#' flag of printf format " .
+                  "('%#') in format strings, use '0x' " .
+                  "prefix instead\n" . $herecurr);
+        }
+    }
+
+# QEMU specific tests
+    if ($rawline =~ /\b(?:Qemu|QEmu)\b/) {
+        ERROR("use QEMU instead of Qemu or QEmu\n" .
+              $herecurr);
+    }
+
+# QEMU error function tests
+
+    # Find newlines in error messages
+    my $qemu_error_funcs = qr{error_setg|
+                error_setg_errno|
+                error_setg_win32|
+                error_setg_file_open|
+                error_set|
+                error_prepend|
+                warn_reportf_err|
+                error_reportf_err|
+                error_vreport|
+                warn_vreport|
+                info_vreport|
+                error_report|
+                warn_report|
+                info_report|
+                g_test_message}x;
+
+    if ($rawline =~ /\b(?:$qemu_error_funcs)\s*\(.*\".*\\n/) {
+        ERROR("Error messages should not contain newlines\n" . $herecurr);
+    }
+
+    # Continue checking for error messages that contain newlines.  This
+    # check handles cases where string literals are spread over multiple
+    # lines.  Example:
+    # error_report("Error msg line #1"
+    #              "Error msg line #2\n");
+    my $quoted_newline_regex = qr{\+\s*\".*\\n.*\"};
+    my $continued_str_literal = qr{\+\s*\".*\"};
+
+    if ($rawline =~ /$quoted_newline_regex/) {
+        # Backtrack to first line that does not contain only a quoted literal
+        # and assume that it is the start of the statement.
+        my $i = $linenr - 2;
+
+        while (($i >= 0) && $rawlines[$i] =~ /$continued_str_literal/) {
+            $i--;
+        }
+
+        if ($rawlines[$i] =~ /\b(?:$qemu_error_funcs)\s*\(/) {
+            ERROR("Error messages should not contain newlines\n" . $herecurr);
+        }
+    }
+
+# check for non-portable libc calls that have portable alternatives in QEMU
+    if ($line =~ /\bffs\(/) {
+        ERROR("use ctz32() instead of ffs()\n" . $herecurr);
+    }
+    if ($line =~ /\bffsl\(/) {
+        ERROR("use ctz32() or ctz64() instead of ffsl()\n" . $herecurr);
+    }
+    if ($line =~ /\bffsll\(/) {
+        ERROR("use ctz64() instead of ffsll()\n" . $herecurr);
+    }
+    if ($line =~ /\bbzero\(/) {
+        ERROR("use memset() instead of bzero()\n" . $herecurr);
+    }
+    if ($line =~ /\bgetpagesize\(\)/) {
+        ERROR("use qemu_real_host_page_size instead of getpagesize()\n" . $herecurr);
+    }
+    if ($line =~ /\bsysconf\(_SC_PAGESIZE\)/) {
+        ERROR("use qemu_real_host_page_size instead of sysconf(_SC_PAGESIZE)\n" . $herecurr);
+    }
+    my $non_exit_glib_asserts = qr{g_assert_cmpstr|
+                    g_assert_cmpint|
+                    g_assert_cmpuint|
+                    g_assert_cmphex|
+                    g_assert_cmpfloat|
+                    g_assert_true|
+                    g_assert_false|
+                    g_assert_nonnull|
+                    g_assert_null|
+                    g_assert_no_error|
+                    g_assert_error|
+                    g_test_assert_expected_messages|
+                    g_test_trap_assert_passed|
+                    g_test_trap_assert_stdout|
+                    g_test_trap_assert_stdout_unmatched|
+                    g_test_trap_assert_stderr|
+                    g_test_trap_assert_stderr_unmatched}x;
+    if ($realfile !~ /^tests\// &&
+        $line =~ /\b(?:$non_exit_glib_asserts)\(/) {
+        ERROR("Use g_assert or g_assert_not_reached\n" . $herecurr);
+    }
+    }
+
+    if ($is_patch && $chk_signoff && $signoff == 0) {
+        ERROR("Missing Signed-off-by: line(s)\n");
+    }
+
+    # If we have no input at all, then there is nothing to report on
+    # so just keep quiet.
+    if ($#rawlines == -1) {
+        return 1;
+    }
+
+    # In mailback mode only produce a report in the negative, for
+    # things that appear to be patches.
+    if ($mailback && ($clean == 1 || !$is_patch)) {
+        return 1;
+    }
+
+    # This is not a patch, and we are in 'no-patch' mode so
+    # just keep quiet.
+    if (!$chk_patch && !$is_patch) {
+        return 1;
+    }
+
+    if (!$is_patch && $filename !~ /cover-letter\.patch$/) {
+        ERROR("Does not appear to be a unified-diff format patch\n");
+    }
+
+    print report_dump();
+    if ($summary && !($clean == 1 && $quiet == 1)) {
+        print "$filename " if ($summary_file);
+        print "total: $cnt_error errors, $cnt_warn warnings, " .
+ "$cnt_lines lines checked\n"; + print "\n" if ($quiet == 0); + } + + if ($quiet == 0) { + # If there were whitespace errors which cleanpatch can fix + # then suggest that. +# if ($rpt_cleaners) { +# print "NOTE: whitespace errors detected, you may wish to use scripts/cleanpatch or\n"; +# print " scripts/cleanfile\n\n"; +# } + } + + if ($clean == 1 && $quiet == 0) { + print "$vname has no obvious style problems and is ready for submission.\n" + } + if ($clean == 0 && $quiet == 0) { + print "$vname has style problems, please review. If any of these errors\n"; + print "are false positives report them to the maintainer, see\n"; + print "CHECKPATCH in MAINTAINERS.\n"; + } + + return ($no_warnings ? $clean : $cnt_error == 0); +} diff --git a/scripts/ci/coverage-summary.sh b/scripts/ci/coverage-summary.sh new file mode 100755 index 000000000..8d9fb4de4 --- /dev/null +++ b/scripts/ci/coverage-summary.sh @@ -0,0 +1,27 @@ +#!/bin/sh +# +# Author: Alex Bennée <alex.bennee@linaro.org> +# +# Summerise the state of code coverage with gcovr and tweak the output +# to be more sane on CI runner. As we expect to be executed on a +# throw away CI instance we do spam temp files all over the shop. You +# most likely don't want to execute this script but just call gcovr +# directly. See also "make coverage-report" +# +# This code is licensed under the GPL version 2 or later. See +# the COPYING file in the top-level directory. + +# first generate the coverage report +gcovr -p -o raw-report.txt + +# strip the full-path and line markers +sed s@$PWD\/@@ raw-report.txt | sed s/[0-9]\*[,-]//g > simplified.txt + +# reflow lines that got split +awk '/.[ch]$/ { printf("%s", $0); next } 1' simplified.txt > rejoined.txt + +# columnify +column -t rejoined.txt > final.txt + +# and dump, stripping out 0% coverage +grep -v "0%" final.txt diff --git a/scripts/ci/gitlab-pipeline-status b/scripts/ci/gitlab-pipeline-status new file mode 100755 index 000000000..924db327f --- /dev/null +++ b/scripts/ci/gitlab-pipeline-status @@ -0,0 +1,195 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2019-2020 Red Hat, Inc. +# +# Author: +# Cleber Rosa <crosa@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. 
+ +""" +Checks the GitLab pipeline status for a given commit ID +""" + +# pylint: disable=C0103 + +import argparse +import http.client +import json +import os +import subprocess +import time +import sys + + +class CommunicationFailure(Exception): + """Failed to communicate to gitlab.com APIs.""" + + +class NoPipelineFound(Exception): + """Communication is successfull but pipeline is not found.""" + + +def get_local_branch_commit(branch): + """ + Returns the commit sha1 for the *local* branch named "staging" + """ + result = subprocess.run(['git', 'rev-parse', branch], + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + cwd=os.path.dirname(__file__), + universal_newlines=True).stdout.strip() + if result == branch: + raise ValueError("There's no local branch named '%s'" % branch) + if len(result) != 40: + raise ValueError("Branch '%s' HEAD doesn't look like a sha1" % branch) + return result + + +def get_json_http_response(url): + """ + Returns the JSON content of an HTTP GET request to gitlab.com + """ + connection = http.client.HTTPSConnection('gitlab.com') + connection.request('GET', url=url) + response = connection.getresponse() + if response.code != http.HTTPStatus.OK: + msg = "Received unsuccessful response: %s (%s)" % (response.code, + response.reason) + raise CommunicationFailure(msg) + return json.loads(response.read()) + + +def get_pipeline_status(project_id, commit_sha1): + """ + Returns the JSON content of the pipeline status API response + """ + url = '/api/v4/projects/{}/pipelines?sha={}'.format(project_id, + commit_sha1) + json_response = get_json_http_response(url) + + # As far as I can tell, there should be only one pipeline for the same + # project + commit. If this assumption is false, we can add further + # filters to the url, such as username, and order_by. + if not json_response: + msg = "No pipeline found for project %s and commit %s" % (project_id, + commit_sha1) + raise NoPipelineFound(msg) + return json_response[0] + + +def wait_on_pipeline_success(timeout, interval, + project_id, commit_sha): + """ + Waits for the pipeline to finish within the given timeout + """ + start = time.time() + while True: + if time.time() >= (start + timeout): + msg = ("Timeout (-t/--timeout) of %i seconds reached, " + "won't wait any longer for the pipeline to complete") + msg %= timeout + print(msg) + return False + + try: + status = get_pipeline_status(project_id, commit_sha) + except NoPipelineFound: + print('Pipeline has not been found, it may not have been created yet.') + time.sleep(1) + continue + + pipeline_status = status['status'] + status_to_wait = ('created', 'waiting_for_resource', 'preparing', + 'pending', 'running') + if pipeline_status in status_to_wait: + print('%s...' % pipeline_status) + time.sleep(interval) + continue + + if pipeline_status == 'success': + return True + + msg = "Pipeline failed, check: %s" % status['web_url'] + print(msg) + return False + + +def create_parser(): + parser = argparse.ArgumentParser( + prog='pipeline-status', + description='check or wait on a pipeline status') + + parser.add_argument('-t', '--timeout', type=int, default=7200, + help=('Amount of time (in seconds) to wait for the ' + 'pipeline to complete. Defaults to ' + '%(default)s')) + parser.add_argument('-i', '--interval', type=int, default=60, + help=('Amount of time (in seconds) to wait between ' + 'checks of the pipeline status. 
Defaults ' + 'to %(default)s')) + parser.add_argument('-w', '--wait', action='store_true', default=False, + help=('Wether to wait, instead of checking only once ' + 'the status of a pipeline')) + parser.add_argument('-p', '--project-id', type=int, default=11167699, + help=('The GitLab project ID. Defaults to the project ' + 'for https://gitlab.com/qemu-project/qemu, that ' + 'is, "%(default)s"')) + parser.add_argument('-b', '--branch', type=str, default="staging", + help=('Specify the branch to check. ' + 'Use HEAD for your current branch. ' + 'Otherwise looks at "%(default)s"')) + parser.add_argument('-c', '--commit', + default=None, + help=('Look for a pipeline associated with the given ' + 'commit. If one is not explicitly given, the ' + 'commit associated with the default branch ' + 'is used.')) + parser.add_argument('--verbose', action='store_true', default=False, + help=('A minimal verbosity level that prints the ' + 'overall result of the check/wait')) + return parser + +def main(): + """ + Script entry point + """ + parser = create_parser() + args = parser.parse_args() + + if not args.commit: + args.commit = get_local_branch_commit(args.branch) + + success = False + try: + if args.wait: + success = wait_on_pipeline_success( + args.timeout, + args.interval, + args.project_id, + args.commit) + else: + status = get_pipeline_status(args.project_id, + args.commit) + success = status['status'] == 'success' + except Exception as error: # pylint: disable=W0703 + if args.verbose: + print("ERROR: %s" % error.args[0]) + except KeyboardInterrupt: + if args.verbose: + print("Exiting on user's request") + + if success: + if args.verbose: + print('success') + sys.exit(0) + else: + if args.verbose: + print('failure') + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/scripts/ci/org.centos/stream/8/build-environment.yml b/scripts/ci/org.centos/stream/8/build-environment.yml new file mode 100644 index 000000000..42b047163 --- /dev/null +++ b/scripts/ci/org.centos/stream/8/build-environment.yml @@ -0,0 +1,51 @@ +--- +- name: Installation of extra packages to build QEMU + hosts: all + tasks: + - name: Extra check for CentOS Stream 8 + lineinfile: + path: /etc/redhat-release + line: CentOS Stream release 8 + state: present + check_mode: yes + register: centos_stream_8 + + - name: Enable PowerTools repo on CentOS Stream 8 + ini_file: + path: /etc/yum.repos.d/CentOS-Stream-PowerTools.repo + section: powertools + option: enabled + value: "1" + when: + - ansible_facts['distribution'] == 'CentOS' + - ansible_facts['distribution_major_version'] == '8' + - centos_stream_8 + + - name: Install basic packages to build QEMU on CentOS Stream 8 + dnf: + name: + - device-mapper-multipath-devel + - glusterfs-api-devel + - gnutls-devel + - libcap-ng-devel + - libcurl-devel + - libfdt-devel + - libiscsi-devel + - libpmem-devel + - librados-devel + - librbd-devel + - libseccomp-devel + - libssh-devel + - libxkbcommon-devel + - ninja-build + - numactl-devel + - python3-sphinx + - redhat-rpm-config + - snappy-devel + - spice-server-devel + - systemd-devel + state: present + when: + - ansible_facts['distribution'] == 'CentOS' + - ansible_facts['distribution_major_version'] == '8' + - centos_stream_8 diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure new file mode 100755 index 000000000..048e80dc4 --- /dev/null +++ b/scripts/ci/org.centos/stream/8/x86_64/configure @@ -0,0 +1,208 @@ +#!/bin/sh -e +# +# Configuration for QEMU based on CentOS Stream 8 
+#
+# The "configure" command line is based on:
+#
+# https://git.centos.org/rpms/qemu-kvm/blob/c8s-stream-rhel/f/SPECS/qemu-kvm.spec
+#
+# But, because the SPEC file contains a number of conditionals and
+# variable expansions only available at RPM build time, this version
+# was initially generated from an actual RPM build on an x86_64 platform.
+#
+# From that initial version, options that are required or are a
+# consequence of non-upstream patches have been adapted.  One example
+# is "--without-default-devices", which is *not* present here, given
+# that patches adding downstream-specific devices are not available.
+#
+../configure \
+--prefix="/usr" \
+--libdir="/usr/lib64" \
+--datadir="/usr/share" \
+--sysconfdir="/etc" \
+--interp-prefix=/usr/qemu-%M \
+--localstatedir="/var" \
+--docdir="/usr/share/doc" \
+--libexecdir="/usr/libexec" \
+--extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \
+--extra-cflags="-O2 -g -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fexceptions -fstack-protector-strong -grecord-gcc-switches -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection" \
+--with-suffix="qemu-kvm" \
+--firmwarepath=/usr/share/qemu-firmware \
+--with-git=meson \
+--with-git-submodules=update \
+--target-list="x86_64-softmmu" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--audio-drv-list="" \
+--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
+--with-coroutine=ucontext \
+--with-git=git \
+--tls-priority=@QEMU,SYSTEM \
+--disable-attr \
+--disable-auth-pam \
+--disable-avx2 \
+--disable-avx512f \
+--disable-bochs \
+--disable-bpf \
+--disable-brlapi \
+--disable-bsd-user \
+--disable-bzip2 \
+--disable-cap-ng \
+--disable-capstone \
+--disable-cfi \
+--disable-cfi-debug \
+--disable-cloop \
+--disable-cocoa \
+--disable-coroutine-pool \
+--disable-crypto-afalg \
+--disable-curl \
+--disable-curses \
+--disable-debug-info \
+--disable-debug-mutex \
+--disable-debug-tcg \
+--disable-dmg \
+--disable-docs \
+--disable-fuse \
+--disable-fuse-lseek \
+--disable-gcrypt \
+--disable-gio \
+--disable-glusterfs \
+--disable-gnutls \
+--disable-gtk \
+--disable-guest-agent \
+--disable-guest-agent-msi \
+--disable-hax \
+--disable-hvf \
+--disable-iconv \
+--disable-kvm \
+--disable-libdaxctl \
+--disable-libiscsi \
+--disable-libnfs \
+--disable-libpmem \
+--disable-libssh \
+--disable-libudev \
+--disable-libusb \
+--disable-libxml2 \
+--disable-linux-aio \
+--disable-linux-io-uring \
+--disable-linux-user \
+--disable-live-block-migration \
+--disable-lto \
+--disable-lzfse \
+--disable-lzo \
+--disable-malloc-trim \
+--disable-membarrier \
+--disable-modules \
+--disable-module-upgrades \
+--disable-mpath \
+--disable-multiprocess \
+--disable-netmap \
+--disable-nettle \
+--disable-numa \
+--disable-nvmm \
+--disable-opengl \
+--disable-parallels \
+--disable-pie \
+--disable-pvrdma \
+--disable-qcow1 \
+--disable-qed \
+--disable-qom-cast-debug \
+--disable-rbd \
+--disable-rdma \
+--disable-replication \
+--disable-rng-none \
+--disable-safe-stack \
+--disable-sanitizers \
+--disable-sdl \
+--disable-sdl-image \
+--disable-seccomp \
+--disable-slirp-smbd \
+--disable-smartcard \
+--disable-snappy \
+--disable-sparse \
+--disable-spice \
+--disable-strip \
+--disable-system \
+--disable-tcg \
+--disable-tools \
+--disable-tpm \ +--disable-u2f \ +--disable-usb-redir \ +--disable-user \ +--disable-vde \ +--disable-vdi \ +--disable-vhost-crypto \ +--disable-vhost-kernel \ +--disable-vhost-net \ +--disable-vhost-scsi \ +--disable-vhost-user \ +--disable-vhost-user-blk-server \ +--disable-vhost-vdpa \ +--disable-vhost-vsock \ +--disable-virglrenderer \ +--disable-virtfs \ +--disable-virtiofsd \ +--disable-vnc \ +--disable-vnc-jpeg \ +--disable-vnc-png \ +--disable-vnc-sasl \ +--disable-vte \ +--disable-vvfat \ +--disable-werror \ +--disable-whpx \ +--disable-xen \ +--disable-xen-pci-passthrough \ +--disable-xfsctl \ +--disable-xkbcommon \ +--disable-zstd \ +--enable-attr \ +--enable-avx2 \ +--enable-cap-ng \ +--enable-capstone \ +--enable-coroutine-pool \ +--enable-curl \ +--enable-debug-info \ +--enable-docs \ +--enable-fdt \ +--enable-gcrypt \ +--enable-glusterfs \ +--enable-gnutls \ +--enable-guest-agent \ +--enable-iconv \ +--enable-kvm \ +--enable-libiscsi \ +--enable-libpmem \ +--enable-libssh \ +--enable-libusb \ +--enable-libudev \ +--enable-linux-aio \ +--enable-lzo \ +--enable-malloc-trim \ +--enable-modules \ +--enable-mpath \ +--enable-numa \ +--enable-opengl \ +--enable-pie \ +--enable-rbd \ +--enable-rdma \ +--enable-seccomp \ +--enable-snappy \ +--enable-smartcard \ +--enable-spice \ +--enable-system \ +--enable-tcg \ +--enable-tools \ +--enable-tpm \ +--enable-trace-backend=dtrace \ +--enable-usb-redir \ +--enable-virtiofsd \ +--enable-vhost-kernel \ +--enable-vhost-net \ +--enable-vhost-user \ +--enable-vhost-user-blk-server \ +--enable-vhost-vdpa \ +--enable-vhost-vsock \ +--enable-vnc \ +--enable-vnc-png \ +--enable-vnc-sasl \ +--enable-werror \ +--enable-xkbcommon diff --git a/scripts/ci/org.centos/stream/8/x86_64/test-avocado b/scripts/ci/org.centos/stream/8/x86_64/test-avocado new file mode 100755 index 000000000..7aeecbcfb --- /dev/null +++ b/scripts/ci/org.centos/stream/8/x86_64/test-avocado @@ -0,0 +1,70 @@ +#!/bin/sh -e +# +# Runs a previously vetted list of tests, either marked explicitly for +# KVM and x86_64, or tests that are generic enough to be valid for all +# targets. 
+# Such a test list can be generated with:
+#
+# ./tests/venv/bin/avocado list --filter-by-tags-include-empty \
+#   --filter-by-tags-include-empty-key -t accel:kvm,arch:x86_64 \
+#   tests/avocado/
+#
+# This is almost the complete list of avocado-based tests available at
+# the time this was compiled, with the following exceptions:
+#
+# * Require machine type "x-remote":
+#   - tests/avocado/multiprocess.py:Multiprocess.test_multiprocess_x86_64
+#
+# * Needs superuser privileges:
+#   - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_pre_virtiofsd_set_up
+#   - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_pre_launch_set_up
+#   - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_post_launch_set_up
+#   - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_post_mount_set_up
+#   - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_two_runs
+#
+# * Requires display type "egl-headless":
+#   - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_virtio_vga_virgl
+#   - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_vhost_user_vga_virgl
+#
+# * Test is marked (unconditionally) to be skipped:
+#   - tests/avocado/virtio_check_params.py:VirtioMaxSegSettingsCheck.test_machine_types
+#
+make get-vm-images
+./tests/venv/bin/avocado run \
+    --job-results-dir=tests/results/ \
+    tests/avocado/boot_linux.py:BootLinuxX8664.test_pc_i440fx_kvm \
+    tests/avocado/boot_linux.py:BootLinuxX8664.test_pc_q35_kvm \
+    tests/avocado/boot_linux_console.py:BootLinuxConsole.test_x86_64_pc \
+    tests/avocado/cpu_queries.py:QueryCPUModelExpansion.test \
+    tests/avocado/empty_cpu_model.py:EmptyCPUModel.test \
+    tests/avocado/hotplug_cpu.py:HotPlugCPU.test \
+    tests/avocado/info_usernet.py:InfoUsernet.test_hostfwd \
+    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu \
+    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_pt \
+    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_strict \
+    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_strict_cm \
+    tests/avocado/linux_initrd.py:LinuxInitrd.test_with_2gib_file_should_exit_error_msg_with_linux_v3_6 \
+    tests/avocado/linux_initrd.py:LinuxInitrd.test_with_2gib_file_should_work_with_linux_v4_16 \
+    tests/avocado/migration.py:Migration.test_migration_with_exec \
+    tests/avocado/migration.py:Migration.test_migration_with_tcp_localhost \
+    tests/avocado/migration.py:Migration.test_migration_with_unix \
+    tests/avocado/pc_cpu_hotplug_props.py:OmittedCPUProps.test_no_die_id \
+    tests/avocado/replay_kernel.py:ReplayKernelNormal.test_x86_64_pc \
+    tests/avocado/reverse_debugging.py:ReverseDebugging_X86_64.test_x86_64_pc \
+    tests/avocado/version.py:Version.test_qmp_human_info_version \
+    tests/avocado/virtio_version.py:VirtioVersionCheck.test_conventional_devs \
+    tests/avocado/virtio_version.py:VirtioVersionCheck.test_modern_only_devs \
+    tests/avocado/vnc.py:Vnc.test_change_password \
+    tests/avocado/vnc.py:Vnc.test_change_password_requires_a_password \
+    tests/avocado/vnc.py:Vnc.test_no_vnc \
+    tests/avocado/vnc.py:Vnc.test_no_vnc_change_password \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_4_0 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_4_1 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_set_4_0 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_unset_4_1 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v1_4_0 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v1_set_4_0 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v2_4_0 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v2_unset_4_1 \
+    tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_4_0_alias_compatibility \
+    tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_4_1_alias \
+    tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_none_alias
diff --git a/scripts/ci/org.centos/stream/README b/scripts/ci/org.centos/stream/README
new file mode 100644
index 000000000..e3eadfe3e
--- /dev/null
+++ b/scripts/ci/org.centos/stream/README
@@ -0,0 +1,17 @@
+This directory contains scripts for generating a build of QEMU that
+closely matches the CentOS Stream[1] builds of the qemu-kvm package.
+
+To have the environment ready to configure, build QEMU and run tests,
+please start with a CentOS Stream machine and:
+
+ * apply the generic "build-environment.yml" playbook located at
+   scripts/ci/setup (an example invocation is sketched at the top of
+   that playbook)
+
+ * apply the "build-environment.yml" in the directory named after the
+   CentOS Stream version (such as "8").
+
+This currently only covers CentOS Stream 8 environments and
+packages[2].
+
+[1] https://www.centos.org/centos-stream/
+[2] https://git.centos.org/rpms/qemu-kvm/commits/c8s-stream-rhel
diff --git a/scripts/ci/setup/.gitignore b/scripts/ci/setup/.gitignore
new file mode 100644
index 000000000..f4a6183f1
--- /dev/null
+++ b/scripts/ci/setup/.gitignore
@@ -0,0 +1,2 @@
+inventory
+vars.yml
diff --git a/scripts/ci/setup/build-environment.yml b/scripts/ci/setup/build-environment.yml
new file mode 100644
index 000000000..599896cc5
--- /dev/null
+++ b/scripts/ci/setup/build-environment.yml
@@ -0,0 +1,154 @@
+# Copyright (c) 2021 Red Hat, Inc.
+#
+# Author:
+#  Cleber Rosa <crosa@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later. See the COPYING file in the top-level directory.
+#
+# This is an ansible playbook file.  Run it to set up systems with the
+# environment needed to build QEMU.
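+#
+# An illustrative invocation (not part of the original playbook): with
+# an "inventory" file created from inventory.template in this
+# directory, listing the machines to set up, run:
+#
+#   ansible-playbook -i inventory build-environment.yml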
+--- +- name: Installation of basic packages to build QEMU + hosts: all + tasks: + - name: Check for suitable ansible version + delegate_to: localhost + assert: + that: + - '((ansible_version.major == 2) and (ansible_version.minor >= 8)) or (ansible_version.major >= 3)' + msg: "Unsuitable ansible version, please use version 2.8.0 or later" + + - name: Update apt cache / upgrade packages via apt + apt: + update_cache: yes + upgrade: yes + when: + - ansible_facts['distribution'] == 'Ubuntu' + + - name: Install basic packages to build QEMU on Ubuntu 18.04/20.04 + package: + name: + # Originally from tests/docker/dockerfiles/ubuntu1804.docker + - ccache + - gcc + - gettext + - git + - glusterfs-common + - libaio-dev + - libattr1-dev + - libbrlapi-dev + - libbz2-dev + - libcacard-dev + - libcap-ng-dev + - libcurl4-gnutls-dev + - libdrm-dev + - libepoxy-dev + - libfdt-dev + - libgbm-dev + - libgtk-3-dev + - libibverbs-dev + - libiscsi-dev + - libjemalloc-dev + - libjpeg-turbo8-dev + - liblzo2-dev + - libncurses5-dev + - libncursesw5-dev + - libnfs-dev + - libnss3-dev + - libnuma-dev + - libpixman-1-dev + - librados-dev + - librbd-dev + - librdmacm-dev + - libsasl2-dev + - libsdl2-dev + - libseccomp-dev + - libsnappy-dev + - libspice-protocol-dev + - libssh-dev + - libusb-1.0-0-dev + - libusbredirhost-dev + - libvdeplug-dev + - libvte-2.91-dev + - libzstd-dev + - make + - python3-yaml + - python3-sphinx + - python3-sphinx-rtd-theme + - ninja-build + - sparse + - xfslibs-dev + state: present + when: + - ansible_facts['distribution'] == 'Ubuntu' + + - name: Install packages to build QEMU on Ubuntu 18.04/20.04 on non-s390x + package: + name: + - libspice-server-dev + - libxen-dev + state: present + when: + - ansible_facts['distribution'] == 'Ubuntu' + - ansible_facts['architecture'] != 's390x' + + - name: Install basic packages to build QEMU on Ubuntu 18.04 + package: + name: + # Originally from tests/docker/dockerfiles/ubuntu1804.docker + - clang + when: + - ansible_facts['distribution'] == 'Ubuntu' + - ansible_facts['distribution_version'] == '18.04' + + - name: Install basic packages to build QEMU on Ubuntu 20.04 + package: + name: + # Originally from tests/docker/dockerfiles/ubuntu2004.docker + - clang-10 + - genisoimage + - liblttng-ust-dev + - libslirp-dev + - netcat-openbsd + when: + - ansible_facts['distribution'] == 'Ubuntu' + - ansible_facts['distribution_version'] == '20.04' + + - name: Install basic packages to build QEMU on EL8 + dnf: + # This list of packages start with tests/docker/dockerfiles/centos8.docker + # but only include files that are common to all distro variants and present + # in the standard repos (no add-ons) + name: + - bzip2 + - bzip2-devel + - dbus-daemon + - diffutils + - gcc + - gcc-c++ + - genisoimage + - gettext + - git + - glib2-devel + - libaio-devel + - libepoxy-devel + - libgcrypt-devel + - lzo-devel + - make + - mesa-libEGL-devel + - nettle-devel + - nmap-ncat + - perl-Test-Harness + - pixman-devel + - python36 + - rdma-core-devel + - spice-glib-devel + - spice-server + - systemtap-sdt-devel + - tar + - zlib-devel + state: present + when: + - ansible_facts['distribution_file_variety'] == 'RedHat' + - ansible_facts['distribution_version'] == '8' diff --git a/scripts/ci/setup/gitlab-runner.yml b/scripts/ci/setup/gitlab-runner.yml new file mode 100644 index 000000000..1127db516 --- /dev/null +++ b/scripts/ci/setup/gitlab-runner.yml @@ -0,0 +1,71 @@ +# Copyright (c) 2021 Red Hat, Inc. 
+# +# Author: +# Cleber Rosa <crosa@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. +# +# This is an ansible playbook file. Run it to set up systems with the +# gitlab-runner agent. +--- +- name: Installation of gitlab-runner + hosts: all + vars_files: + - vars.yml + tasks: + - debug: + msg: 'Checking for a valid GitLab registration token' + failed_when: "gitlab_runner_registration_token == 'PLEASE_PROVIDE_A_VALID_TOKEN'" + + - name: Create a group for the gitlab-runner service + group: + name: gitlab-runner + + - name: Create a user for the gitlab-runner service + user: + user: gitlab-runner + group: gitlab-runner + comment: GitLab Runner + home: /home/gitlab-runner + shell: /bin/bash + + - name: Remove the .bash_logout file when on Ubuntu systems + file: + path: /home/gitlab-runner/.bash_logout + state: absent + when: "ansible_facts['distribution'] == 'Ubuntu'" + + - name: Set the Operating System for gitlab-runner + set_fact: + gitlab_runner_os: "{{ ansible_facts[\"system\"]|lower }}" + - debug: + msg: gitlab-runner OS is {{ gitlab_runner_os }} + + - name: Set the architecture for gitlab-runner + set_fact: + gitlab_runner_arch: "{{ ansible_to_gitlab_arch[ansible_facts[\"architecture\"]] }}" + - debug: + msg: gitlab-runner arch is {{ gitlab_runner_arch }} + + - name: Download the matching gitlab-runner + get_url: + dest: /usr/local/bin/gitlab-runner + url: "https://s3.amazonaws.com/gitlab-runner-downloads/v{{ gitlab_runner_version }}/binaries/gitlab-runner-{{ gitlab_runner_os }}-{{ gitlab_runner_arch }}" + owner: gitlab-runner + group: gitlab-runner + mode: u=rwx,g=rwx,o=rx + + - name: Register the gitlab-runner + command: "/usr/local/bin/gitlab-runner register --non-interactive --url {{ gitlab_runner_server_url }} --registration-token {{ gitlab_runner_registration_token }} --executor shell --tag-list {{ ansible_facts[\"architecture\"] }},{{ ansible_facts[\"distribution\"]|lower }}_{{ ansible_facts[\"distribution_version\"] }} --description '{{ ansible_facts[\"distribution\"] }} {{ ansible_facts[\"distribution_version\"] }} {{ ansible_facts[\"architecture\"] }} ({{ ansible_facts[\"os_family\"] }})'" + + - name: Install the gitlab-runner service using its own functionality + command: /usr/local/bin/gitlab-runner install --user gitlab-runner --working-directory /home/gitlab-runner + register: gitlab_runner_install_service_result + failed_when: "gitlab_runner_install_service_result.rc != 0 and \"already exists\" not in gitlab_runner_install_service_result.stderr" + + - name: Enable the gitlab-runner service + service: + name: gitlab-runner + state: started + enabled: yes diff --git a/scripts/ci/setup/inventory.template b/scripts/ci/setup/inventory.template new file mode 100644 index 000000000..2fbb50c4a --- /dev/null +++ b/scripts/ci/setup/inventory.template @@ -0,0 +1 @@ +localhost diff --git a/scripts/ci/setup/vars.yml.template b/scripts/ci/setup/vars.yml.template new file mode 100644 index 000000000..e48089761 --- /dev/null +++ b/scripts/ci/setup/vars.yml.template @@ -0,0 +1,12 @@ +# The version of the gitlab-runner to use +gitlab_runner_version: 13.12.0 +# The URL of the gitlab server to use, usually https://gitlab.com unless you're +# using a private GitLab instance +gitlab_runner_server_url: https://gitlab.com +# A mapping of the ansible to gitlab architecture nomenclature +ansible_to_gitlab_arch: + x86_64: amd64 + aarch64: arm64 + s390x: s390x +# A unique token made available by GitLab to 
your project for registering runners +gitlab_runner_registration_token: PLEASE_PROVIDE_A_VALID_TOKEN diff --git a/scripts/clean-header-guards.pl b/scripts/clean-header-guards.pl new file mode 100755 index 000000000..a6680253b --- /dev/null +++ b/scripts/clean-header-guards.pl @@ -0,0 +1,216 @@ +#!/usr/bin/env perl +# +# Clean up include guards in headers +# +# Copyright (C) 2016 Red Hat, Inc. +# +# Authors: +# Markus Armbruster <armbru@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# (at your option) any later version. See the COPYING file in the +# top-level directory. +# +# Usage: scripts/clean-header-guards.pl [OPTION]... [FILE]... +# -c CC Use a compiler other than cc +# -n Suppress actual cleanup +# -v Show which files are cleaned up, and which are skipped +# +# Does the following: +# - Header files without a recognizable header guard are skipped. +# - Clean up any untidy header guards in-place. Warn if the cleanup +# renames guard symbols, and explain how to find occurrences of these +# symbols that may have to be updated manually. +# - Warn about duplicate header guard symbols. To make full use of +# this warning, you should clean up *all* headers in one run. +# - Warn when preprocessing a header with its guard symbol defined +# produces anything but whitespace. The preprocessor is run like +# "cc -E -DGUARD_H -c -P -", and fed the test program on stdin. + +use strict; +use warnings; +use Getopt::Std; + +# Stuff we don't want to clean because we import it into our tree: +my $exclude = qr,^(disas/libvixl/|include/standard-headers/ + |linux-headers/|pc-bios/|tests/tcg/|tests/multiboot/),x; +# Stuff that is expected to fail the preprocessing test: +my $exclude_cpp = qr,^include/libdecnumber/decNumberLocal.h,; + +my %guarded = (); +my %old_guard = (); + +our $opt_c = "cc"; +our $opt_n = 0; +our $opt_v = 0; +getopts("c:nv"); + +sub skipping { + my ($fname, $msg, $line1, $line2) = @_; + + return if !$opt_v or $fname =~ $exclude; + print "$fname skipped: $msg\n"; + print " $line1" if defined $line1; + print " $line2" if defined $line2; +} + +sub gripe { + my ($fname, $msg) = @_; + return if $fname =~ $exclude; + print STDERR "$fname: warning: $msg\n"; +} + +sub slurp { + my ($fname) = @_; + local $/; # slurp + open(my $in, "<", $fname) + or die "can't open $fname for reading: $!"; + return <$in>; +} + +sub unslurp { + my ($fname, $contents) = @_; + open (my $out, ">", $fname) + or die "can't open $fname for writing: $!"; + print $out $contents + or die "error writing $fname: $!"; + close $out + or die "error writing $fname: $!"; +} + +sub fname2guard { + my ($fname) = @_; + $fname =~ tr/a-z/A-Z/; + $fname =~ tr/A-Z0-9/_/cs; + return $fname; +} + +sub preprocess { + my ($fname, $guard) = @_; + + open(my $pipe, "-|", "$opt_c -E -D$guard -c -P - <$fname") + or die "can't run $opt_c: $!"; + while (<$pipe>) { + if ($_ =~ /\S/) { + gripe($fname, "not blank after preprocessing"); + last; + } + } + close $pipe + or gripe($fname, "preprocessing failed ($opt_c exit status $?)"); +} + +for my $fname (@ARGV) { + my $text = slurp($fname); + + $text =~ m,\A(\s*\n|\s*//\N*\n|\s*/\*.*?\*/\s*\n)*|,sg; + my $pre = $&; + unless ($text =~ /\G(.*\n)/g) { + $text =~ /\G.*/; + skipping($fname, "no recognizable header guard", "$&\n"); + next; + } + my $line1 = $1; + unless ($text =~ /\G(.*\n)/g) { + $text =~ /\G.*/; + skipping($fname, "no recognizable header guard", "$&\n"); + next; + } + my $line2 = $1; + my $body = substr($text, pos($text)); + + unless ($line1 =~ 
/^\s*\#\s*(if\s*\!\s*defined(\s*\()?|ifndef)\s* + ([A-Za-z0-9_]+)/x) { + skipping($fname, "no recognizable header guard", $line1, $line2); + next; + } + my $guard = $3; + unless ($line2 =~ /^\s*\#\s*define\s+([A-Za-z0-9_]+)/) { + skipping($fname, "no recognizable header guard", $line1, $line2); + next; + } + my $guard2 = $1; + unless ($guard2 eq $guard) { + skipping($fname, "mismatched header guard ($guard vs. $guard2) ", + $line1, $line2); + next; + } + + unless ($body =~ m,\A((.*\n)*) + ([ \t]*\#[ \t]*endif([ \t]*\N*)\n) + ((?s)(\s*\n|\s*//\N*\n|\s*/\*.*?\*/\s*\n)*) + \Z,x) { + skipping($fname, "can't find end of header guard"); + next; + } + $body = $1; + my $line3 = $3; + my $endif_comment = $4; + my $post = $5; + + my $oldg = $guard; + + unless ($fname =~ $exclude) { + my @issues = (); + $guard =~ tr/a-z/A-Z/ + and push @issues, "contains lowercase letters"; + $guard =~ s/^_+// + and push @issues, "is a reserved identifier"; + $guard =~ s/(_H)?_*$/_H/ + and $& ne "_H" and push @issues, "doesn't end with _H"; + unless ($guard =~ /^[A-Z][A-Z0-9_]*_H/) { + skipping($fname, "can't clean up odd guard symbol $oldg\n", + $line1, $line2); + next; + } + + my $exp = fname2guard($fname =~ s,.*/,,r); + unless ($guard =~ /\Q$exp\E\Z/) { + $guard = fname2guard($fname =~ s,^include/,,r); + push @issues, "doesn't match the file name"; + } + if (@issues and $opt_v) { + print "$fname guard $oldg needs cleanup:\n ", + join(", ", @issues), "\n"; + } + } + + $old_guard{$guard} = $oldg + if $guard ne $oldg; + + if (exists $guarded{$guard}) { + gripe($fname, "guard $guard also used by $guarded{$guard}"); + } else { + $guarded{$guard} = $fname; + } + + unless ($fname =~ $exclude) { + my $newl1 = "#ifndef $guard\n"; + my $newl2 = "#define $guard\n"; + my $newl3 = "#endif\n"; + $newl3 =~ s,\Z, /* $guard */, if $endif_comment; + if ($line1 ne $newl1 or $line2 ne $newl2 or $line3 ne $newl3) { + $pre =~ s/\n*\Z/\n\n/ if $pre =~ /\N/; + $body =~ s/\A\n*/\n/; + if ($opt_n) { + print "$fname would be cleaned up\n" if $opt_v; + } else { + unslurp($fname, "$pre$newl1$newl2$body$newl3$post"); + print "$fname cleaned up\n" if $opt_v; + } + } + } + + preprocess($fname, $opt_n ? $oldg : $guard) + unless $fname =~ $exclude or $fname =~ $exclude_cpp; +} + +if (%old_guard) { + print STDERR "warning: guard symbol renaming may break things\n"; + for my $guard (sort keys %old_guard) { + print STDERR " $old_guard{$guard} -> $guard\n"; + } + print STDERR "To find uses that may have to be updated try:\n"; + print STDERR " git grep -Ew '", join("|", sort values %old_guard), + "'\n"; +} diff --git a/scripts/clean-includes b/scripts/clean-includes new file mode 100755 index 000000000..aaa7d4ceb --- /dev/null +++ b/scripts/clean-includes @@ -0,0 +1,198 @@ +#!/bin/sh -e +# +# Clean up QEMU #include lines by ensuring that qemu/osdep.h +# is the first include listed in .c files, and no headers provided +# by osdep.h itself are redundantly included in either .c or .h files. +# +# Copyright (c) 2015 Linaro Limited +# +# Authors: +# Peter Maydell <peter.maydell@linaro.org> +# +# This work is licensed under the terms of the GNU GPL, version 2 +# or (at your option) any later version. See the COPYING file in +# the top-level directory. + +# Usage: +# clean-includes [--git subjectprefix] [--check-dup-head] file ... 
+# or +# clean-includes [--git subjectprefix] [--check-dup-head] --all +# +# If the --git subjectprefix option is given, then after making +# the changes to the files this script will create a git commit +# with the subject line "subjectprefix: Clean up includes" +# and a boilerplate commit message. +# +# If --check-dup-head is specified, additionally check for duplicate +# header includes. +# +# Using --all will cause clean-includes to run on the whole source +# tree (excluding certain directories which are known not to need +# handling). + +# This script requires Coccinelle to be installed. + +# .c files will have the osdep.h included added, and redundant +# includes removed. +# .h files will have redundant includes (including includes of osdep.h) +# removed. +# Other files (including C++ and ObjectiveC) can't be handled by this script. + +# The following one-liner may be handy for finding files to run this on. +# However some caution is required regarding files that might be part +# of the guest agent or standalone tests. + +# for i in $(git ls-tree --name-only HEAD) ; do test -f $i && \ +# grep -E '^# *include' $i | head -1 | grep 'osdep.h' ; test $? != 0 && \ +# echo $i ; done + + +GIT=no +DUPHEAD=no + +# Extended regular expression defining files to ignore when using --all +XDIRREGEX='^(tests/tcg|tests/multiboot|pc-bios|disas/libvixl)' + +while true +do + case $1 in + "--git") + if [ $# -eq 1 ]; then + echo "--git option requires an argument" + exit 1 + fi + GITSUBJ="$2" + GIT=yes + shift + shift + ;; + "--check-dup-head") + DUPHEAD=yes + shift + ;; + "--") + shift + break + ;; + *) + break + ;; + esac +done + +if [ $# -eq 0 ]; then + echo "Usage: clean-includes [--git subjectprefix] [--check-dup-head] [--all | foo.c ...]" + echo "(modifies the files in place)" + exit 1 +fi + +if [ "$1" = "--all" ]; then + # We assume there are no files in the tree with spaces in their name + set -- $(git ls-files '*.[ch]' | grep -E -v "$XDIRREGEX") +fi + +# Annoyingly coccinelle won't read a scriptfile unless its +# name ends '.cocci', so write it out to a tempfile with the +# right kind of name. +COCCIFILE="$(mktemp --suffix=.cocci)" + +trap 'rm -f -- "$COCCIFILE"' INT TERM HUP EXIT + +cat >"$COCCIFILE" <<EOT +@@ +@@ + +( ++ #include "qemu/osdep.h" + #include "..." +| ++ #include "qemu/osdep.h" + #include <...> +) +EOT + +for f in "$@"; do + case "$f" in + *.c.inc) + # These aren't standalone C source files + echo "SKIPPING $f (not a standalone source file)" + continue + ;; + *.c) + MODE=c + ;; + *include/qemu/osdep.h | \ + *include/qemu/compiler.h | \ + *include/qemu/qemu-plugin.h | \ + *include/glib-compat.h | \ + *include/sysemu/os-posix.h | \ + *include/sysemu/os-win32.h | \ + *include/standard-headers/ ) + # Removing include lines from osdep.h itself would be counterproductive. + echo "SKIPPING $f (special case header)" + continue + ;; + *include/standard-headers/*) + echo "SKIPPING $f (autogenerated header)" + continue + ;; + *.h) + MODE=h + ;; + *) + echo "WARNING: ignoring $f (cannot handle non-C files)" + continue + ;; + esac + + if [ "$MODE" = "c" ]; then + # First, use Coccinelle to add qemu/osdep.h before the first existing include + # (this will add two lines if the file uses both "..." 
and <...> #includes,
+    # but we will remove the extras in the next step)
+    spatch --in-place --no-show-diff --cocci-file "$COCCIFILE" "$f"
+
+    # Now remove any duplicate osdep.h includes
+    perl -n -i -e 'print if !/#include "qemu\/osdep.h"/ || !$n++;' "$f"
+  else
+    # Remove includes of osdep.h itself
+    perl -n -i -e 'print if !/\s*#\s*include\s*(["<][^>"]*[">])/ ||
+                            ! (grep { $_ eq $1 } qw ("qemu/osdep.h"))' "$f"
+  fi
+
+  # Remove includes that osdep.h already provides
+  perl -n -i -e 'print if !/\s*#\s*include\s*(["<][^>"]*[">])/ ||
+                          ! (grep { $_ eq $1 } qw (
+         "config-host.h" "config-target.h" "qemu/compiler.h"
+         <setjmp.h> <stdarg.h> <stddef.h> <stdbool.h> <stdint.h> <sys/types.h>
+         <stdlib.h> <stdio.h> <string.h> <strings.h> <inttypes.h>
+         <limits.h> <unistd.h> <time.h> <ctype.h> <errno.h> <fcntl.h>
+         <sys/stat.h> <sys/time.h> <assert.h> <signal.h> <glib.h> <sys/mman.h>
+         "sysemu/os-posix.h" "sysemu/os-win32.h" "glib-compat.h"
+         "qemu/typedefs.h"
+        ))' "$f"
+
+done
+
+if [ "$DUPHEAD" = "yes" ]; then
+    dups=$(grep -E "^[[:space:]]*#[[:space:]]*include" "$@" | tr -d '[:blank:]' \
+        | sort | uniq -c | awk '{if ($1 > 1) print $0}')
+    if [ -n "$dups" ]; then
+        echo "$dups"
+        echo "Found duplicate header file includes. Please check the above files manually."
+        exit 1
+    fi
+fi
+
+if [ "$GIT" = "yes" ]; then
+    git add -- "$@"
+    git commit --signoff -F - <<EOF
+$GITSUBJ: Clean up includes
+
+Clean up includes so that osdep.h is included first and headers
+which it implies are not included manually.
+
+This commit was created with scripts/clean-includes.
+
+EOF
+
+fi
diff --git a/scripts/cleanup-trace-events.pl b/scripts/cleanup-trace-events.pl
new file mode 100755
index 000000000..c40d2fcc5
--- /dev/null
+++ b/scripts/cleanup-trace-events.pl
@@ -0,0 +1,65 @@
+#!/usr/bin/env perl
+# Copyright (C) 2013 Red Hat, Inc.
+#
+# Authors:
+#  Markus Armbruster <armbru@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+# Usage: cleanup-trace-events.pl trace-events
+#
+# Print cleaned up trace-events to standard output.
+
+use warnings;
+use strict;
+use File::Basename;
+
+my @files = ();
+my $events = '';
+my %seen = ();
+
+sub out {
+    print sort @files;
+    print $events;
+    @files = ();
+    $events = '';
+    %seen = ();
+}
+
+$#ARGV == 0 or die "usage: $0 FILE";
+my $in = $ARGV[0];
+my $dir = dirname($in);
+open(IN, $in) or die "open $in: $!";
+chdir($dir) or die "chdir $dir: $!";
+
+while (<IN>) {
+    if (/^(disable |(tcg) |(vcpu) )*([a-z_0-9]+)\(/i) {
+        my $pat = "trace_$4";
+        $pat .= '_tcg' if defined $2;
+        open GREP, '-|', 'git', 'grep', '-lw',
+            defined $3 ? () : ('--max-depth', '1'),
+            $pat
+            or die "run git grep: $!";
+        while (my $fname = <GREP>) {
+            chomp $fname;
+            next if $seen{$fname} || $fname eq 'trace-events';
+            $seen{$fname} = 1;
+            push @files, "# $fname\n";
+        }
+        unless (close GREP) {
+            die "close git grep: $!"
+                if $!;
+            next;
+        }
+    } elsif (/^# ([^ ]*\.[ch])$/) {
+        out;
+        next;
+    } elsif (!/^#|^$/) {
+        warn "unintelligible line";
+    }
+    $events .= $_;
+}
+
+out;
+close(IN) or die "close $in: $!";
diff --git a/scripts/cocci-macro-file.h b/scripts/cocci-macro-file.h
new file mode 100644
index 000000000..20eea6b70
--- /dev/null
+++ b/scripts/cocci-macro-file.h
@@ -0,0 +1,118 @@
+/* Macro file for Coccinelle
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ * + * Authors: + * Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or, at your + * option, any later version. See the COPYING file in the top-level directory. + */ + +/* Coccinelle only does limited parsing of headers, and chokes on some idioms + * defined in compiler.h and queue.h. Macros that Coccinelle must know about + * in order to parse .c files must be in a separate macro file---which is + * exactly what you're staring at now. + * + * To use this file, add the "--macro-file scripts/cocci-macro-file.h" to the + * Coccinelle command line. + */ + +/* From qemu/compiler.h */ +#define QEMU_NORETURN __attribute__ ((__noreturn__)) +#define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) +#define QEMU_SENTINEL __attribute__((sentinel)) + +#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) +# define QEMU_PACKED __attribute__((gcc_struct, packed)) +#else +# define QEMU_PACKED __attribute__((packed)) +#endif + +#define cat(x,y) x ## y +#define cat2(x,y) cat(x,y) +#define QEMU_BUILD_BUG_ON(x) \ + typedef char cat2(qemu_build_bug_on__,__LINE__)[(x)?-1:1] __attribute__((unused)); + +#define GCC_FMT_ATTR(n, m) __attribute__((format(gnu_printf, n, m))) + +#define xglue(x, y) x ## y +#define glue(x, y) xglue(x, y) +#define stringify(s) tostring(s) +#define tostring(s) #s + +#define typeof_field(type, field) typeof(((type *)0)->field) +#define type_check(t1,t2) ((t1*)0 - (t2*)0) + +/* From qemu/queue.h */ + +#define QLIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + +#define QLIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define QLIST_ENTRY(type) \ +struct { \ + struct type *le_next; /* next element */ \ + struct type **le_prev; /* address of previous next element */ \ +} + +/* + * Singly-linked List definitions. + */ +#define QSLIST_HEAD(name, type) \ +struct name { \ + struct type *slh_first; /* first element */ \ +} + +#define QSLIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define QSLIST_ENTRY(type) \ +struct { \ + struct type *sle_next; /* next element */ \ +} + +/* + * Simple queue definitions. + */ +#define QSIMPLEQ_HEAD(name, type) \ +struct name { \ + struct type *sqh_first; /* first element */ \ + struct type **sqh_last; /* addr of last next element */ \ +} + +#define QSIMPLEQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).sqh_first } + +#define QSIMPLEQ_ENTRY(type) \ +struct { \ + struct type *sqe_next; /* next element */ \ +} + +/* + * Tail queue definitions. + */ +#define QTAILQ_HEAD(name, type) \ +union name { \ + struct type *tqh_first; /* first element */ \ + QTailQLink tqh_circ; /* link for last element */ \ +} + +#define QTAILQ_HEAD_INITIALIZER(head) \ + { .tqh_circ = { NULL, &(head).tqh_circ } } + +#define QTAILQ_ENTRY(type) \ +union { \ + struct type *tqe_next; /* next element */ \ + QTailQLink tqe_circ; /* link for prev element */ \ +} + +/* From glib */ +#define g_assert_cmpint(a, op, b) g_assert(a op b) +#define g_assert_cmpuint(a, op, b) g_assert(a op b) +#define g_assert_cmphex(a, op, b) g_assert(a op b) +#define g_assert_cmpstr(a, op, b) g_assert(strcmp(a, b) op 0) diff --git a/scripts/coccinelle/cpu-reset.cocci b/scripts/coccinelle/cpu-reset.cocci new file mode 100644 index 000000000..396a724e5 --- /dev/null +++ b/scripts/coccinelle/cpu-reset.cocci @@ -0,0 +1,47 @@ +// Convert targets using the old CPUState reset to DeviceState reset +// +// Copyright Linaro Ltd 2020 +// This work is licensed under the terms of the GNU GPLv2 or later. 
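+//
+// For illustration only (a hypothetical "foo" target, not part of this
+// patch), a reset handler converted by the rules below ends up as:
+//
+//   static void foo_cpu_reset(DeviceState *dev)
+//   {
+//       CPUState *cpu = CPU(dev);
+//       FooCPUClass *fcc = FOO_CPU_GET_CLASS(cpu);
+//
+//       fcc->parent_reset(dev);
+//       ...
+//   }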
+// +// spatch --macro-file scripts/cocci-macro-file.h \ +// --sp-file scripts/coccinelle/cpu-reset.cocci \ +// --keep-comments --smpl-spacing --in-place --include-headers --dir target +// +// For simplicity we assume some things about the code we're modifying +// that happen to be true for all our targets: +// * all cpu_class_set_parent_reset() callsites have a 'DeviceClass *dc' local +// * the parent reset field in the target CPU class is 'parent_reset' +// * no reset function already has a 'dev' local + +@@ +identifier cpu, x; +typedef CPUState; +@@ +struct x { +... +- void (*parent_reset)(CPUState *cpu); ++ DeviceReset parent_reset; +... +}; +@ rule1 @ +identifier resetfn; +expression resetfield; +identifier cc; +@@ +- cpu_class_set_parent_reset(cc, resetfn, resetfield) ++ device_class_set_parent_reset(dc, resetfn, resetfield) +@@ +identifier rule1.resetfn; +identifier cpu, cc; +typedef CPUState, DeviceState; +@@ +-resetfn(CPUState *cpu) +-{ ++resetfn(DeviceState *dev) ++{ ++ CPUState *cpu = CPU(dev); +<... +- cc->parent_reset(cpu); ++ cc->parent_reset(dev); +...> +} diff --git a/scripts/coccinelle/cpu_restore_state.cocci b/scripts/coccinelle/cpu_restore_state.cocci new file mode 100644 index 000000000..61bc749d1 --- /dev/null +++ b/scripts/coccinelle/cpu_restore_state.cocci @@ -0,0 +1,19 @@ +// Remove unneeded tests before calling cpu_restore_state +// +// spatch --macro-file scripts/cocci-macro-file.h \ +// --sp-file ./scripts/coccinelle/cpu_restore_state.cocci \ +// --keep-comments --in-place --use-gitgrep --dir target +@@ +expression A; +expression C; +@@ +-if (A) { + cpu_restore_state(C, A); +-} +@@ +expression A; +expression C; +@@ +- cpu_restore_state(C, A); +- cpu_loop_exit(C); ++ cpu_loop_exit_restore(C, A); diff --git a/scripts/coccinelle/err-bad-newline.cocci b/scripts/coccinelle/err-bad-newline.cocci new file mode 100644 index 000000000..539442187 --- /dev/null +++ b/scripts/coccinelle/err-bad-newline.cocci @@ -0,0 +1,49 @@ +// Error messages should not contain newlines. This script finds +// messages that do. Fixing them is manual. +@r@ +expression errp, err, eno, cls, fmt, ap; +position p; +@@ +( +error_vreport(fmt, ap)@p +| +warn_vreport(fmt, ap)@p +| +info_vreport(fmt, ap)@p +| +error_report(fmt, ...)@p +| +warn_report(fmt, ...)@p +| +info_report(fmt, ...)@p +| +error_report_once(fmt, ...)@p +| +warn_report_once(fmt, ...)@p +| +error_setg(errp, fmt, ...)@p +| +error_setg_errno(errp, eno, fmt, ...)@p +| +error_setg_win32(errp, eno, cls, fmt, ...)@p +| +error_propagate_prepend(errp, err, fmt, ...)@p +| +error_vprepend(errp, fmt, ap)@p +| +error_prepend(errp, fmt, ...)@p +| +error_setg_file_open(errp, eno, cls, fmt, ...)@p +| +warn_reportf_err(errp, fmt, ...)@p +| +error_reportf_err(errp, fmt, ...)@p +| +error_set(errp, cls, fmt, ...)@p +) +@script:python@ +fmt << r.fmt; +p << r.p; +@@ +if "\\n" in str(fmt): + print("%s:%s:%s:%s" % (p[0].file, p[0].line, p[0].column, fmt)) diff --git a/scripts/coccinelle/error-use-after-free.cocci b/scripts/coccinelle/error-use-after-free.cocci new file mode 100644 index 000000000..72ae9fdeb --- /dev/null +++ b/scripts/coccinelle/error-use-after-free.cocci @@ -0,0 +1,52 @@ +// Find and fix trivial use-after-free of Error objects +// +// Copyright (c) 2020 Virtuozzo International GmbH. +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of the +// License, or (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see +// <http://www.gnu.org/licenses/>. +// +// How to use: +// spatch --sp-file scripts/coccinelle/error-use-after-free.cocci \ +// --macro-file scripts/cocci-macro-file.h --in-place \ +// --no-show-diff ( FILES... | --use-gitgrep . ) + +@ exists@ +identifier fn, fn2; +expression err; +@@ + + fn(...) + { + <... +( + error_free(err); ++ err = NULL; +| + error_report_err(err); ++ err = NULL; +| + error_reportf_err(err, ...); ++ err = NULL; +| + warn_report_err(err); ++ err = NULL; +| + warn_reportf_err(err, ...); ++ err = NULL; +) + ... when != err = NULL + when != exit(...) + fn2(..., err, ...) + ...> + } diff --git a/scripts/coccinelle/error_propagate_null.cocci b/scripts/coccinelle/error_propagate_null.cocci new file mode 100644 index 000000000..c23638007 --- /dev/null +++ b/scripts/coccinelle/error_propagate_null.cocci @@ -0,0 +1,10 @@ +// error_propagate() already ignores local_err==NULL, so there's +// no need to check it before calling. + +@@ +identifier L; +expression E; +@@ +-if (L) { + error_propagate(E, L); +-} diff --git a/scripts/coccinelle/errp-guard.cocci b/scripts/coccinelle/errp-guard.cocci new file mode 100644 index 000000000..6e789acf2 --- /dev/null +++ b/scripts/coccinelle/errp-guard.cocci @@ -0,0 +1,336 @@ +// Use ERRP_GUARD() (see include/qapi/error.h) +// +// Copyright (c) 2020 Virtuozzo International GmbH. +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see +// <http://www.gnu.org/licenses/>. +// +// Usage example: +// spatch --sp-file scripts/coccinelle/errp-guard.cocci \ +// --macro-file scripts/cocci-macro-file.h --in-place \ +// --no-show-diff --max-width 80 FILES... +// +// Note: --max-width 80 is needed because coccinelle default is less +// than 80, and without this parameter coccinelle may reindent some +// lines which fit into 80 characters but not to coccinelle default, +// which in turn produces extra patch hunks for no reason. + +// Switch unusual Error ** parameter names to errp +// (this is necessary to use ERRP_GUARD). +// +// Disable optional_qualifier to skip functions with +// "Error *const *errp" parameter. +// +// Skip functions with "assert(_errp && *_errp)" statement, because +// that signals unusual semantics, and the parameter name may well +// serve a purpose. (like nbd_iter_channel_error()). +// +// Skip util/error.c to not touch, for example, error_propagate() and +// error_propagate_prepend(). +@ depends on !(file in "util/error.c") disable optional_qualifier@ +identifier fn; +identifier _errp != errp; +@@ + + fn(..., +- Error **_errp ++ Error **errp + ,...) + { +( + ... when != assert(_errp && *_errp) +& + <... 
+- _errp
++ errp
+ ...>
+)
+ }
+
+// Add invocation of ERRP_GUARD() to errp-functions where necessary
+//
+// Note that without "when any" the final "..." does not match
+// something matched by the previous pattern, i.e. the rule will not
+// match double error_prepend in control flow like in
+// vfio_set_irq_signaling().
+//
+// Note, "exists" says that we want to apply the rule even if it does
+// not match on all possible control flows (otherwise, it will not match
+// the standard pattern when the error_propagate() call is in an if
+// branch).
+@ disable optional_qualifier exists@
+identifier fn, local_err;
+symbol errp;
+@@
+
+ fn(..., Error **errp, ...)
+ {
++ ERRP_GUARD();
+ ... when != ERRP_GUARD();
+(
+(
+ error_append_hint(errp, ...);
+|
+ error_prepend(errp, ...);
+|
+ error_vprepend(errp, ...);
+)
+ ... when any
+|
+ Error *local_err = NULL;
+ ...
+(
+ error_propagate_prepend(errp, local_err, ...);
+|
+ error_propagate(errp, local_err);
+)
+ ...
+)
+ }
+
+// Warn when several Error * definitions are in the control flow.
+// This rule is not chained to rule1 and is less restrictive, so as to
+// cover more functions to warn about (even those we are not going to
+// convert).
+//
+// Note that even with one (or zero) Error * definition in each
+// control flow we may have several (in total) Error * definitions in
+// the function. This case deserves attention too, but I don't see a
+// simple way to match it with the help of coccinelle.
+@check1 disable optional_qualifier exists@
+identifier fn, _errp, local_err, local_err2;
+position p1, p2;
+@@
+
+ fn(..., Error **_errp, ...)
+ {
+ ...
+ Error *local_err = NULL;@p1
+ ... when any
+ Error *local_err2 = NULL;@p2
+ ... when any
+ }
+
+@ script:python @
+fn << check1.fn;
+p1 << check1.p1;
+p2 << check1.p2;
+@@
+
+print('Warning: function {} has several definitions of '
+ 'Error * local variable: at {}:{} and then at {}:{}'.format(
+ fn, p1[0].file, p1[0].line, p2[0].file, p2[0].line))
+
+// Warn when several propagations are in the control flow.
+@check2 disable optional_qualifier exists@
+identifier fn, _errp;
+position p1, p2;
+@@
+
+ fn(..., Error **_errp, ...)
+ {
+ ...
+(
+ error_propagate_prepend(_errp, ...);@p1
+|
+ error_propagate(_errp, ...);@p1
+)
+ ...
+(
+ error_propagate_prepend(_errp, ...);@p2
+|
+ error_propagate(_errp, ...);@p2
+)
+ ... when any
+ }
+
+@ script:python @
+fn << check2.fn;
+p1 << check2.p1;
+p2 << check2.p2;
+@@
+
+print('Warning: function {} propagates to errp several times in '
+ 'one control flow: at {}:{} and then at {}:{}'.format(
+ fn, p1[0].file, p1[0].line, p2[0].file, p2[0].line))
+
+// Match functions with propagation of local error to errp.
+// We want to refer to these functions in several following rules, but
+// I don't know a proper way to inherit a function, not just its name
+// (so as not to match other functions with the same name in the
+// following rules). The workaround is as follows: rename the errp
+// parameter in the function header and match it in the following
+// rules. Rename it back after all transformations.
+//
+// The common case is a single definition of local_err with at most one
+// error_propagate_prepend() or error_propagate() on each control-flow
+// path. Functions with multiple definitions or propagations are the
+// ones we want to examine manually. Rules check1 and check2 emit
+// warnings to guide us to them.
+//
+// Note that we match not only this "common case", but any function
+// which has the "common case" on at least one control-flow path.
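+//
+// For illustration only (foo() and bar() are hypothetical names, not
+// part of any rule): a function following the common case, such as
+//
+//     void foo(Error **errp)
+//     {
+//         Error *local_err = NULL;
+//         bar(&local_err);
+//         error_propagate(errp, local_err);
+//     }
+//
+// should come out of the rules below roughly as
+//
+//     void foo(Error **errp)
+//     {
+//         ERRP_GUARD();
+//         bar(errp);
+//     }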
+@rule1 disable optional_qualifier exists@
+identifier fn, local_err;
+symbol errp;
+@@
+
+ fn(..., Error **
+- errp
++ ____
+ , ...)
+ {
+ ...
+ Error *local_err = NULL;
+ ...
+(
+ error_propagate_prepend(errp, local_err, ...);
+|
+ error_propagate(errp, local_err);
+)
+ ...
+ }
+
+// Convert the special case with goto separately.
+// I tried merging this into the following rule the obvious way, but
+// it made Coccinelle hang on block.c.
+//
+// Note an interesting thing: if we don't do it here, and try to fix up
+// "out: }" things later after all transformations (the rule will be
+// the same, just without the error_propagate() call), coccinelle fails
+// to match this "out: }".
+@ disable optional_qualifier@
+identifier rule1.fn, rule1.local_err, out;
+symbol errp;
+@@
+
+ fn(..., Error ** ____, ...)
+ {
+ <...
+- goto out;
++ return;
+ ...>
+- out:
+- error_propagate(errp, local_err);
+ }
+
+// Convert most of the local_err related stuff.
+//
+// Note that we inherit the rule1.fn and rule1.local_err names, not the
+// objects themselves. We may match something not related to the
+// pattern matched by rule1. For example, local_err may be defined with
+// the same name in different blocks inside one function, and may
+// follow the propagation pattern in one block but not in another.
+//
+// Note also that the errp-cleaning functions
+// error_free_errp
+// error_report_errp
+// error_reportf_errp
+// warn_report_errp
+// warn_reportf_errp
+// are not yet implemented. They must call the corresponding Error*-
+// freeing function and then set *errp to NULL, to avoid further
+// propagation to the original errp (assuming ERRP_GUARD is in use).
+// For example, error_free_errp may look like this:
+//
+// void error_free_errp(Error **errp)
+// {
+// error_free(*errp);
+// *errp = NULL;
+// }
+@ disable optional_qualifier exists@
+identifier rule1.fn, rule1.local_err;
+expression list args;
+symbol errp;
+@@
+
+ fn(..., Error ** ____, ...)
+ {
+ <...
+(
+- Error *local_err = NULL;
+|
+
+// Convert error clearing functions
+(
+- error_free(local_err);
++ error_free_errp(errp);
+|
+- error_report_err(local_err);
++ error_report_errp(errp);
+|
+- error_reportf_err(local_err, args);
++ error_reportf_errp(errp, args);
+|
+- warn_report_err(local_err);
++ warn_report_errp(errp);
+|
+- warn_reportf_err(local_err, args);
++ warn_reportf_errp(errp, args);
+)
+?- local_err = NULL;
+
+|
+- error_propagate_prepend(errp, local_err, args);
++ error_prepend(errp, args);
+|
+- error_propagate(errp, local_err);
+|
+- &local_err
++ errp
+)
+ ...>
+ }
+
+// Convert remaining local_err usage. For example, different kinds of
+// error checking in if conditionals. We can't merge this into the
+// previous hunk, as this conflicts with other substitutions in it (at
+// least with "- local_err = NULL").
+@ disable optional_qualifier@
+identifier rule1.fn, rule1.local_err;
+symbol errp;
+@@
+
+ fn(..., Error ** ____, ...)
+ {
+ <...
+- local_err
++ *errp
+ ...>
+ }
+
+// Always use the same pattern for checking errors
+@ disable optional_qualifier@
+identifier rule1.fn;
+symbol errp;
+@@
+
+ fn(..., Error ** ____, ...)
+ {
+ <...
+- *errp != NULL
++ *errp
+ ...>
+ }
+
+// Revert the temporary ____ identifier.
+@ disable optional_qualifier@
+identifier rule1.fn;
+@@
+
+ fn(..., Error **
+- ____
++ errp
+ , ...)
+ {
+ ...
+ } diff --git a/scripts/coccinelle/exec_rw_const.cocci b/scripts/coccinelle/exec_rw_const.cocci new file mode 100644 index 000000000..1a2029695 --- /dev/null +++ b/scripts/coccinelle/exec_rw_const.cocci @@ -0,0 +1,111 @@ +/* + Usage: + + spatch \ + --macro-file scripts/cocci-macro-file.h \ + --sp-file scripts/coccinelle/exec_rw_const.cocci \ + --keep-comments \ + --in-place \ + --dir . +*/ + +// Convert to boolean +@@ +expression E1, E2, E3, E4, E5; +@@ +( +- address_space_rw(E1, E2, E3, E4, E5, 0) ++ address_space_rw(E1, E2, E3, E4, E5, false) +| +- address_space_rw(E1, E2, E3, E4, E5, 1) ++ address_space_rw(E1, E2, E3, E4, E5, true) +| + +- cpu_physical_memory_rw(E1, E2, E3, 0) ++ cpu_physical_memory_rw(E1, E2, E3, false) +| +- cpu_physical_memory_rw(E1, E2, E3, 1) ++ cpu_physical_memory_rw(E1, E2, E3, true) +| + +- cpu_physical_memory_map(E1, E2, 0) ++ cpu_physical_memory_map(E1, E2, false) +| +- cpu_physical_memory_map(E1, E2, 1) ++ cpu_physical_memory_map(E1, E2, true) +) + +// Use address_space_write instead of casting to non-const +@@ +type T; +const T *V; +expression E1, E2, E3, E4; +@@ +( +- address_space_rw(E1, E2, E3, (T *)V, E4, 1) ++ address_space_write(E1, E2, E3, V, E4) +| +- address_space_rw(E1, E2, E3, (void *)V, E4, 1) ++ address_space_write(E1, E2, E3, V, E4) +) + +// Avoid uses of address_space_rw() with a constant is_write argument. +@@ +expression E1, E2, E3, E4, E5; +symbol true, false; +@@ +( +- address_space_rw(E1, E2, E3, E4, E5, false) ++ address_space_read(E1, E2, E3, E4, E5) +| +- address_space_rw(E1, E2, E3, E4, E5, true) ++ address_space_write(E1, E2, E3, E4, E5) +) + +// Avoid uses of cpu_physical_memory_rw() with a constant is_write argument. +@@ +expression E1, E2, E3; +@@ +( +- cpu_physical_memory_rw(E1, E2, E3, false) ++ cpu_physical_memory_read(E1, E2, E3) +| +- cpu_physical_memory_rw(E1, E2, E3, true) ++ cpu_physical_memory_write(E1, E2, E3) +) + +// Remove useless cast +@@ +expression E1, E2, E3, E4, E5, E6; +type T; +@@ +( +- address_space_rw(E1, E2, E3, (T *)(E4), E5, E6) ++ address_space_rw(E1, E2, E3, E4, E5, E6) +| +- address_space_read(E1, E2, E3, (T *)(E4), E5) ++ address_space_read(E1, E2, E3, E4, E5) +| +- address_space_write(E1, E2, E3, (T *)(E4), E5) ++ address_space_write(E1, E2, E3, E4, E5) +| +- address_space_write_rom(E1, E2, E3, (T *)(E4), E5) ++ address_space_write_rom(E1, E2, E3, E4, E5) +| + +- cpu_physical_memory_rw(E1, (T *)(E2), E3, E4) ++ cpu_physical_memory_rw(E1, E2, E3, E4) +| +- cpu_physical_memory_read(E1, (T *)(E2), E3) ++ cpu_physical_memory_read(E1, E2, E3) +| +- cpu_physical_memory_write(E1, (T *)(E2), E3) ++ cpu_physical_memory_write(E1, E2, E3) +| + +- dma_memory_read(E1, E2, (T *)(E3), E4) ++ dma_memory_read(E1, E2, E3, E4) +| +- dma_memory_write(E1, E2, (T *)(E3), E4) ++ dma_memory_write(E1, E2, E3, E4) +) diff --git a/scripts/coccinelle/inplace-byteswaps.cocci b/scripts/coccinelle/inplace-byteswaps.cocci new file mode 100644 index 000000000..a869a90cb --- /dev/null +++ b/scripts/coccinelle/inplace-byteswaps.cocci @@ -0,0 +1,65 @@ +// Replace uses of in-place byteswapping functions with calls to the +// equivalent not-in-place functions. This is necessary to avoid +// undefined behaviour if the expression being swapped is a field in a +// packed struct. 
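+//
+// For illustration only ('s' is a hypothetical pointer to a packed
+// struct): the rules below rewrite
+//
+//     be32_to_cpus(&s->field);
+//
+// into
+//
+//     s->field = be32_to_cpu(s->field);
+//
+// so that no pointer to a possibly misaligned field is ever taken.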
+ +@@ +expression E; +@@ +-be16_to_cpus(&E); ++E = be16_to_cpu(E); +@@ +expression E; +@@ +-be32_to_cpus(&E); ++E = be32_to_cpu(E); +@@ +expression E; +@@ +-be64_to_cpus(&E); ++E = be64_to_cpu(E); +@@ +expression E; +@@ +-cpu_to_be16s(&E); ++E = cpu_to_be16(E); +@@ +expression E; +@@ +-cpu_to_be32s(&E); ++E = cpu_to_be32(E); +@@ +expression E; +@@ +-cpu_to_be64s(&E); ++E = cpu_to_be64(E); +@@ +expression E; +@@ +-le16_to_cpus(&E); ++E = le16_to_cpu(E); +@@ +expression E; +@@ +-le32_to_cpus(&E); ++E = le32_to_cpu(E); +@@ +expression E; +@@ +-le64_to_cpus(&E); ++E = le64_to_cpu(E); +@@ +expression E; +@@ +-cpu_to_le16s(&E); ++E = cpu_to_le16(E); +@@ +expression E; +@@ +-cpu_to_le32s(&E); ++E = cpu_to_le32(E); +@@ +expression E; +@@ +-cpu_to_le64s(&E); ++E = cpu_to_le64(E); diff --git a/scripts/coccinelle/memory-region-housekeeping.cocci b/scripts/coccinelle/memory-region-housekeeping.cocci new file mode 100644 index 000000000..29651ebde --- /dev/null +++ b/scripts/coccinelle/memory-region-housekeeping.cocci @@ -0,0 +1,159 @@ +/* + Usage: + + spatch \ + --macro-file scripts/cocci-macro-file.h \ + --sp-file scripts/coccinelle/memory-region-housekeeping.cocci \ + --keep-comments \ + --in-place \ + --dir . + +*/ + + +// Replace memory_region_init_ram(readonly) by memory_region_init_rom() +@@ +expression E1, E2, E3, E4, E5; +symbol true; +@@ +( +- memory_region_init_ram(E1, E2, E3, E4, E5); ++ memory_region_init_rom(E1, E2, E3, E4, E5); + ... WHEN != E1 +- memory_region_set_readonly(E1, true); +| +- memory_region_init_ram_nomigrate(E1, E2, E3, E4, E5); ++ memory_region_init_rom_nomigrate(E1, E2, E3, E4, E5); + ... WHEN != E1 +- memory_region_set_readonly(E1, true); +) + + +@possible_memory_region_init_rom@ +expression E1, E2, E3, E4, E5; +position p; +@@ +( + memory_region_init_ram@p(E1, E2, E3, E4, E5); + ... + memory_region_set_readonly(E1, true); +| + memory_region_init_ram_nomigrate@p(E1, E2, E3, E4, E5); + ... + memory_region_set_readonly(E1, true); +) +@script:python@ +p << possible_memory_region_init_rom.p; +@@ +cocci.print_main("potential use of memory_region_init_rom*() in ", p) + + +// Do not call memory_region_set_readonly() on ROM alias +@@ +expression ROM, E1, E2, E3, E4; +expression ALIAS, E5, E6, E7, E8; +@@ +( + memory_region_init_rom(ROM, E1, E2, E3, E4); +| + memory_region_init_rom_nomigrate(ROM, E1, E2, E3, E4); +) + ... + memory_region_init_alias(ALIAS, E5, E6, ROM, E7, E8); +- memory_region_set_readonly(ALIAS, true); + + +// Replace by-hand memory_region_init_ram_nomigrate/vmstate_register_ram +// code sequences with use of the new memory_region_init_ram function. +// Similarly for the _rom and _rom_device functions. +// We don't try to replace sequences with a non-NULL owner, because +// there are none in the tree that can be automatically converted +// (and only a handful that can be manually converted). +@@ +expression MR; +expression NAME; +expression SIZE; +expression ERRP; +@@ +-memory_region_init_ram_nomigrate(MR, NULL, NAME, SIZE, ERRP); ++memory_region_init_ram(MR, NULL, NAME, SIZE, ERRP); + ... +-vmstate_register_ram_global(MR); +@@ +expression MR; +expression NAME; +expression SIZE; +expression ERRP; +@@ +-memory_region_init_rom_nomigrate(MR, NULL, NAME, SIZE, ERRP); ++memory_region_init_rom(MR, NULL, NAME, SIZE, ERRP); + ... 
+-vmstate_register_ram_global(MR); +@@ +expression MR; +expression OPS; +expression OPAQUE; +expression NAME; +expression SIZE; +expression ERRP; +@@ +-memory_region_init_rom_device_nomigrate(MR, NULL, OPS, OPAQUE, NAME, SIZE, ERRP); ++memory_region_init_rom_device(MR, NULL, OPS, OPAQUE, NAME, SIZE, ERRP); + ... +-vmstate_register_ram_global(MR); + + +// Device is owner +@@ +typedef DeviceState; +identifier device_fn, dev, obj; +expression E1, E2, E3, E4, E5; +@@ +static void device_fn(DeviceState *dev, ...) +{ + ... + Object *obj = OBJECT(dev); + <+... +( +- memory_region_init(E1, NULL, E2, E3); ++ memory_region_init(E1, obj, E2, E3); +| +- memory_region_init_io(E1, NULL, E2, E3, E4, E5); ++ memory_region_init_io(E1, obj, E2, E3, E4, E5); +| +- memory_region_init_alias(E1, NULL, E2, E3, E4, E5); ++ memory_region_init_alias(E1, obj, E2, E3, E4, E5); +| +- memory_region_init_rom(E1, NULL, E2, E3, E4); ++ memory_region_init_rom(E1, obj, E2, E3, E4); +| +- memory_region_init_ram_flags_nomigrate(E1, NULL, E2, E3, E4, E5); ++ memory_region_init_ram_flags_nomigrate(E1, obj, E2, E3, E4, E5); +) + ...+> +} +@@ +identifier device_fn, dev; +expression E1, E2, E3, E4, E5; +@@ +static void device_fn(DeviceState *dev, ...) +{ + <+... +( +- memory_region_init(E1, NULL, E2, E3); ++ memory_region_init(E1, OBJECT(dev), E2, E3); +| +- memory_region_init_io(E1, NULL, E2, E3, E4, E5); ++ memory_region_init_io(E1, OBJECT(dev), E2, E3, E4, E5); +| +- memory_region_init_alias(E1, NULL, E2, E3, E4, E5); ++ memory_region_init_alias(E1, OBJECT(dev), E2, E3, E4, E5); +| +- memory_region_init_rom(E1, NULL, E2, E3, E4); ++ memory_region_init_rom(E1, OBJECT(dev), E2, E3, E4); +| +- memory_region_init_ram_flags_nomigrate(E1, NULL, E2, E3, E4, E5); ++ memory_region_init_ram_flags_nomigrate(E1, OBJECT(dev), E2, E3, E4, E5); +) + ...+> +} diff --git a/scripts/coccinelle/overflow_muldiv64.cocci b/scripts/coccinelle/overflow_muldiv64.cocci new file mode 100644 index 000000000..08ec4a8de --- /dev/null +++ b/scripts/coccinelle/overflow_muldiv64.cocci @@ -0,0 +1,16 @@ +// Find muldiv64(i64, i64, x) for potential overflow +@filter@ +typedef uint64_t; +typedef int64_t; +{ uint64_t, int64_t, long, unsigned long } a, b; +expression c; +position p; +@@ + +muldiv64(a,b,c)@p + +@script:python@ +p << filter.p; +@@ + +cocci.print_main("potential muldiv64() overflow", p) diff --git a/scripts/coccinelle/qobject.cocci b/scripts/coccinelle/qobject.cocci new file mode 100644 index 000000000..9fee9c0d9 --- /dev/null +++ b/scripts/coccinelle/qobject.cocci @@ -0,0 +1,47 @@ +// Use QDict macros where they make sense +@@ +expression Obj, Key, E; +@@ +( +- qobject_ref(QOBJECT(E)); ++ qobject_ref(E); +| +- qobject_unref(QOBJECT(E)); ++ qobject_unref(E); +| +- qdict_put_obj(Obj, Key, QOBJECT(E)); ++ qdict_put(Obj, Key, E); +| +- qdict_put(Obj, Key, qnum_from_int(E)); ++ qdict_put_int(Obj, Key, E); +| +- qdict_put(Obj, Key, qbool_from_bool(E)); ++ qdict_put_bool(Obj, Key, E); +| +- qdict_put(Obj, Key, qstring_from_str(E)); ++ qdict_put_str(Obj, Key, E); +| +- qdict_put(Obj, Key, qnull()); ++ qdict_put_null(Obj, Key); +) + +// Use QList macros where they make sense +@@ +expression Obj, E; +@@ +( +- qlist_append_obj(Obj, QOBJECT(E)); ++ qlist_append(Obj, E); +| +- qlist_append(Obj, qnum_from_int(E)); ++ qlist_append_int(Obj, E); +| +- qlist_append(Obj, qbool_from_bool(E)); ++ qlist_append_bool(Obj, E); +| +- qlist_append(Obj, qstring_from_str(E)); ++ qlist_append_str(Obj, E); +| +- qlist_append(Obj, qnull()); ++ qlist_append_null(Obj); +) diff --git 
a/scripts/coccinelle/qom-parent-type.cocci b/scripts/coccinelle/qom-parent-type.cocci new file mode 100644 index 000000000..9afb3edd9 --- /dev/null +++ b/scripts/coccinelle/qom-parent-type.cocci @@ -0,0 +1,26 @@ +// Highlight object declarations that don't look like object class but +// accidentally inherit from it. + +@match@ +identifier obj_t, fld; +type parent_t =~ ".*Class$"; +@@ +struct obj_t { + parent_t fld; + ... +}; + +@script:python filter depends on match@ +obj_t << match.obj_t; +@@ +is_class_obj = obj_t.endswith('Class') +cocci.include_match(not is_class_obj) + +@replacement depends on filter@ +identifier match.obj_t, match.fld; +type match.parent_t; +@@ +struct obj_t { +* parent_t fld; + ... +}; diff --git a/scripts/coccinelle/remove_local_err.cocci b/scripts/coccinelle/remove_local_err.cocci new file mode 100644 index 000000000..9261c9968 --- /dev/null +++ b/scripts/coccinelle/remove_local_err.cocci @@ -0,0 +1,29 @@ +// Replace unnecessary usage of local_err variable with +// direct usage of errp argument + +@@ +identifier F; +expression list ARGS; +expression F2; +identifier LOCAL_ERR; +identifier ERRP; +idexpression V; +typedef Error; +@@ + F(..., Error **ERRP) + { + ... +- Error *LOCAL_ERR; + ... when != LOCAL_ERR + when != ERRP +( +- F2(ARGS, &LOCAL_ERR); +- error_propagate(ERRP, LOCAL_ERR); ++ F2(ARGS, ERRP); +| +- V = F2(ARGS, &LOCAL_ERR); +- error_propagate(ERRP, LOCAL_ERR); ++ V = F2(ARGS, ERRP); +) + ... when != LOCAL_ERR + } diff --git a/scripts/coccinelle/remove_muldiv64.cocci b/scripts/coccinelle/remove_muldiv64.cocci new file mode 100644 index 000000000..4c10bd57d --- /dev/null +++ b/scripts/coccinelle/remove_muldiv64.cocci @@ -0,0 +1,6 @@ +// replace muldiv64(a, 1, b) by "a / b" +@@ +expression a, b; +@@ +-muldiv64(a, 1, b) ++a / b diff --git a/scripts/coccinelle/return_directly.cocci b/scripts/coccinelle/return_directly.cocci new file mode 100644 index 000000000..4cf50e75e --- /dev/null +++ b/scripts/coccinelle/return_directly.cocci @@ -0,0 +1,19 @@ +// replace 'R = X; return R;' with 'return X;' +@@ +identifier VAR; +expression E; +type T; +identifier F; +@@ + T F(...) + { + ... +- T VAR; + ... when != VAR + +- VAR = ++ return + E; +- return VAR; + ... 
when != VAR + } diff --git a/scripts/coccinelle/round.cocci b/scripts/coccinelle/round.cocci new file mode 100644 index 000000000..ed0677328 --- /dev/null +++ b/scripts/coccinelle/round.cocci @@ -0,0 +1,19 @@ +// Use macro DIV_ROUND_UP instead of (((n) + (d) - 1) /(d)) +@@ +expression e1; +expression e2; +@@ +( +- ((e1) + e2 - 1) / (e2) ++ DIV_ROUND_UP(e1,e2) +| +- ((e1) + (e2 - 1)) / (e2) ++ DIV_ROUND_UP(e1,e2) +) + +@@ +expression e1; +expression e2; +@@ +-(DIV_ROUND_UP(e1,e2)) ++DIV_ROUND_UP(e1,e2) diff --git a/scripts/coccinelle/simplify_muldiv64.cocci b/scripts/coccinelle/simplify_muldiv64.cocci new file mode 100644 index 000000000..3d7c9744a --- /dev/null +++ b/scripts/coccinelle/simplify_muldiv64.cocci @@ -0,0 +1,11 @@ +// replace muldiv64(i32, i32, x) by (uint64_t)i32 * i32 / x +@@ +typedef uint32_t; +typedef int32_t; +{ uint32_t, int32_t, int, unsigned int } a, b; +typedef uint64_t; +expression c; +@@ + +-muldiv64(a,b,c) ++(uint64_t) a * b / c diff --git a/scripts/coccinelle/swap_muldiv64.cocci b/scripts/coccinelle/swap_muldiv64.cocci new file mode 100644 index 000000000..b48b0d084 --- /dev/null +++ b/scripts/coccinelle/swap_muldiv64.cocci @@ -0,0 +1,13 @@ +// replace muldiv64(i32, i64, x) by muldiv64(i64, i32, x) +@@ +typedef uint64_t; +typedef int64_t; +typedef uint32_t; +typedef int32_t; +{ uint32_t, int32_t, int, unsigned int } a; +{ uint64_t, int64_t, long, unsigned long } b; +expression c; +@@ + +-muldiv64(a,b,c) ++muldiv64(b,a,c) diff --git a/scripts/coccinelle/tcg_gen_extract.cocci b/scripts/coccinelle/tcg_gen_extract.cocci new file mode 100644 index 000000000..c10c86348 --- /dev/null +++ b/scripts/coccinelle/tcg_gen_extract.cocci @@ -0,0 +1,107 @@ +// optimize TCG using extract op +// +// Copyright: (C) 2017 Philippe Mathieu-Daudé. GPLv2+. +// Confidence: High +// Options: --macro-file scripts/cocci-macro-file.h +// +// Nikunj A Dadhania optimization: +// http://lists.nongnu.org/archive/html/qemu-devel/2017-02/msg05211.html +// Aurelien Jarno optimization: +// http://lists.nongnu.org/archive/html/qemu-devel/2017-05/msg01466.html +// +// This script can be run either using spatch locally or via a docker image: +// +// $ spatch \ +// --macro-file scripts/cocci-macro-file.h \ +// --sp-file scripts/coccinelle/tcg_gen_extract.cocci \ +// --keep-comments --in-place \ +// --use-gitgrep --dir target +// +// $ docker run --rm -v $PWD:$PWD -w $PWD philmd/coccinelle \ +// --macro-file scripts/cocci-macro-file.h \ +// --sp-file scripts/coccinelle/tcg_gen_extract.cocci \ +// --keep-comments --in-place \ +// --use-gitgrep --dir target + +@initialize:python@ +@@ +import sys +fd = sys.stderr +def debug(msg="", trailer="\n"): + fd.write("[DBG] " + msg + trailer) +def low_bits_count(value): + bits_count = 0 + while (value & (1 << bits_count)): + bits_count += 1 + return bits_count +def Mn(order): # Mersenne number + return (1 << order) - 1 + +@match@ +identifier ret; +metavariable arg; +constant ofs, msk; +position shr_p, and_p; +@@ +( + tcg_gen_shri_i32@shr_p +| + tcg_gen_shri_i64@shr_p +| + tcg_gen_shri_tl@shr_p +)(ret, arg, ofs); +... WHEN != ret +( + tcg_gen_andi_i32@and_p +| + tcg_gen_andi_i64@and_p +| + tcg_gen_andi_tl@and_p +)(ret, ret, msk); + +@script:python verify_len depends on match@ +ret_s << match.ret; +msk_s << match.msk; +shr_p << match.shr_p; +extract_len; +@@ +is_optimizable = False +debug("candidate at %s:%s" % (shr_p[0].file, shr_p[0].line)) +try: # only eval integer, no #define like 'SR_M' (cpp did this, else some headers are missing). 
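+ # For example (illustrative values only): msk_s == "0x3fUL" gives
+ # msk_v == 0x3f, which has 6 low bits set, and Mn(6) == 0x3f, so the
+ # shri+andi pair can become a single extract of length 6.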
+ msk_v = int(msk_s.strip("UL"), 0)
+ msk_b = low_bits_count(msk_v)
+ if msk_b == 0:
+ debug(" value: 0x%x low_bits: %d" % (msk_v, msk_b))
+ else:
+ debug(" value: 0x%x low_bits: %d [Mersenne number: 0x%x]" % (msk_v, msk_b, Mn(msk_b)))
+ is_optimizable = Mn(msk_b) == msk_v # check low_bits
+ coccinelle.extract_len = "%d" % msk_b
+ debug(" candidate %s optimizable" % ("IS" if is_optimizable else "is NOT"))
+except:
+ debug(" ERROR (check included headers?)")
+cocci.include_match(is_optimizable)
+debug()
+
+@replacement depends on verify_len@
+identifier match.ret;
+metavariable match.arg;
+constant match.ofs, match.msk;
+position match.shr_p, match.and_p;
+identifier verify_len.extract_len;
+@@
+(
+-tcg_gen_shri_i32@shr_p(ret, arg, ofs);
++tcg_gen_extract_i32(ret, arg, ofs, extract_len);
+... WHEN != ret
+-tcg_gen_andi_i32@and_p(ret, ret, msk);
+|
+-tcg_gen_shri_i64@shr_p(ret, arg, ofs);
++tcg_gen_extract_i64(ret, arg, ofs, extract_len);
+... WHEN != ret
+-tcg_gen_andi_i64@and_p(ret, ret, msk);
+|
+-tcg_gen_shri_tl@shr_p(ret, arg, ofs);
++tcg_gen_extract_tl(ret, arg, ofs, extract_len);
+... WHEN != ret
+-tcg_gen_andi_tl@and_p(ret, ret, msk);
+)
diff --git a/scripts/coccinelle/timer-del-timer-free.cocci b/scripts/coccinelle/timer-del-timer-free.cocci
new file mode 100644
index 000000000..c3cfd4280
--- /dev/null
+++ b/scripts/coccinelle/timer-del-timer-free.cocci
@@ -0,0 +1,18 @@
+// Remove superfluous timer_del() calls
+//
+// Copyright Linaro Limited 2020
+// This work is licensed under the terms of the GNU GPLv2 or later.
+//
+// spatch --macro-file scripts/cocci-macro-file.h \
+// --sp-file scripts/coccinelle/timer-del-timer-free.cocci \
+// --in-place --dir .
+//
+// The timer_free() function now implicitly calls timer_del()
+// for you, so calls to timer_del() immediately before the
+// timer_free() of the same timer can be deleted.
+
+@@
+expression T;
+@@
+-timer_del(T);
+ timer_free(T);
diff --git a/scripts/coccinelle/typecast.cocci b/scripts/coccinelle/typecast.cocci
new file mode 100644
index 000000000..be2183ee4
--- /dev/null
+++ b/scripts/coccinelle/typecast.cocci
@@ -0,0 +1,7 @@
+// Remove useless casts
+@@
+type T;
+T v;
+@@
+- (T *)&v
++ &v
diff --git a/scripts/coccinelle/use-error_fatal.cocci b/scripts/coccinelle/use-error_fatal.cocci
new file mode 100644
index 000000000..10fff0aec
--- /dev/null
+++ b/scripts/coccinelle/use-error_fatal.cocci
@@ -0,0 +1,20 @@
+@@
+type T;
+identifier FUN, RET;
+expression list ARGS;
+expression ERR, EC, FAIL;
+@@
+(
+- T RET = FUN(ARGS, &ERR);
++ T RET = FUN(ARGS, &error_fatal);
+|
+- RET = FUN(ARGS, &ERR);
++ RET = FUN(ARGS, &error_fatal);
+|
+- FUN(ARGS, &ERR);
++ FUN(ARGS, &error_fatal);
+)
+- if (FAIL) {
+- error_report_err(ERR);
+- exit(EC);
+- }
diff --git a/scripts/codeconverter/codeconverter/__init__.py b/scripts/codeconverter/codeconverter/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/scripts/codeconverter/codeconverter/__init__.py
diff --git a/scripts/codeconverter/codeconverter/patching.py b/scripts/codeconverter/codeconverter/patching.py
new file mode 100644
index 000000000..9e92505d3
--- /dev/null
+++ b/scripts/codeconverter/codeconverter/patching.py
@@ -0,0 +1,466 @@
+# Copyright (C) 2020 Red Hat Inc.
+#
+# Authors:
+# Eduardo Habkost <ehabkost@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2. See
+# the COPYING file in the top-level directory.
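+#
+# Rough model of this module (an illustrative sketch, not an exhaustive
+# description): FileMatch subclasses scan a file's original_content with
+# their regexps and yield Patch(start, end, replacement) tuples, which
+# apply_patches() below merges back into the patched file content, e.g.:
+#
+#     apply_patches('abcdef', [Patch(1, 3, 'X')])   # -> 'aXdef'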
+from typing import IO, Match, NamedTuple, Optional, Literal, Iterable, Type, Dict, List, Any, TypeVar, NewType, Tuple, Union +from pathlib import Path +from itertools import chain +from tempfile import NamedTemporaryFile +import os +import re +import subprocess +from io import StringIO + +import logging +logger = logging.getLogger(__name__) +DBG = logger.debug +INFO = logger.info +WARN = logger.warning +ERROR = logger.error + +from .utils import * + +T = TypeVar('T') + +class Patch(NamedTuple): + # start inside file.original_content + start: int + # end position inside file.original_content + end: int + # replacement string for file.original_content[start:end] + replacement: str + +IdentifierType = Literal['type', 'symbol', 'include', 'constant'] +class RequiredIdentifier(NamedTuple): + type: IdentifierType + name: str + +class FileMatch: + """Base class for regex matches + + Subclasses just need to set the `regexp` class attribute + """ + regexp: Optional[str] = None + + def __init__(self, f: 'FileInfo', m: Match) -> None: + self.file: 'FileInfo' = f + self.match: Match[str] = m + + @property + def name(self) -> str: + if 'name' not in self.match.groupdict(): + return '[no name]' + return self.group('name') + + @classmethod + def compiled_re(klass): + return re.compile(klass.regexp, re.MULTILINE) + + def start(self) -> int: + return self.match.start() + + def end(self) -> int: + return self.match.end() + + def line_col(self) -> LineAndColumn: + return self.file.line_col(self.start()) + + def group(self, group: Union[int, str]) -> str: + return self.match.group(group) + + def getgroup(self, group: str) -> Optional[str]: + if group not in self.match.groupdict(): + return None + return self.match.group(group) + + def log(self, level, fmt, *args) -> None: + pos = self.line_col() + logger.log(level, '%s:%d:%d: '+fmt, self.file.filename, pos.line, pos.col, *args) + + def debug(self, fmt, *args) -> None: + self.log(logging.DEBUG, fmt, *args) + + def info(self, fmt, *args) -> None: + self.log(logging.INFO, fmt, *args) + + def warn(self, fmt, *args) -> None: + self.log(logging.WARNING, fmt, *args) + + def error(self, fmt, *args) -> None: + self.log(logging.ERROR, fmt, *args) + + def sub(self, original: str, replacement: str) -> str: + """Replace content + + XXX: this won't use the match position, but will just + replace all strings that look like the original match. + This should be enough for all the patterns used in this + script. 
+ """ + return original.replace(self.group(0), replacement) + + def sanity_check(self) -> None: + """Sanity check match, and print warnings if necessary""" + pass + + def replacement(self) -> Optional[str]: + """Return replacement text for pattern, to use new code conventions""" + return None + + def make_patch(self, replacement: str) -> 'Patch': + """Make patch replacing the content of this match""" + return Patch(self.start(), self.end(), replacement) + + def make_subpatch(self, start: int, end: int, replacement: str) -> 'Patch': + return Patch(self.start() + start, self.start() + end, replacement) + + def make_removal_patch(self) -> 'Patch': + """Make patch removing contents of match completely""" + return self.make_patch('') + + def append(self, s: str) -> 'Patch': + """Make patch appending string after this match""" + return Patch(self.end(), self.end(), s) + + def prepend(self, s: str) -> 'Patch': + """Make patch prepending string before this match""" + return Patch(self.start(), self.start(), s) + + def gen_patches(self) -> Iterable['Patch']: + """Patch source code contents to use new code patterns""" + replacement = self.replacement() + if replacement is not None: + yield self.make_patch(replacement) + + @classmethod + def has_replacement_rule(klass) -> bool: + return (klass.gen_patches is not FileMatch.gen_patches + or klass.replacement is not FileMatch.replacement) + + def contains(self, other: 'FileMatch') -> bool: + return other.start() >= self.start() and other.end() <= self.end() + + def __repr__(self) -> str: + start = self.file.line_col(self.start()) + end = self.file.line_col(self.end() - 1) + return '<%s %s at %d:%d-%d:%d: %r>' % (self.__class__.__name__, + self.name, + start.line, start.col, + end.line, end.col, self.group(0)[:100]) + + def required_identifiers(self) -> Iterable[RequiredIdentifier]: + """Can be implemented by subclasses to keep track of identifier references + + This method will be used by the code that moves declarations around the file, + to make sure we find the right spot for them. + """ + raise NotImplementedError() + + def provided_identifiers(self) -> Iterable[RequiredIdentifier]: + """Can be implemented by subclasses to keep track of identifier references + + This method will be used by the code that moves declarations around the file, + to make sure we find the right spot for them. 
+ """ + raise NotImplementedError() + + @classmethod + def finditer(klass, content: str, pos=0, endpos=-1) -> Iterable[Match]: + """Helper for re.finditer()""" + if endpos >= 0: + content = content[:endpos] + return klass.compiled_re().finditer(content, pos) + + @classmethod + def domatch(klass, content: str, pos=0, endpos=-1) -> Optional[Match]: + """Helper for re.match()""" + if endpos >= 0: + content = content[:endpos] + return klass.compiled_re().match(content, pos) + + def group_finditer(self, klass: Type['FileMatch'], group: Union[str, int]) -> Iterable['FileMatch']: + assert self.file.original_content + return (klass(self.file, m) + for m in klass.finditer(self.file.original_content, + self.match.start(group), + self.match.end(group))) + + def try_group_match(self, klass: Type['FileMatch'], group: Union[str, int]) -> Optional['FileMatch']: + assert self.file.original_content + m = klass.domatch(self.file.original_content, + self.match.start(group), + self.match.end(group)) + if not m: + return None + else: + return klass(self.file, m) + + def group_match(self, group: Union[str, int]) -> 'FileMatch': + m = self.try_group_match(FullMatch, group) + assert m + return m + + @property + def allfiles(self) -> 'FileList': + return self.file.allfiles + +class FullMatch(FileMatch): + """Regexp that will match all contents of string + Useful when used with group_match() + """ + regexp = r'(?s).*' # (?s) is re.DOTALL + +def all_subclasses(c: Type[FileMatch]) -> Iterable[Type[FileMatch]]: + for sc in c.__subclasses__(): + yield sc + yield from all_subclasses(sc) + +def match_class_dict() -> Dict[str, Type[FileMatch]]: + d = dict((t.__name__, t) for t in all_subclasses(FileMatch)) + return d + +def names(matches: Iterable[FileMatch]) -> Iterable[str]: + return [m.name for m in matches] + +class PatchingError(Exception): + pass + +class OverLappingPatchesError(PatchingError): + pass + +def apply_patches(s: str, patches: Iterable[Patch]) -> str: + """Apply a sequence of patches to string + + >>> apply_patches('abcdefg', [Patch(2,2,'xxx'), Patch(0, 1, 'yy')]) + 'yybxxxcdefg' + """ + r = StringIO() + last = 0 + def patch_sort_key(item: Tuple[int, Patch]) -> Tuple[int, int, int]: + """Patches are sorted by byte position, + patches at the same byte position are applied in the order + they were generated. 
+ """ + i,p = item + return (p.start, p.end, i) + + for i,p in sorted(enumerate(patches), key=patch_sort_key): + DBG("Applying patch at position %d (%s) - %d (%s): %r", + p.start, line_col(s, p.start), + p.end, line_col(s, p.end), + p.replacement) + if last > p.start: + raise OverLappingPatchesError("Overlapping patch at position %d (%s), last patch at %d (%s)" % \ + (p.start, line_col(s, p.start), last, line_col(s, last))) + r.write(s[last:p.start]) + r.write(p.replacement) + last = p.end + r.write(s[last:]) + return r.getvalue() + +class RegexpScanner: + def __init__(self) -> None: + self.match_index: Dict[Type[Any], List[FileMatch]] = {} + self.match_name_index: Dict[Tuple[Type[Any], str, str], Optional[FileMatch]] = {} + + def _matches_of_type(self, klass: Type[Any]) -> Iterable[FileMatch]: + raise NotImplementedError() + + def matches_of_type(self, t: Type[T]) -> List[T]: + if t not in self.match_index: + self.match_index[t] = list(self._matches_of_type(t)) + return self.match_index[t] # type: ignore + + def find_matches(self, t: Type[T], name: str, group: str='name') -> List[T]: + indexkey = (t, name, group) + if indexkey in self.match_name_index: + return self.match_name_index[indexkey] # type: ignore + r: List[T] = [] + for m in self.matches_of_type(t): + assert isinstance(m, FileMatch) + if m.getgroup(group) == name: + r.append(m) # type: ignore + self.match_name_index[indexkey] = r # type: ignore + return r + + def find_match(self, t: Type[T], name: str, group: str='name') -> Optional[T]: + l = self.find_matches(t, name, group) + if not l: + return None + if len(l) > 1: + logger.warn("multiple matches found for %r (%s=%r)", t, group, name) + return None + return l[0] + + def reset_index(self) -> None: + self.match_index.clear() + self.match_name_index.clear() + +class FileInfo(RegexpScanner): + filename: Path + original_content: Optional[str] = None + + def __init__(self, files: 'FileList', filename: os.PathLike, force:bool=False) -> None: + super().__init__() + self.allfiles = files + self.filename = Path(filename) + self.patches: List[Patch] = [] + self.force = force + + def __repr__(self) -> str: + return f'<FileInfo {repr(self.filename)}>' + + def filename_matches(self, name: str) -> bool: + nameparts = Path(name).parts + return self.filename.parts[-len(nameparts):] == nameparts + + def line_col(self, start: int) -> LineAndColumn: + """Return line and column for a match object inside original_content""" + return line_col(self.original_content, start) + + def _matches_of_type(self, klass: Type[Any]) -> List[FileMatch]: + """Build FileMatch objects for each match of regexp""" + if not hasattr(klass, 'regexp') or klass.regexp is None: + return [] + assert hasattr(klass, 'regexp') + DBG("%s: scanning for %s", self.filename, klass.__name__) + DBG("regexp: %s", klass.regexp) + matches = [klass(self, m) for m in klass.finditer(self.original_content)] + DBG('%s: %d matches found for %s: %s', self.filename, len(matches), + klass.__name__,' '.join(names(matches))) + return matches + + def find_match(self, t: Type[T], name: str, group: str='name') -> Optional[T]: + for m in self.matches_of_type(t): + assert isinstance(m, FileMatch) + if m.getgroup(group) == name: + return m # type: ignore + return None + + def reset_content(self, s:str): + self.original_content = s + self.patches.clear() + self.reset_index() + self.allfiles.reset_index() + + def load(self) -> None: + if self.original_content is not None: + return + with open(self.filename, 'rt') as f: + self.reset_content(f.read()) + + 
@property + def all_matches(self) -> Iterable[FileMatch]: + lists = list(self.match_index.values()) + return (m for l in lists + for m in l) + + def gen_patches(self, matches: List[FileMatch]) -> None: + for m in matches: + DBG("Generating patches for %r", m) + for i,p in enumerate(m.gen_patches()): + DBG("patch %d generated by %r:", i, m) + DBG("replace contents at %s-%s with %r", + self.line_col(p.start), self.line_col(p.end), p.replacement) + self.patches.append(p) + + def scan_for_matches(self, class_names: Optional[List[str]]=None) -> Iterable[FileMatch]: + DBG("class names: %r", class_names) + class_dict = match_class_dict() + if class_names is None: + DBG("default class names") + class_names = list(name for name,klass in class_dict.items() + if klass.has_replacement_rule()) + DBG("class_names: %r", class_names) + for cn in class_names: + matches = self.matches_of_type(class_dict[cn]) + DBG('%d matches found for %s: %s', + len(matches), cn, ' '.join(names(matches))) + yield from matches + + def apply_patches(self) -> None: + """Replace self.original_content after applying patches from self.patches""" + self.reset_content(self.get_patched_content()) + + def get_patched_content(self) -> str: + assert self.original_content is not None + return apply_patches(self.original_content, self.patches) + + def write_to_file(self, f: IO[str]) -> None: + f.write(self.get_patched_content()) + + def write_to_filename(self, filename: os.PathLike) -> None: + with open(filename, 'wt') as of: + self.write_to_file(of) + + def patch_inplace(self) -> None: + newfile = self.filename.with_suffix('.changed') + self.write_to_filename(newfile) + os.rename(newfile, self.filename) + + def show_diff(self) -> None: + with NamedTemporaryFile('wt') as f: + self.write_to_file(f) + f.flush() + subprocess.call(['diff', '-u', self.filename, f.name]) + + def ref(self): + return TypeInfoReference + +class FileList(RegexpScanner): + def __init__(self): + super().__init__() + self.files: List[FileInfo] = [] + + def extend(self, *args, **kwargs): + self.files.extend(*args, **kwargs) + + def __iter__(self): + return iter(self.files) + + def _matches_of_type(self, klass: Type[Any]) -> Iterable[FileMatch]: + return chain(*(f._matches_of_type(klass) for f in self.files)) + + def find_file(self, name: str) -> Optional[FileInfo]: + """Get file with path ending with @name""" + for f in self.files: + if f.filename_matches(name): + return f + else: + return None + + def one_pass(self, class_names: List[str]) -> int: + total_patches = 0 + for f in self.files: + INFO("Scanning file %s", f.filename) + matches = list(f.scan_for_matches(class_names)) + INFO("Generating patches for file %s", f.filename) + f.gen_patches(matches) + total_patches += len(f.patches) + if total_patches: + for f in self.files: + try: + f.apply_patches() + except PatchingError: + logger.exception("%s: failed to patch file", f.filename) + return total_patches + + def patch_content(self, max_passes, class_names: List[str]) -> None: + """Multi-pass content patching loop + + We run multiple passes because there are rules that will + delete init functions once they become empty. 
+ """ + passes = 0 + total_patches = 0 + DBG("max_passes: %r", max_passes) + while not max_passes or max_passes <= 0 or passes < max_passes: + passes += 1 + INFO("Running pass: %d", passes) + count = self.one_pass(class_names) + DBG("patch content: pass %d: %d patches generated", passes, count) + total_patches += count + DBG("%d patches applied total in %d passes", total_patches, passes) diff --git a/scripts/codeconverter/codeconverter/qom_macros.py b/scripts/codeconverter/codeconverter/qom_macros.py new file mode 100644 index 000000000..2d2f2055a --- /dev/null +++ b/scripts/codeconverter/codeconverter/qom_macros.py @@ -0,0 +1,861 @@ +# Copyright (C) 2020 Red Hat Inc. +# +# Authors: +# Eduardo Habkost <ehabkost@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2. See +# the COPYING file in the top-level directory. +import re +from itertools import chain +from typing import * + +from .regexps import * +from .patching import * +from .utils import * + +import logging +logger = logging.getLogger(__name__) +DBG = logger.debug +INFO = logger.info +WARN = logger.warning + +# simple expressions: + +RE_CONSTANT = OR(RE_STRING, RE_NUMBER) + +class DefineDirective(FileMatch): + """Match any #define directive""" + regexp = S(r'^[ \t]*#[ \t]*define', CPP_SPACE, NAMED('name', RE_IDENTIFIER), r'\b') + +class ExpressionDefine(FileMatch): + """Simple #define preprocessor directive for an expression""" + regexp = S(r'^[ \t]*#[ \t]*define', CPP_SPACE, NAMED('name', RE_IDENTIFIER), + CPP_SPACE, NAMED('value', RE_EXPRESSION), r'[ \t]*\n') + + def provided_identifiers(self) -> Iterable[RequiredIdentifier]: + yield RequiredIdentifier('constant', self.group('name')) + +class ConstantDefine(ExpressionDefine): + """Simple #define preprocessor directive for a number or string constant""" + regexp = S(r'^[ \t]*#[ \t]*define', CPP_SPACE, NAMED('name', RE_IDENTIFIER), + CPP_SPACE, NAMED('value', RE_CONSTANT), r'[ \t]*\n') + + +class TypeIdentifiers(NamedTuple): + """Type names found in type declarations""" + # TYPE_MYDEVICE + typename: Optional[str] + # MYDEVICE + uppercase: Optional[str] = None + # MyDevice + instancetype: Optional[str] = None + # MyDeviceClass + classtype: Optional[str] = None + # my_device + lowercase: Optional[str] = None + + def allfields(self): + return tuple(getattr(self, f) for f in self._fields) + + def merge(self, other: 'TypeIdentifiers') -> Optional['TypeIdentifiers']: + """Check if identifiers match, return new identifier with complete list""" + if any(not opt_compare(a, b) for a,b in zip(self, other)): + return None + return TypeIdentifiers(*(merge(a, b) for a,b in zip(self, other))) + + def __str__(self) -> str: + values = ((f, getattr(self, f)) for f in self._fields) + s = ', '.join('%s=%s' % (f,v) for f,v in values if v is not None) + return f'{s}' + + def check_consistency(self) -> List[str]: + """Check if identifiers are consistent with each other, + return list of problems (or empty list if everything seems consistent) + """ + r = [] + if self.typename is None: + r.append("typename (TYPE_MYDEVICE) is unavailable") + + if self.uppercase is None: + r.append("uppercase name is unavailable") + + if (self.instancetype is not None + and self.classtype is not None + and self.classtype != f'{self.instancetype}Class'): + r.append("class typedef %s doesn't match instance typedef %s" % + (self.classtype, self.instancetype)) + + if (self.uppercase is not None + and self.typename is not None + and f'TYPE_{self.uppercase}' != self.typename): + r.append("uppercase 
name (%s) doesn't match type name (%s)" %
+ (self.uppercase, self.typename))
+
+ return r
+
+class TypedefMatch(FileMatch):
+ """typedef declaration"""
+ def provided_identifiers(self) -> Iterable[RequiredIdentifier]:
+ yield RequiredIdentifier('type', self.group('name'))
+
+class SimpleTypedefMatch(TypedefMatch):
+ """Simple typedef declaration
+ (no replacement rules)"""
+ regexp = S(r'^[ \t]*typedef', SP,
+ NAMED('typedef_type', RE_TYPE), SP,
+ NAMED('name', RE_IDENTIFIER), r'\s*;[ \t]*\n')
+
+RE_MACRO_DEFINE = S(r'^[ \t]*#\s*define\s+', NAMED('name', RE_IDENTIFIER),
+ r'\s*\(\s*', RE_IDENTIFIER, r'\s*\)', CPP_SPACE)
+
+RE_STRUCT_ATTRIBUTE = r'QEMU_PACKED'
+
+# This doesn't parse the struct definitions completely, it just assumes
+# the closing brackets are going to be in an unindented line:
+RE_FULL_STRUCT = S('struct', SP, M(RE_IDENTIFIER, n='?', name='structname'), SP,
+ NAMED('body', r'{\n',
+ # acceptable inside the struct body:
+ # - lines starting with space or tab
+ # - empty lines
+ # - preprocessor directives
+ # - comments
+ OR(r'[ \t][^\n]*\n',
+ r'#[^\n]*\n',
+ r'\n',
+ S(r'[ \t]*', RE_COMMENT, r'[ \t]*\n'),
+ repeat='*?'),
+ r'}', M(RE_STRUCT_ATTRIBUTE, SP, n='*')))
+RE_STRUCT_TYPEDEF = S(r'^[ \t]*typedef', SP, RE_FULL_STRUCT, SP,
+ NAMED('name', RE_IDENTIFIER), r'\s*;[ \t]*\n')
+
+class FullStructTypedefMatch(TypedefMatch):
+ """typedef struct [SomeStruct] { ...} SomeType
+ Will be replaced by separate struct declaration + typedef
+ """
+ regexp = RE_STRUCT_TYPEDEF
+
+ def make_structname(self) -> str:
+ """Make struct name for struct+typedef split"""
+ name = self.group('structname')
+ if not name:
+ name = self.name
+ return name
+
+ def strip_typedef(self) -> Patch:
+ """generate patch that will strip typedef from the struct declaration
+
+ The caller is responsible for re-adding the typedef somewhere else.
+ """ + name = self.make_structname() + body = self.group('body') + return self.make_patch(f'struct {name} {body};\n') + + def make_simple_typedef(self) -> str: + structname = self.make_structname() + name = self.name + return f'typedef struct {structname} {name};\n' + + def move_typedef(self, position) -> Iterator[Patch]: + """Generate patches to move typedef elsewhere""" + yield self.strip_typedef() + yield Patch(position, position, self.make_simple_typedef()) + + def split_typedef(self) -> Iterator[Patch]: + """Split into struct definition + typedef in-place""" + yield self.strip_typedef() + yield self.append(self.make_simple_typedef()) + +class StructTypedefSplit(FullStructTypedefMatch): + """split struct+typedef declaration""" + def gen_patches(self) -> Iterator[Patch]: + if self.group('structname'): + yield from self.split_typedef() + +class DuplicatedTypedefs(SimpleTypedefMatch): + """Delete ALL duplicate typedefs (unsafe)""" + def gen_patches(self) -> Iterable[Patch]: + other_td = [td for td in chain(self.file.matches_of_type(SimpleTypedefMatch), + self.file.matches_of_type(FullStructTypedefMatch)) + if td.name == self.name] + DBG("other_td: %r", other_td) + if any(td.start() < self.start() for td in other_td): + # patch only if handling the first typedef + return + for td in other_td: + if isinstance(td, SimpleTypedefMatch): + DBG("other td: %r", td.match.groupdict()) + if td.group('typedef_type') != self.group('typedef_type'): + yield td.make_removal_patch() + elif isinstance(td, FullStructTypedefMatch): + DBG("other td: %r", td.match.groupdict()) + if self.group('typedef_type') == 'struct '+td.group('structname'): + yield td.strip_typedef() + +class QOMDuplicatedTypedefs(DuplicatedTypedefs): + """Delete duplicate typedefs if used by QOM type""" + def gen_patches(self) -> Iterable[Patch]: + qom_macros = [TypeCheckMacro, DeclareInstanceChecker, DeclareClassCheckers, DeclareObjCheckers] + qom_matches = chain(*(self.file.matches_of_type(t) for t in qom_macros)) + in_use = any(RequiredIdentifier('type', self.name) in m.required_identifiers() + for m in qom_matches) + if in_use: + yield from DuplicatedTypedefs.gen_patches(self) + +class QOMStructTypedefSplit(FullStructTypedefMatch): + """split struct+typedef declaration if used by QOM type""" + def gen_patches(self) -> Iterator[Patch]: + qom_macros = [TypeCheckMacro, DeclareInstanceChecker, DeclareClassCheckers, DeclareObjCheckers] + qom_matches = chain(*(self.file.matches_of_type(t) for t in qom_macros)) + in_use = any(RequiredIdentifier('type', self.name) in m.required_identifiers() + for m in qom_matches) + if in_use: + yield from self.split_typedef() + +def typedefs(file: FileInfo) -> Iterable[TypedefMatch]: + return (cast(TypedefMatch, m) + for m in chain(file.matches_of_type(SimpleTypedefMatch), + file.matches_of_type(FullStructTypedefMatch))) + +def find_typedef(f: FileInfo, name: Optional[str]) -> Optional[TypedefMatch]: + if not name: + return None + for td in typedefs(f): + if td.name == name: + return td + return None + +CHECKER_MACROS = ['OBJECT_CHECK', 'OBJECT_CLASS_CHECK', 'OBJECT_GET_CLASS'] +CheckerMacroName = Literal['OBJECT_CHECK', 'OBJECT_CLASS_CHECK', 'OBJECT_GET_CLASS'] + +RE_CHECK_MACRO = \ + S(RE_MACRO_DEFINE, + OR(*CHECKER_MACROS, name='checker'), + M(r'\s*\(\s*', OR(NAMED('typedefname', RE_IDENTIFIER), RE_TYPE, name='c_type'), r'\s*,', CPP_SPACE, + OPTIONAL_PARS(RE_IDENTIFIER), r',', CPP_SPACE, + NAMED('qom_typename', RE_IDENTIFIER), r'\s*\)\n', + n='?', name='check_args')) + +EXPECTED_CHECKER_SUFFIXES: 
List[Tuple[CheckerMacroName, str]] = [ + ('OBJECT_GET_CLASS', '_GET_CLASS'), + ('OBJECT_CLASS_CHECK', '_CLASS'), +] + +class TypeCheckMacro(FileMatch): + """OBJECT_CHECK/OBJECT_CLASS_CHECK/OBJECT_GET_CLASS macro definitions + Will be replaced by DECLARE_*_CHECKERS macro + """ + regexp = RE_CHECK_MACRO + + @property + def checker(self) -> CheckerMacroName: + """Name of checker macro being used""" + return self.group('checker') # type: ignore + + @property + def typedefname(self) -> Optional[str]: + return self.group('typedefname') + + def find_typedef(self) -> Optional[TypedefMatch]: + return find_typedef(self.file, self.typedefname) + + def sanity_check(self) -> None: + DBG("groups: %r", self.match.groups()) + if not self.group('check_args'): + self.warn("type check macro not parsed completely: %s", self.name) + return + DBG("type identifiers: %r", self.type_identifiers) + if self.typedefname and self.find_typedef() is None: + self.warn("typedef used by %s not found", self.name) + + def find_matching_macros(self) -> List['TypeCheckMacro']: + """Find other check macros that generate the same macro names + + The returned list will always be sorted. + """ + my_ids = self.type_identifiers + assert my_ids + return [m for m in self.file.matches_of_type(TypeCheckMacro) + if m.type_identifiers is not None + and my_ids.uppercase is not None + and (my_ids.uppercase == m.type_identifiers.uppercase + or my_ids.typename == m.type_identifiers.typename)] + + def merge_ids(self, matches: List['TypeCheckMacro']) -> Optional[TypeIdentifiers]: + """Try to merge info about type identifiers from all matches in a list""" + if not matches: + return None + r = matches[0].type_identifiers + if r is None: + return None + for m in matches[1:]: + assert m.type_identifiers + new = r.merge(m.type_identifiers) + if new is None: + self.warn("macro %s identifiers (%s) don't match macro %s (%s)", + matches[0].name, r, m.name, m.type_identifiers) + return None + r = new + return r + + def required_identifiers(self) -> Iterable[RequiredIdentifier]: + yield RequiredIdentifier('include', '"qom/object.h"') + if self.type_identifiers is None: + return + # to make sure typedefs will be moved above all related macros, + # return dependencies from all of them, not just this match + for m in self.find_matching_macros(): + yield RequiredIdentifier('type', m.group('c_type')) + yield RequiredIdentifier('constant', m.group('qom_typename')) + + @property + def type_identifiers(self) -> Optional[TypeIdentifiers]: + """Extract type identifier information from match""" + typename = self.group('qom_typename') + c_type = self.group('c_type') + if not typename or not c_type: + return None + typedef = self.group('typedefname') + classtype = None + instancetype = None + uppercase = None + expected_suffix = dict(EXPECTED_CHECKER_SUFFIXES).get(self.checker) + + # here the available data depends on the checker macro being called: + # - we need to remove the suffix from the macro name + # - depending on the macro type, we know the class type name, or + # the instance type name + if self.checker in ('OBJECT_GET_CLASS', 'OBJECT_CLASS_CHECK'): + classtype = c_type + elif self.checker == 'OBJECT_CHECK': + instancetype = c_type + uppercase = self.name + else: + assert False + if expected_suffix and self.name.endswith(expected_suffix): + uppercase = self.name[:-len(expected_suffix)] + return TypeIdentifiers(typename=typename, classtype=classtype, + instancetype=instancetype, uppercase=uppercase) + + def gen_patches(self) -> Iterable[Patch]: + # the 
implementation is a bit tricky because we need to group + # macros dealing with the same type into a single declaration + if self.type_identifiers is None: + self.warn("couldn't extract type information from macro %s", self.name) + return + + if self.name == 'INTERFACE_CLASS': + # INTERFACE_CLASS is special and won't be patched + return + + for checker,suffix in EXPECTED_CHECKER_SUFFIXES: + if self.name.endswith(suffix): + if self.checker != checker: + self.warn("macro %s is using macro %s instead of %s", self.name, self.checker, checker) + return + break + + matches = self.find_matching_macros() + DBG("found %d matching macros: %s", len(matches), ' '.join(m.name for m in matches)) + # we will generate patches only when processing the first macro: + if matches[0].start != self.start: + DBG("skipping %s (will patch when handling %s)", self.name, matches[0].name) + return + + + ids = self.merge_ids(matches) + if ids is None: + DBG("type identifier mismatch, won't patch %s", self.name) + return + + if not ids.uppercase: + self.warn("macro %s doesn't follow the expected name pattern", self.name) + return + if not ids.typename: + self.warn("macro %s: couldn't extract type name", self.name) + return + + #issues = ids.check_consistency() + #if issues: + # for i in issues: + # self.warn("inconsistent identifiers: %s", i) + + names = [n for n in (ids.instancetype, ids.classtype, ids.uppercase, ids.typename) + if n is not None] + if len(set(names)) != len(names): + self.warn("duplicate names used by macro: %r", ids) + return + + assert ids.classtype or ids.instancetype + assert ids.typename + assert ids.uppercase + if ids.classtype and ids.instancetype: + new_decl = (f'DECLARE_OBJ_CHECKERS({ids.instancetype}, {ids.classtype},\n' + f' {ids.uppercase}, {ids.typename})\n') + elif ids.classtype: + new_decl = (f'DECLARE_CLASS_CHECKERS({ids.classtype}, {ids.uppercase},\n' + f' {ids.typename})\n') + elif ids.instancetype: + new_decl = (f'DECLARE_INSTANCE_CHECKER({ids.instancetype}, {ids.uppercase},\n' + f' {ids.typename})\n') + else: + assert False + + # we need to ensure the typedefs are already available + issues = [] + for t in [ids.instancetype, ids.classtype]: + if not t: + continue + if re.fullmatch(RE_STRUCT_TYPE, t): + self.info("type %s is not a typedef", t) + continue + td = find_typedef(self.file, t) + #if not td and self.allfiles.find_file('include/qemu/typedefs.h'): + # + if not td: + # it is OK if the typedef is in typedefs.h + f = self.allfiles.find_file('include/qemu/typedefs.h') + if f and find_typedef(f, t): + self.info("typedef %s found in typedefs.h", t) + continue + + issues.append("couldn't find typedef %s" % (t)) + elif td.start() > self.start(): + issues.append("typedef %s need to be moved earlier in the file" % (td.name)) + + for issue in issues: + self.warn(issue) + + if issues and not self.file.force: + return + + # delete all matching macros and add new declaration: + for m in matches: + yield m.make_patch('') + for issue in issues: + yield self.prepend("/* FIXME: %s */\n" % (issue)) + yield self.append(new_decl) + +class InterfaceCheckMacro(FileMatch): + """Type checking macro using INTERFACE_CHECK + Will be replaced by DECLARE_INTERFACE_CHECKER + """ + regexp = S(RE_MACRO_DEFINE, + 'INTERFACE_CHECK', + r'\s*\(\s*', OR(NAMED('instancetype', RE_IDENTIFIER), RE_TYPE, name='c_type'), + r'\s*,', CPP_SPACE, + OPTIONAL_PARS(RE_IDENTIFIER), r',', CPP_SPACE, + NAMED('qom_typename', RE_IDENTIFIER), r'\s*\)\n') + + def required_identifiers(self) -> Iterable[RequiredIdentifier]: + yield 
RequiredIdentifier('include', '"qom/object.h"') + yield RequiredIdentifier('type', self.group('instancetype')) + yield RequiredIdentifier('constant', self.group('qom_typename')) + + def gen_patches(self) -> Iterable[Patch]: + if self.file.filename_matches('qom/object.h'): + self.debug("skipping object.h") + return + + typename = self.group('qom_typename') + uppercase = self.name + instancetype = self.group('instancetype') + c = f"DECLARE_INTERFACE_CHECKER({instancetype}, {uppercase},\n"+\ + f" {typename})\n" + yield self.make_patch(c) + + +class TypeDeclaration(FileMatch): + """Parent class to all type declarations""" + @property + def instancetype(self) -> Optional[str]: + return self.getgroup('instancetype') + + @property + def classtype(self) -> Optional[str]: + return self.getgroup('classtype') + + @property + def typename(self) -> Optional[str]: + return self.getgroup('typename') + +class TypeCheckerDeclaration(TypeDeclaration): + """Parent class to all type checker declarations""" + @property + def typename(self) -> str: + return self.group('typename') + + @property + def uppercase(self) -> str: + return self.group('uppercase') + +class DeclareInstanceChecker(TypeCheckerDeclaration): + """DECLARE_INSTANCE_CHECKER use""" + #TODO: replace lonely DECLARE_INSTANCE_CHECKER with DECLARE_OBJ_CHECKERS + # if all types are found. + # This will require looking up the correct class type in the TypeInfo + # structs in another file + regexp = S(r'^[ \t]*DECLARE_INSTANCE_CHECKER\s*\(\s*', + NAMED('instancetype', RE_TYPE), r'\s*,\s*', + NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*', + OR(RE_IDENTIFIER, RE_STRING, RE_MACRO_CONCAT, RE_FUN_CALL, name='typename'), SP, + r'\)[ \t]*;?[ \t]*\n') + + def required_identifiers(self) -> Iterable[RequiredIdentifier]: + yield RequiredIdentifier('include', '"qom/object.h"') + yield RequiredIdentifier('constant', self.group('typename')) + yield RequiredIdentifier('type', self.group('instancetype')) + +class DeclareInterfaceChecker(TypeCheckerDeclaration): + """DECLARE_INTERFACE_CHECKER use""" + regexp = S(r'^[ \t]*DECLARE_INTERFACE_CHECKER\s*\(\s*', + NAMED('instancetype', RE_TYPE), r'\s*,\s*', + NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*', + OR(RE_IDENTIFIER, RE_STRING, RE_MACRO_CONCAT, RE_FUN_CALL, name='typename'), SP, + r'\)[ \t]*;?[ \t]*\n') + + def required_identifiers(self) -> Iterable[RequiredIdentifier]: + yield RequiredIdentifier('include', '"qom/object.h"') + yield RequiredIdentifier('constant', self.group('typename')) + yield RequiredIdentifier('type', self.group('instancetype')) + +class DeclareInstanceType(TypeDeclaration): + """DECLARE_INSTANCE_TYPE use""" + regexp = S(r'^[ \t]*DECLARE_INSTANCE_TYPE\s*\(\s*', + NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*', + NAMED('instancetype', RE_TYPE), SP, + r'\)[ \t]*;?[ \t]*\n') + + def required_identifiers(self) -> Iterable[RequiredIdentifier]: + yield RequiredIdentifier('include', '"qom/object.h"') + yield RequiredIdentifier('type', self.group('instancetype')) + +class DeclareClassType(TypeDeclaration): + """DECLARE_CLASS_TYPE use""" + regexp = S(r'^[ \t]*DECLARE_CLASS_TYPE\s*\(\s*', + NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*', + NAMED('classtype', RE_TYPE), SP, + r'\)[ \t]*;?[ \t]*\n') + + def required_identifiers(self) -> Iterable[RequiredIdentifier]: + yield RequiredIdentifier('include', '"qom/object.h"') + yield RequiredIdentifier('type', self.group('classtype')) + + + +class DeclareClassCheckers(TypeCheckerDeclaration): + """DECLARE_CLASS_CHECKER use""" + regexp = S(r'^[ 
\t]*DECLARE_CLASS_CHECKERS\s*\(\s*', + NAMED('classtype', RE_TYPE), r'\s*,\s*', + NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*', + OR(RE_IDENTIFIER, RE_STRING, RE_MACRO_CONCAT, RE_FUN_CALL, name='typename'), SP, + r'\)[ \t]*;?[ \t]*\n') + + def required_identifiers(self) -> Iterable[RequiredIdentifier]: + yield RequiredIdentifier('include', '"qom/object.h"') + yield RequiredIdentifier('constant', self.group('typename')) + yield RequiredIdentifier('type', self.group('classtype')) + +class DeclareObjCheckers(TypeCheckerDeclaration): + """DECLARE_OBJ_CHECKERS use""" + #TODO: detect when OBJECT_DECLARE_SIMPLE_TYPE can be used + regexp = S(r'^[ \t]*DECLARE_OBJ_CHECKERS\s*\(\s*', + NAMED('instancetype', RE_TYPE), r'\s*,\s*', + NAMED('classtype', RE_TYPE), r'\s*,\s*', + NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*', + OR(RE_IDENTIFIER, RE_STRING, RE_MACRO_CONCAT, RE_FUN_CALL, name='typename'), SP, + r'\)[ \t]*;?[ \t]*\n') + + def required_identifiers(self) -> Iterable[RequiredIdentifier]: + yield RequiredIdentifier('include', '"qom/object.h"') + yield RequiredIdentifier('constant', self.group('typename')) + yield RequiredIdentifier('type', self.group('classtype')) + yield RequiredIdentifier('type', self.group('instancetype')) + +class TypeDeclarationFixup(FileMatch): + """Common base class for code that will look at a set of type declarations""" + regexp = RE_FILE_BEGIN + def gen_patches(self) -> Iterable[Patch]: + if self.file.filename_matches('qom/object.h'): + self.debug("skipping object.h") + return + + # group checkers by uppercase name: + decl_types: List[Type[TypeDeclaration]] = [DeclareInstanceChecker, DeclareInstanceType, + DeclareClassCheckers, DeclareClassType, + DeclareObjCheckers] + checker_dict: Dict[str, List[TypeDeclaration]] = {} + for t in decl_types: + for m in self.file.matches_of_type(t): + checker_dict.setdefault(m.group('uppercase'), []).append(m) + self.debug("checker_dict: %r", checker_dict) + for uppercase,checkers in checker_dict.items(): + fields = ('instancetype', 'classtype', 'uppercase', 'typename') + fvalues = dict((field, set(getattr(m, field) for m in checkers + if getattr(m, field, None) is not None)) + for field in fields) + for field,values in fvalues.items(): + if len(values) > 1: + for c in checkers: + c.warn("%s mismatch (%s)", field, ' '.join(values)) + return + + field_dict = dict((f, v.pop() if v else None) for f,v in fvalues.items()) + yield from self.gen_patches_for_type(uppercase, checkers, field_dict) + + def find_conflicts(self, uppercase: str, checkers: List[TypeDeclaration]) -> bool: + """Look for conflicting declarations that would make it unsafe to add new ones""" + conflicting: List[FileMatch] = [] + # conflicts in the same file: + conflicting.extend(chain(self.file.find_matches(DefineDirective, uppercase), + self.file.find_matches(DeclareInterfaceChecker, uppercase, 'uppercase'), + self.file.find_matches(DeclareClassType, uppercase, 'uppercase'), + self.file.find_matches(DeclareInstanceType, uppercase, 'uppercase'))) + + # conflicts in another file: + conflicting.extend(o for o in chain(self.allfiles.find_matches(DeclareInstanceChecker, uppercase, 'uppercase'), + self.allfiles.find_matches(DeclareClassCheckers, uppercase, 'uppercase'), + self.allfiles.find_matches(DeclareInterfaceChecker, uppercase, 'uppercase'), + self.allfiles.find_matches(DefineDirective, uppercase)) + if o is not None and o.file != self.file + # if both are .c files, there's no conflict at all: + and not (o.file.filename.suffix == '.c' and + self.file.filename.suffix == 
'.c')) + + if conflicting: + for c in checkers: + c.warn("skipping due to conflicting %s macro", uppercase) + for o in conflicting: + if o is None: + continue + o.warn("conflicting %s macro is here", uppercase) + return True + + return False + + def gen_patches_for_type(self, uppercase: str, + checkers: List[TypeDeclaration], + fields: Dict[str, Optional[str]]) -> Iterable[Patch]: + """Should be reimplemented by subclasses""" + return + yield + +class DeclareVoidTypes(TypeDeclarationFixup): + """Add DECLARE_*_TYPE(..., void) when there's no declared type""" + regexp = RE_FILE_BEGIN + def gen_patches_for_type(self, uppercase: str, + checkers: List[TypeDeclaration], + fields: Dict[str, Optional[str]]) -> Iterable[Patch]: + if self.find_conflicts(uppercase, checkers): + return + + #_,last_checker = max((m.start(), m) for m in checkers) + _,first_checker = min((m.start(), m) for m in checkers) + + if not any(m.instancetype for m in checkers): + yield first_checker.prepend(f'DECLARE_INSTANCE_TYPE({uppercase}, void)\n') + if not any(m.classtype for m in checkers): + yield first_checker.prepend(f'DECLARE_CLASS_TYPE({uppercase}, void)\n') + + #if not all(len(v) == 1 for v in fvalues.values()): + # return + # + #final_values = dict((field, values.pop()) + # for field,values in fvalues.items()) + #s = (f"DECLARE_OBJ_CHECKERS({final_values['instancetype']}, {final_values['classtype']},\n"+ + # f" {final_values['uppercase']}, {final_values['typename']})\n") + #for c in checkers: + # yield c.make_removal_patch() + #yield last_checker.append(s) + + +class AddDeclareTypeName(TypeDeclarationFixup): + """Add DECLARE_TYPE_NAME declarations if necessary""" + def gen_patches_for_type(self, uppercase: str, + checkers: List[TypeDeclaration], + fields: Dict[str, Optional[str]]) -> Iterable[Patch]: + typename = fields.get('typename') + if typename is None: + self.warn("typename unavailable") + return + if typename == f'TYPE_{uppercase}': + self.info("already using TYPE_%s as type name", uppercase) + return + if self.file.find_match(DeclareTypeName, uppercase, 'uppercase'): + self.info("type name for %s already declared", uppercase) + return + _,first_checker = min((m.start(), m) for m in checkers) + s = f'DECLARE_TYPE_NAME({uppercase}, {typename})\n' + yield first_checker.prepend(s) + +class TrivialClassStruct(FileMatch): + """Trivial class struct""" + regexp = S(r'^[ \t]*struct\s*', NAMED('name', RE_IDENTIFIER), + r'\s*{\s*', NAMED('parent_struct', RE_IDENTIFIER), r'\s*parent(_class)?\s*;\s*};\n') + +class DeclareTypeName(FileMatch): + """DECLARE_TYPE_NAME usage""" + regexp = S(r'^[ \t]*DECLARE_TYPE_NAME\s*\(', + NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*', + OR(RE_IDENTIFIER, RE_STRING, RE_MACRO_CONCAT, RE_FUN_CALL, name='typename'), + r'\s*\);?[ \t]*\n') + +class ObjectDeclareType(TypeCheckerDeclaration): + """OBJECT_DECLARE_TYPE usage + Will be replaced with OBJECT_DECLARE_SIMPLE_TYPE if possible + """ + regexp = S(r'^[ \t]*OBJECT_DECLARE_TYPE\s*\(', + NAMED('instancetype', RE_TYPE), r'\s*,\s*', + NAMED('classtype', RE_TYPE), r'\s*,\s*', + NAMED('uppercase', RE_IDENTIFIER), SP, + r'\)[ \t]*;?[ \t]*\n') + + def gen_patches(self): + DBG("groups: %r", self.match.groupdict()) + trivial_struct = self.file.find_match(TrivialClassStruct, self.group('classtype')) + if trivial_struct: + d = self.match.groupdict().copy() + d['parent_struct'] = trivial_struct.group("parent_struct") + yield trivial_struct.make_removal_patch() + c = ("OBJECT_DECLARE_SIMPLE_TYPE(%(instancetype)s, %(lowercase)s,\n" + " %(uppercase)s, 
%(parent_struct)s)\n" % d) + yield self.make_patch(c) + +class ObjectDeclareSimpleType(TypeCheckerDeclaration): + """OBJECT_DECLARE_SIMPLE_TYPE usage""" + regexp = S(r'^[ \t]*OBJECT_DECLARE_SIMPLE_TYPE\s*\(', + NAMED('instancetype', RE_TYPE), r'\s*,\s*', + NAMED('uppercase', RE_IDENTIFIER), SP, + r'\)[ \t]*;?[ \t]*\n') + +class OldStyleObjectDeclareSimpleType(TypeCheckerDeclaration): + """OBJECT_DECLARE_SIMPLE_TYPE usage (old API)""" + regexp = S(r'^[ \t]*OBJECT_DECLARE_SIMPLE_TYPE\s*\(', + NAMED('instancetype', RE_TYPE), r'\s*,\s*', + NAMED('lowercase', RE_IDENTIFIER), r'\s*,\s*', + NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*', + NAMED('parent_classtype', RE_TYPE), SP, + r'\)[ \t]*;?[ \t]*\n') + + @property + def classtype(self) -> Optional[str]: + instancetype = self.instancetype + assert instancetype + return f"{instancetype}Class" + +def find_typename_uppercase(files: FileList, typename: str) -> Optional[str]: + """Try to find what's the right MODULE_OBJ_NAME for a given type name""" + decl = files.find_match(DeclareTypeName, name=typename, group='typename') + if decl: + return decl.group('uppercase') + if typename.startswith('TYPE_'): + return typename[len('TYPE_'):] + return None + +def find_type_checkers(files:FileList, name:str, group:str='uppercase') -> Iterable[TypeCheckerDeclaration]: + """Find usage of DECLARE*CHECKER macro""" + c: Type[TypeCheckerDeclaration] + for c in (DeclareInstanceChecker, DeclareClassCheckers, DeclareObjCheckers, ObjectDeclareType, ObjectDeclareSimpleType): + yield from files.find_matches(c, name=name, group=group) + +class Include(FileMatch): + """#include directive""" + regexp = RE_INCLUDE + def provided_identifiers(self) -> Iterable[RequiredIdentifier]: + yield RequiredIdentifier('include', self.group('includepath')) + +class InitialIncludes(FileMatch): + """Initial #include block""" + regexp = S(RE_FILE_BEGIN, + M(SP, RE_COMMENTS, + r'^[ \t]*#[ \t]*ifndef[ \t]+', RE_IDENTIFIER, r'[ \t]*\n', + n='?', name='ifndef_block'), + M(SP, RE_COMMENTS, + OR(RE_INCLUDE, RE_SIMPLEDEFINE), + n='*', name='includes')) + +class SymbolUserList(NamedTuple): + definitions: List[FileMatch] + users: List[FileMatch] + +class MoveSymbols(FileMatch): + """Handle missing symbols + - Move typedefs and defines when necessary + - Add missing #include lines when necessary + """ + regexp = RE_FILE_BEGIN + + def gen_patches(self) -> Iterator[Patch]: + if self.file.filename_matches('qom/object.h'): + self.debug("skipping object.h") + return + + index: Dict[RequiredIdentifier, SymbolUserList] = {} + definition_classes = [SimpleTypedefMatch, FullStructTypedefMatch, ConstantDefine, Include] + user_classes = [TypeCheckMacro, DeclareObjCheckers, DeclareInstanceChecker, DeclareClassCheckers, InterfaceCheckMacro] + + # first we scan for all symbol definitions and usage: + for dc in definition_classes: + defs = self.file.matches_of_type(dc) + for d in defs: + DBG("scanning %r", d) + for i in d.provided_identifiers(): + index.setdefault(i, SymbolUserList([], [])).definitions.append(d) + DBG("index: %r", list(index.keys())) + for uc in user_classes: + users = self.file.matches_of_type(uc) + for u in users: + for i in u.required_identifiers(): + index.setdefault(i, SymbolUserList([], [])).users.append(u) + + # validate all symbols: + for i,ul in index.items(): + if not ul.users: + # unused symbol + continue + + # symbol not defined + if len(ul.definitions) == 0: + if i.type == 'include': + includes, = self.file.matches_of_type(InitialIncludes) + #FIXME: don't do this if we're already inside 
qom/object.h + yield includes.append(f'#include {i.name}\n') + else: + u.warn("definition of %s %s not found in file", i.type, i.name) + continue + + # symbol defined twice: + if len(ul.definitions) > 1: + ul.definitions[1].warn("%s defined twice", i.name) + ul.definitions[0].warn("previously defined here") + continue + + # symbol defined. check if all users are after its definition: + assert len(ul.definitions) == 1 + definition = ul.definitions[0] + DBG("handling repositioning of %r", definition) + earliest = min(ul.users, key=lambda u: u.start()) + if earliest.start() > definition.start(): + DBG("%r is OK", definition) + continue + + DBG("%r needs to be moved", definition) + if isinstance(definition, SimpleTypedefMatch) \ + or isinstance(definition, ConstantDefine): + # simple typedef or define can be moved directly: + yield definition.make_removal_patch() + yield earliest.prepend(definition.group(0)) + elif isinstance(definition, FullStructTypedefMatch) \ + and definition.group('structname'): + # full struct typedef is more complex: we need to remove + # the typedef + yield from definition.move_typedef(earliest.start()) + else: + definition.warn("definition of %s %s needs to be moved earlier in the file", i.type, i.name) + earliest.warn("definition of %s %s is used here", i.type, i.name) + + +class EmptyPreprocessorConditional(FileMatch): + """Delete empty preprocessor conditionals""" + regexp = r'^[ \t]*#(if|ifdef)[ \t].*\n+[ \t]*#endif[ \t]*\n' + def gen_patches(self) -> Iterable[Patch]: + yield self.make_removal_patch() diff --git a/scripts/codeconverter/codeconverter/qom_type_info.py b/scripts/codeconverter/codeconverter/qom_type_info.py new file mode 100644 index 000000000..255cb5992 --- /dev/null +++ b/scripts/codeconverter/codeconverter/qom_type_info.py @@ -0,0 +1,969 @@ +# Copyright (C) 2020 Red Hat Inc. +# +# Authors: +# Eduardo Habkost <ehabkost@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2. See +# the COPYING file in the top-level directory. 
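+# Example of the conversion this module performs (sketch only; the type
+# and variable names below are hypothetical): given a static TypeInfo
+# variable and its TYPE_INFO() line, e.g.:
+#
+#   static const TypeInfo my_device_info = {
+#       .name          = TYPE_MY_DEVICE,
+#       .parent        = TYPE_DEVICE,
+#       .instance_size = sizeof(MyDeviceState),
+#   };
+#   TYPE_INFO(my_device_info)
+#
+# the UseDeclareTypeExtended rule below can rewrite them as:
+#
+#   OBJECT_DEFINE_TYPE_EXTENDED(my_device_info,
+#                               MyDeviceState, void,
+#                               MY_DEVICE, DEVICE)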
+import re
+from .regexps import *
+from .patching import *
+from .utils import *
+from .qom_macros import *
+
+TI_FIELDS = [ 'name', 'parent', 'abstract', 'interfaces',
+ 'instance_size', 'instance_init', 'instance_post_init', 'instance_finalize',
+ 'class_size', 'class_init', 'class_base_init', 'class_data']
+
+RE_TI_FIELD_NAME = OR(*TI_FIELDS)
+
+RE_TI_FIELD_INIT = S(r'[ \t]*', NAMED('comments', RE_COMMENTS),
+ r'\.', NAMED('field', RE_TI_FIELD_NAME), r'\s*=\s*',
+ NAMED('value', RE_EXPRESSION), r'[ \t]*,?[ \t]*\n')
+RE_TI_FIELDS = M(RE_TI_FIELD_INIT)
+
+RE_TYPEINFO_START = S(r'^[ \t]*', M(r'(static|const)\s+', name='modifiers'), r'TypeInfo\s+',
+ NAMED('name', RE_IDENTIFIER), r'\s*=\s*{[ \t]*\n')
+
+ParsedArray = List[str]
+ParsedInitializerValue = Union[str, ParsedArray]
+class InitializerValue(NamedTuple):
+ raw: str
+ parsed: Optional[ParsedInitializerValue]
+ match: Optional[Match]
+
+class ArrayItem(FileMatch):
+ regexp = RE_ARRAY_ITEM
+
+class ArrayInitializer(FileMatch):
+ regexp = RE_ARRAY
+
+ def parsed(self) -> ParsedArray:
+ #DBG('parse_array: %r', m.group(0))
+ return [m.group('arrayitem') for m in self.group_finditer(ArrayItem, 'arrayitems')]
+
+class FieldInitializer(FileMatch):
+ regexp = RE_TI_FIELD_INIT
+
+ @property
+ def raw(self) -> str:
+ return self.group('value')
+
+ @property
+ def parsed(self) -> ParsedInitializerValue:
+ parsed: ParsedInitializerValue = self.raw
+ #DBG("parse_initializer_value: %r", s)
+ array = self.try_group_match(ArrayInitializer, 'value')
+ if array:
+ assert isinstance(array, ArrayInitializer)
+ return array.parsed()
+ return parsed
+
+TypeInfoInitializers = Dict[str, FieldInitializer]
+
+class TypeDefinition(FileMatch):
+ """
+ Common base class for type definitions (TypeInfo variables or OBJECT_DEFINE* macros)
+ """
+ @property
+ def instancetype(self) -> Optional[str]:
+ return self.group('instancetype')
+
+ @property
+ def classtype(self) -> Optional[str]:
+ return self.group('classtype')
+
+ @property
+ def uppercase(self) -> Optional[str]:
+ return self.group('uppercase')
+
+ @property
+ def parent_uppercase(self) -> str:
+ return self.group('parent_uppercase')
+
+ @property
+ def initializers(self) -> Optional[TypeInfoInitializers]:
+ if getattr(self, '_initializers', None):
+ self._initializers: TypeInfoInitializers
+ return self._initializers
+ fields = self.group('fields')
+ if fields is None:
+ return None
+ d = dict((fm.group('field'), fm)
+ for fm in self.group_finditer(FieldInitializer, 'fields'))
+ self._initializers = d # type: ignore
+ return self._initializers
+
+
+class TypeInfoVar(TypeDefinition):
+ """TypeInfo variable declaration with initializer"""
+ regexp = S(NAMED('begin', RE_TYPEINFO_START),
+ M(NAMED('fields', RE_TI_FIELDS),
+ NAMED('endcomments', SP, RE_COMMENTS),
+ NAMED('end', r'};?\n'),
+ n='?', name='fullspec'))
+
+ def is_static(self) -> bool:
+ return 'static' in self.group('modifiers')
+
+ def is_const(self) -> bool:
+ return 'const' in self.group('modifiers')
+
+ def is_full(self) -> bool:
+ return bool(self.group('fullspec'))
+
+ def get_initializers(self) -> TypeInfoInitializers:
+ """Helper for code that needs to deal with missing initializer info"""
+ if self.initializers is None:
+ return {}
+ return self.initializers
+
+ def get_raw_initializer_value(self, field: str, default: str = '') -> str:
+ initializers = self.get_initializers()
+ if field in initializers:
+ return initializers[field].raw
+ else:
+ return default
+
+ @property
+ def typename(self) -> Optional[str]:
+ return
self.get_raw_initializer_value('name') + + @property + def uppercase(self) -> Optional[str]: + typename = self.typename + if not typename: + return None + if not typename.startswith('TYPE_'): + return None + return typename[len('TYPE_'):] + + @property + def classtype(self) -> Optional[str]: + class_size = self.get_raw_initializer_value('class_size') + if not class_size: + return None + m = re.fullmatch(RE_SIZEOF, class_size) + if not m: + return None + return m.group('sizeoftype') + + @property + def instancetype(self) -> Optional[str]: + instance_size = self.get_raw_initializer_value('instance_size') + if not instance_size: + return None + m = re.fullmatch(RE_SIZEOF, instance_size) + if not m: + return None + return m.group('sizeoftype') + + + #def extract_identifiers(self) -> Optional[TypeIdentifiers]: + # """Try to extract identifiers from names being used""" + # DBG("extracting idenfiers from %s", self.name) + #uppercase = None + #if typename and re.fullmatch(RE_IDENTIFIER, typename) and typename.startswith("TYPE_"): + # uppercase = typename[len('TYPE_'):] + #lowercase = None + #funcs = set() + #prefixes = set() + #for field,suffix in [('instance_init', '_init'), + # ('instance_finalize', '_finalize'), + # ('class_init', '_class_init')]: + # if field not in values: + # continue + # func = values[field].raw + # funcs.add(func) + # if func.endswith(suffix): + # prefixes.add(func[:-len(suffix)]) + # else: + # self.warn("function name %s doesn't have expected %s suffix", + # func, suffix) + #if len(prefixes) == 1: + # lowercase = prefixes.pop() + #elif len(prefixes) > 1: + # self.warn("inconsistent function names: %s", ' '.join(funcs)) + + #.parent = TYPE_##PARENT_MODULE_OBJ_NAME, \ + #return TypeIdentifiers(typename=typename, + # uppercase=uppercase, lowercase=lowercase, + # instancetype=instancetype, classtype=classtype) + + def append_field(self, field: str, value: str) -> Patch: + """Generate patch appending a field initializer""" + content = f' .{field} = {value},\n' + fm = self.group_match('fields') + assert fm + return fm.append(content) + + def patch_field(self, field: str, replacement: str) -> Patch: + """Generate patch replacing a field initializer""" + initializers = self.initializers + assert initializers + value = initializers.get(field) + assert value + return value.make_patch(replacement) + + def remove_field(self, field: str) -> Iterable[Patch]: + initializers = self.initializers + assert initializers + if field in initializers: + yield self.patch_field(field, '') + + def remove_fields(self, *fields: str) -> Iterable[Patch]: + for f in fields: + yield from self.remove_field(f) + + def patch_field_value(self, field: str, replacement: str) -> Patch: + """Replace just the value of a field initializer""" + initializers = self.initializers + assert initializers + value = initializers.get(field) + assert value + vm = value.group_match('value') + assert vm + return vm.make_patch(replacement) + + +class RemoveRedundantClassSize(TypeInfoVar): + """Remove class_size when using OBJECT_DECLARE_SIMPLE_TYPE""" + def gen_patches(self) -> Iterable[Patch]: + initializers = self.initializers + if initializers is None: + return + if 'class_size' not in initializers: + return + + self.debug("Handling %s", self.name) + m = re.fullmatch(RE_SIZEOF, initializers['class_size'].raw) + if not m: + self.warn("%s class_size is not sizeof?", self.name) + return + classtype = m.group('sizeoftype') + if not classtype.endswith('Class'): + self.warn("%s class size type (%s) is not *Class?", self.name, 
classtype)
+ return
+ self.debug("classtype is %s", classtype)
+ instancetype = classtype[:-len('Class')]
+ self.debug("instancetype is %s", instancetype)
+ self.debug("searching for simpletype declaration using %s as InstanceType", instancetype)
+ decl = self.allfiles.find_match(OldStyleObjectDeclareSimpleType,
+ instancetype, 'instancetype')
+ if not decl:
+ self.debug("No simpletype declaration found for %s", instancetype)
+ return
+ self.debug("Found simple type declaration")
+ decl.debug("declaration is here")
+ yield from self.remove_field('class_size')
+
+class RemoveDeclareSimpleTypeArg(OldStyleObjectDeclareSimpleType):
+ """Remove the parent class type argument from old-style OBJECT_DECLARE_SIMPLE_TYPE uses"""
+ def gen_patches(self) -> Iterable[Patch]:
+ c = (f'OBJECT_DECLARE_SIMPLE_TYPE({self.group("instancetype")}, {self.group("lowercase")},\n'
+ f' {self.group("uppercase")})\n')
+ yield self.make_patch(c)
+
+class UseDeclareTypeExtended(TypeInfoVar):
+ """Replace TypeInfo variable with OBJECT_DEFINE_TYPE_EXTENDED"""
+ def gen_patches(self) -> Iterable[Patch]:
+ # this will just ensure the caches for find_match() and matches_for_type()
+ # will be loaded in advance:
+ find_type_checkers(self.allfiles, 'xxxxxxxxxxxxxxxxx')
+
+ if not self.is_static():
+ self.info("Skipping non-static TypeInfo variable")
+ return
+
+ type_info_macro = self.file.find_match(TypeInfoMacro, self.name)
+ if not type_info_macro:
+ self.warn("TYPE_INFO(%s) line not found", self.name)
+ return
+
+ values = self.initializers
+ if values is None:
+ return
+ if 'name' not in values:
+ self.warn("name not set in TypeInfo variable %s", self.name)
+ return
+
+ typename = values['name'].raw
+
+ if 'parent' not in values:
+ self.warn("parent not set in TypeInfo variable %s", self.name)
+ return
+ parent_typename = values['parent'].raw
+
+ instancetype = None
+ if 'instance_size' in values:
+ m = re.fullmatch(RE_SIZEOF, values['instance_size'].raw)
+ if m:
+ instancetype = m.group('sizeoftype')
+ else:
+ self.warn("can't extract instance type in TypeInfo variable %s", self.name)
+ self.warn("instance_size is set to: %r", values['instance_size'].raw)
+ return
+
+ classtype = None
+ if 'class_size' in values:
+ m = re.fullmatch(RE_SIZEOF, values['class_size'].raw)
+ if m:
+ classtype = m.group('sizeoftype')
+ else:
+ self.warn("can't extract class type in TypeInfo variable %s", self.name)
+ self.warn("class_size is set to: %r", values['class_size'].raw)
+ return
+
+ #for t in (typename, parent_typename):
+ # if not re.fullmatch(RE_IDENTIFIER, t):
+ # self.info("type name is not a macro/constant")
+ # if instancetype or classtype:
+ # self.warn("macro/constant type name is required for instance/class type")
+ # if not self.file.force:
+ # return
+
+ # Now, the challenge is to find out the right MODULE_OBJ_NAME for the
+ # type and for the parent type
+ self.info("TypeInfo variable for %s is here", typename)
+ uppercase = find_typename_uppercase(self.allfiles, typename)
+ if not uppercase:
+ self.info("Can't find right uppercase name for %s", typename)
+ if instancetype or classtype:
+ self.warn("Can't find right uppercase name for %s", typename)
+ self.warn("This will make type validation difficult in the future")
+ return
+
+ parent_uppercase = find_typename_uppercase(self.allfiles, parent_typename)
+ if not parent_uppercase:
+ self.info("Can't find right uppercase name for parent type (%s)", parent_typename)
+ if instancetype or classtype:
+ self.warn("Can't find right uppercase name for parent type (%s)", parent_typename)
+ self.warn("This
will make type validation difficult in the future") + return + + ok = True + + #checkers: List[TypeCheckerDeclaration] = list(find_type_checkers(self.allfiles, uppercase)) + #for c in checkers: + # c.info("instance type checker declaration (%s) is here", c.group('uppercase')) + #if not checkers: + # self.info("No type checkers declared for %s", uppercase) + # if instancetype or classtype: + # self.warn("Can't find where type checkers for %s (%s) are declared. We will need them to validate sizes of %s", + # typename, uppercase, self.name) + + if not instancetype: + instancetype = 'void' + if not classtype: + classtype = 'void' + + #checker_instancetypes = set(c.instancetype for c in checkers + # if c.instancetype is not None) + #if len(checker_instancetypes) > 1: + # self.warn("ambiguous set of type checkers") + # for c in checkers: + # c.warn("instancetype is %s here", c.instancetype) + # ok = False + #elif len(checker_instancetypes) == 1: + # checker_instancetype = checker_instancetypes.pop() + # DBG("checker instance type: %r", checker_instancetype) + # if instancetype != checker_instancetype: + # self.warn("type at instance_size is %r. Should instance_size be set to sizeof(%s) ?", + # instancetype, checker_instancetype) + # ok = False + #else: + # if instancetype != 'void': + # self.warn("instance type checker for %s (%s) not found", typename, instancetype) + # ok = False + + #checker_classtypes = set(c.classtype for c in checkers + # if c.classtype is not None) + #if len(checker_classtypes) > 1: + # self.warn("ambiguous set of type checkers") + # for c in checkers: + # c.warn("classtype is %s here", c.classtype) + # ok = False + #elif len(checker_classtypes) == 1: + # checker_classtype = checker_classtypes.pop() + # DBG("checker class type: %r", checker_classtype) + # if classtype != checker_classtype: + # self.warn("type at class_size is %r. 
Should class_size be set to sizeof(%s) ?",
+ # classtype, checker_classtype)
+ # ok = False
+ #else:
+ # if classtype != 'void':
+ # self.warn("class type checker for %s (%s) not found", typename, classtype)
+ # ok = False
+
+ #if not ok:
+ # for c in checkers:
+ # c.warn("Type checker declaration for %s (%s) is here",
+ # typename, type(c).__name__)
+ # return
+
+ #if parent_decl is None:
+ # self.warn("Can't find where parent type %s is declared", parent_typename)
+
+ #yield self.prepend(f'DECLARE_TYPE_NAME({uppercase}, {typename})\n')
+ #if not instancetype:
+ # yield self.prepend(f'DECLARE_INSTANCE_TYPE({uppercase}, void)\n')
+ #if not classtype:
+ # yield self.prepend(f'DECLARE_CLASS_TYPE({uppercase}, void)\n')
+ self.info("%s can be patched!", self.name)
+ replaced_fields = ['name', 'parent', 'instance_size', 'class_size']
+ begin = self.group_match('begin')
+ newbegin = f'OBJECT_DEFINE_TYPE_EXTENDED({self.name},\n'
+ newbegin += f' {instancetype}, {classtype},\n'
+ newbegin += f' {uppercase}, {parent_uppercase}'
+ if set(values.keys()) - set(replaced_fields):
+ newbegin += ',\n'
+ yield begin.make_patch(newbegin)
+ yield from self.remove_fields(*replaced_fields)
+ end = self.group_match('end')
+ yield end.make_patch(')\n')
+ yield type_info_macro.make_removal_patch()
+
+class ObjectDefineTypeExtended(TypeDefinition):
+ """OBJECT_DEFINE_TYPE_EXTENDED usage"""
+ regexp = S(r'^[ \t]*OBJECT_DEFINE_TYPE_EXTENDED\s*\(\s*',
+ NAMED('name', RE_IDENTIFIER), r'\s*,\s*',
+ NAMED('instancetype', RE_IDENTIFIER), r'\s*,\s*',
+ NAMED('classtype', RE_IDENTIFIER), r'\s*,\s*',
+ NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*',
+ NAMED('parent_uppercase', RE_IDENTIFIER),
+ M(r',\s*\n',
+ NAMED('fields', RE_TI_FIELDS),
+ n='?'),
+ r'\s*\);?\n?')
+
+class ObjectDefineType(TypeDefinition):
+ """OBJECT_DEFINE_TYPE usage"""
+ regexp = S(r'^[ \t]*OBJECT_DEFINE_TYPE\s*\(\s*',
+ NAMED('lowercase', RE_IDENTIFIER), r'\s*,\s*',
+ NAMED('uppercase', RE_IDENTIFIER), r'\s*,\s*',
+ NAMED('parent_uppercase', RE_IDENTIFIER),
+ M(r',\s*\n',
+ NAMED('fields', RE_TI_FIELDS),
+ n='?'),
+ r'\s*\);?\n?')
+
+def find_type_definitions(files: FileList, uppercase: str) -> Iterable[TypeDefinition]:
+ types: List[Type[TypeDefinition]] = [TypeInfoVar, ObjectDefineType, ObjectDefineTypeExtended]
+ for t in types:
+ for m in files.matches_of_type(t):
+ m.debug("uppercase: %s", m.uppercase)
+ yield from (m for t in types
+ for m in files.matches_of_type(t)
+ if m.uppercase == uppercase)
+
+class AddDeclareVoidClassType(TypeDeclarationFixup):
+ """Will add DECLARE_CLASS_TYPE(..., void) if possible"""
+ def gen_patches_for_type(self, uppercase: str,
+ checkers: List[TypeDeclaration],
+ fields: Dict[str, Optional[str]]) -> Iterable[Patch]:
+ defs = list(find_type_definitions(self.allfiles, uppercase))
+ if len(defs) > 1:
+ self.warn("multiple definitions for %s", uppercase)
+ for d in defs:
+ d.warn("definition found here")
+ return
+ elif len(defs) == 0:
+ self.warn("type definition for %s not found", uppercase)
+ return
+ d = defs[0]
+ if d.classtype is not None:
+ d.info("definition for %s has classtype, skipping", uppercase)
+ return
+ class_type_checkers = [c for c in checkers
+ if c.classtype is not None]
+ if class_type_checkers:
+ for c in class_type_checkers:
+ c.warn("class type checker for %s is present here", uppercase)
+ return
+
+ _,last_checker = max((m.start(), m) for m in checkers)
+ s = f'DECLARE_CLASS_TYPE({uppercase}, void)\n'
+ yield last_checker.append(s)
+
+class AddDeclareVoidInstanceType(FileMatch):
+ """Will add
DECLARE_INSTANCE_TYPE(..., void) if possible""" + regexp = S(r'^[ \t]*#[ \t]*define', CPP_SPACE, + NAMED('name', r'TYPE_[a-zA-Z0-9_]+\b'), + CPP_SPACE, r'.*\n') + + def gen_patches(self) -> Iterable[Patch]: + assert self.name.startswith('TYPE_') + uppercase = self.name[len('TYPE_'):] + defs = list(find_type_definitions(self.allfiles, uppercase)) + if len(defs) > 1: + self.warn("multiple definitions for %s", uppercase) + for d in defs: + d.warn("definition found here") + return + elif len(defs) == 0: + self.warn("type definition for %s not found", uppercase) + return + d = defs[0] + instancetype = d.instancetype + if instancetype is not None and instancetype != 'void': + return + + instance_checkers = [c for c in find_type_checkers(self.allfiles, uppercase) + if c.instancetype] + if instance_checkers: + d.warn("instance type checker for %s already declared", uppercase) + for c in instance_checkers: + c.warn("instance checker for %s is here", uppercase) + return + + s = f'DECLARE_INSTANCE_TYPE({uppercase}, void)\n' + yield self.append(s) + +class AddObjectDeclareType(DeclareObjCheckers): + """Will add OBJECT_DECLARE_TYPE(...) if possible""" + def gen_patches(self) -> Iterable[Patch]: + uppercase = self.uppercase + typename = self.group('typename') + instancetype = self.group('instancetype') + classtype = self.group('classtype') + + if typename != f'TYPE_{uppercase}': + self.warn("type name mismatch: %s vs %s", typename, uppercase) + return + + typedefs = [(t,self.allfiles.find_matches(SimpleTypedefMatch, t)) + for t in (instancetype, classtype)] + for t,tds in typedefs: + if not tds: + self.warn("typedef %s not found", t) + return + for td in tds: + td_type = td.group('typedef_type') + if td_type != f'struct {t}': + self.warn("typedef mismatch: %s is defined as %s", t, td_type) + td.warn("typedef is here") + return + + # look for reuse of same struct type + other_instance_checkers = [c for c in find_type_checkers(self.allfiles, instancetype, 'instancetype') + if c.uppercase != uppercase] + if other_instance_checkers: + self.warn("typedef %s is being reused", instancetype) + for ic in other_instance_checkers: + ic.warn("%s is reused here", instancetype) + if not self.file.force: + return + + decl_types: List[Type[TypeDeclaration]] = [DeclareClassCheckers, DeclareObjCheckers] + class_decls = [m for t in decl_types + for m in self.allfiles.find_matches(t, uppercase, 'uppercase')] + + defs = list(find_type_definitions(self.allfiles, uppercase)) + if len(defs) > 1: + self.warn("multiple definitions for %s", uppercase) + for d in defs: + d.warn("definition found here") + if not self.file.force: + return + elif len(defs) == 0: + self.warn("type definition for %s not found", uppercase) + if not self.file.force: + return + else: + d = defs[0] + if d.instancetype != instancetype: + self.warn("mismatching instance type for %s (%s)", uppercase, instancetype) + d.warn("instance type declared here (%s)", d.instancetype) + if not self.file.force: + return + if d.classtype != classtype: + self.warn("mismatching class type for %s (%s)", uppercase, classtype) + d.warn("class type declared here (%s)", d.classtype) + if not self.file.force: + return + + assert self.file.original_content + for t,tds in typedefs: + assert tds + for td in tds: + if td.file is not self.file: + continue + + # delete typedefs that are truly redundant: + # 1) defined after DECLARE_OBJ_CHECKERS + if td.start() > self.start(): + yield td.make_removal_patch() + # 2) defined before DECLARE_OBJ_CHECKERS, but unused + elif not 
re.search(r'\b'+t+r'\b', self.file.original_content[td.end():self.start()]): + yield td.make_removal_patch() + + c = (f'OBJECT_DECLARE_TYPE({instancetype}, {classtype}, {uppercase})\n') + yield self.make_patch(c) + +class AddObjectDeclareSimpleType(DeclareInstanceChecker): + """Will add OBJECT_DECLARE_SIMPLE_TYPE(...) if possible""" + def gen_patches(self) -> Iterable[Patch]: + uppercase = self.uppercase + typename = self.group('typename') + instancetype = self.group('instancetype') + + if typename != f'TYPE_{uppercase}': + self.warn("type name mismatch: %s vs %s", typename, uppercase) + return + + typedefs = [(t,self.allfiles.find_matches(SimpleTypedefMatch, t)) + for t in (instancetype,)] + for t,tds in typedefs: + if not tds: + self.warn("typedef %s not found", t) + return + for td in tds: + td_type = td.group('typedef_type') + if td_type != f'struct {t}': + self.warn("typedef mismatch: %s is defined as %s", t, td_type) + td.warn("typedef is here") + return + + # look for reuse of same struct type + other_instance_checkers = [c for c in find_type_checkers(self.allfiles, instancetype, 'instancetype') + if c.uppercase != uppercase] + if other_instance_checkers: + self.warn("typedef %s is being reused", instancetype) + for ic in other_instance_checkers: + ic.warn("%s is reused here", instancetype) + if not self.file.force: + return + + decl_types: List[Type[TypeDeclaration]] = [DeclareClassCheckers, DeclareObjCheckers] + class_decls = [m for t in decl_types + for m in self.allfiles.find_matches(t, uppercase, 'uppercase')] + if class_decls: + self.warn("class type declared for %s", uppercase) + for cd in class_decls: + cd.warn("class declaration found here") + return + + defs = list(find_type_definitions(self.allfiles, uppercase)) + if len(defs) > 1: + self.warn("multiple definitions for %s", uppercase) + for d in defs: + d.warn("definition found here") + if not self.file.force: + return + elif len(defs) == 0: + self.warn("type definition for %s not found", uppercase) + if not self.file.force: + return + else: + d = defs[0] + if d.instancetype != instancetype: + self.warn("mismatching instance type for %s (%s)", uppercase, instancetype) + d.warn("instance type declared here (%s)", d.instancetype) + if not self.file.force: + return + if d.classtype: + self.warn("class type set for %s", uppercase) + d.warn("class type declared here") + if not self.file.force: + return + + assert self.file.original_content + for t,tds in typedefs: + assert tds + for td in tds: + if td.file is not self.file: + continue + + # delete typedefs that are truly redundant: + # 1) defined after DECLARE_OBJ_CHECKERS + if td.start() > self.start(): + yield td.make_removal_patch() + # 2) defined before DECLARE_OBJ_CHECKERS, but unused + elif not re.search(r'\b'+t+r'\b', self.file.original_content[td.end():self.start()]): + yield td.make_removal_patch() + + c = (f'OBJECT_DECLARE_SIMPLE_TYPE({instancetype}, {uppercase})\n') + yield self.make_patch(c) + + +class TypeInfoStringName(TypeInfoVar): + """Replace hardcoded type names with TYPE_ constant""" + def gen_patches(self) -> Iterable[Patch]: + values = self.initializers + if values is None: + return + if 'name' not in values: + self.warn("name not set in TypeInfo variable %s", self.name) + return + typename = values['name'].raw + if re.fullmatch(RE_IDENTIFIER, typename): + return + + self.warn("name %s is not an identifier", typename) + #all_defines = [m for m in self.allfiles.matches_of_type(ExpressionDefine)] + #self.debug("all_defines: %r", all_defines) + constants = [m 
for m in self.allfiles.matches_of_type(ExpressionDefine) + if m.group('value').strip() == typename.strip()] + if not constants: + self.warn("No macro for %s found", typename) + return + if len(constants) > 1: + self.warn("I don't know which macro to use: %r", constants) + return + yield self.patch_field_value('name', constants[0].name) + +class RedundantTypeSizes(TypeInfoVar): + """Remove redundant instance_size/class_size from TypeInfo vars""" + def gen_patches(self) -> Iterable[Patch]: + values = self.initializers + if values is None: + return + if 'name' not in values: + self.warn("name not set in TypeInfo variable %s", self.name) + return + typename = values['name'].raw + if 'parent' not in values: + self.warn("parent not set in TypeInfo variable %s", self.name) + return + parent_typename = values['parent'].raw + + if 'instance_size' not in values and 'class_size' not in values: + self.debug("no need to validate %s", self.name) + return + + instance_decls = find_type_checkers(self.allfiles, typename) + if instance_decls: + self.debug("won't touch TypeInfo var that has type checkers") + return + + parent = find_type_info(self.allfiles, parent_typename) + if not parent: + self.warn("Can't find TypeInfo for %s", parent_typename) + return + + if 'instance_size' in values and parent.get_raw_initializer_value('instance_size') != values['instance_size'].raw: + self.info("instance_size mismatch") + parent.info("parent type declared here") + return + + if 'class_size' in values and parent.get_raw_initializer_value('class_size') != values['class_size'].raw: + self.info("class_size mismatch") + parent.info("parent type declared here") + return + + self.debug("will patch variable %s", self.name) + + if 'instance_size' in values: + self.debug("deleting instance_size") + yield self.patch_field('instance_size', '') + + if 'class_size' in values: + self.debug("deleting class_size") + yield self.patch_field('class_size', '') + + +#class TypeInfoVarInitFuncs(TypeInfoVar): +# """TypeInfo variable +# Will create missing init functions +# """ +# def gen_patches(self) -> Iterable[Patch]: +# values = self.initializers +# if values is None: +# self.warn("type not parsed completely: %s", self.name) +# return +# +# macro = self.file.find_match(TypeInfoVar, self.name) +# if macro is None: +# self.warn("No TYPE_INFO macro for %s", self.name) +# return +# +# ids = self.extract_identifiers() +# if ids is None: +# return +# +# DBG("identifiers extracted: %r", ids) +# fields = set(values.keys()) +# if ids.lowercase: +# if 'instance_init' not in fields: +# yield self.prepend(('static void %s_init(Object *obj)\n' +# '{\n' +# '}\n\n') % (ids.lowercase)) +# yield self.append_field('instance_init', ids.lowercase+'_init') +# +# if 'instance_finalize' not in fields: +# yield self.prepend(('static void %s_finalize(Object *obj)\n' +# '{\n' +# '}\n\n') % (ids.lowercase)) +# yield self.append_field('instance_finalize', ids.lowercase+'_finalize') +# +# +# if 'class_init' not in fields: +# yield self.prepend(('static void %s_class_init(ObjectClass *oc, void *data)\n' +# '{\n' +# '}\n\n') % (ids.lowercase)) +# yield self.append_field('class_init', ids.lowercase+'_class_init') + +class TypeInitMacro(FileMatch): + """Use of type_init(...) 
macro""" + regexp = S(r'^[ \t]*type_init\s*\(\s*', NAMED('name', RE_IDENTIFIER), r'\s*\);?[ \t]*\n') + +class DeleteEmptyTypeInitFunc(TypeInitMacro): + """Delete empty function declared using type_init(...)""" + def gen_patches(self) -> Iterable[Patch]: + fn = self.file.find_match(StaticVoidFunction, self.name) + DBG("function for %s: %s", self.name, fn) + if fn and fn.body == '': + yield fn.make_patch('') + yield self.make_patch('') + +class StaticVoidFunction(FileMatch): + """simple static void function + (no replacement rules) + """ + #NOTE: just like RE_FULL_STRUCT, this doesn't parse any of the body contents + # of the function. Tt will just look for "}" in the beginning of a line + regexp = S(r'static\s+void\s+', NAMED('name', RE_IDENTIFIER), r'\s*\(\s*void\s*\)\n', + r'{\n', + NAMED('body', + # acceptable inside the function body: + # - lines starting with space or tab + # - empty lines + # - preprocessor directives + OR(r'[ \t][^\n]*\n', + r'#[^\n]*\n', + r'\n', + repeat='*')), + r'};?\n') + + @property + def body(self) -> str: + return self.group('body') + + def has_preprocessor_directive(self) -> bool: + return bool(re.search(r'^[ \t]*#', self.body, re.MULTILINE)) + +def find_containing_func(m: FileMatch) -> Optional['StaticVoidFunction']: + """Return function containing this match""" + for fn in m.file.matches_of_type(StaticVoidFunction): + if fn.contains(m): + return fn + return None + +class TypeRegisterStaticCall(FileMatch): + """type_register_static() call + Will be replaced by TYPE_INFO() macro + """ + regexp = S(r'^[ \t]*', NAMED('func_name', 'type_register_static'), + r'\s*\(&\s*', NAMED('name', RE_IDENTIFIER), r'\s*\);[ \t]*\n') + +class UseTypeInfo(TypeRegisterStaticCall): + """Replace type_register_static() call with TYPE_INFO declaration""" + def gen_patches(self) -> Iterable[Patch]: + fn = find_containing_func(self) + if fn: + DBG("%r is inside %r", self, fn) + type_init = self.file.find_match(TypeInitMacro, fn.name) + if type_init is None: + self.warn("can't find type_init(%s) line", fn.name) + if not self.file.force: + return + else: + self.warn("can't identify the function where type_register_static(&%s) is called", self.name) + if not self.file.force: + return + + #if fn.has_preprocessor_directive() and not self.file.force: + # self.warn("function %s has preprocessor directives, this requires --force", fn.name) + # return + + var = self.file.find_match(TypeInfoVar, self.name) + if var is None: + self.warn("can't find TypeInfo var declaration for %s", self.name) + return + + if not var.is_full(): + self.warn("variable declaration %s wasn't parsed fully", var.name) + if not self.file.force: + return + + if fn and fn.contains(var): + self.warn("TypeInfo %s variable is inside a function", self.name) + if not self.file.force: + return + + # delete type_register_static() call: + yield self.make_patch('') + # append TYPE_REGISTER(...) 
+ yield var.append(f'TYPE_INFO({self.name})\n')
+
+class TypeRegisterCall(FileMatch):
+ """type_register() call"""
+ regexp = S(r'^[ \t]*', NAMED('func_name', 'type_register'),
+ r'\s*\(&\s*', NAMED('name', RE_IDENTIFIER), r'\s*\);[ \t]*\n')
+
+class MakeTypeRegisterStatic(TypeRegisterCall):
+ """Make type_register() call static if variable is static const"""
+ def gen_patches(self):
+ var = self.file.find_match(TypeInfoVar, self.name)
+ if var is None:
+ self.warn("can't find TypeInfo var declaration for %s", self.name)
+ return
+ if var.is_static() and var.is_const():
+ yield self.group_match('func_name').make_patch('type_register_static')
+
+class MakeTypeRegisterNotStatic(TypeRegisterStaticCall):
+ """Make type_register_static() call non-static if variable is not static const"""
+ def gen_patches(self):
+ var = self.file.find_match(TypeInfoVar, self.name)
+ if var is None:
+ self.warn("can't find TypeInfo var declaration for %s", self.name)
+ return
+ if not var.is_static() or not var.is_const():
+ yield self.group_match('func_name').make_patch('type_register')
+
+class TypeInfoMacro(FileMatch):
+ """TYPE_INFO macro usage"""
+ regexp = S(r'^[ \t]*TYPE_INFO\s*\(\s*', NAMED('name', RE_IDENTIFIER), r'\s*\)[ \t]*;?[ \t]*\n')
+
+def find_type_info(files: RegexpScanner, name: str) -> Optional[TypeInfoVar]:
+ ti = [ti for ti in files.matches_of_type(TypeInfoVar)
+ if ti.get_raw_initializer_value('name') == name]
+ DBG("type info vars: %r", ti)
+ if len(ti) > 1:
+ DBG("multiple TypeInfo vars found for %s", name)
+ return None
+ if len(ti) == 0:
+ DBG("no TypeInfo var found for %s", name)
+ return None
+ return ti[0]
+
+class CreateClassStruct(DeclareInstanceChecker):
+ """Replace DECLARE_INSTANCE_CHECKER with OBJECT_DECLARE_SIMPLE_TYPE"""
+ def gen_patches(self) -> Iterable[Patch]:
+ typename = self.group('typename')
+ DBG("looking for TypeInfo variable for %s", typename)
+ var = find_type_info(self.allfiles, typename)
+ if var is None:
+ self.warn("no TypeInfo var found for %s", typename)
+ return
+ assert var.initializers
+ if 'class_size' in var.initializers:
+ self.warn("class size already set for TypeInfo %s", var.name)
+ return
+ classtype = self.group('instancetype')+'Class'
+ return
+ yield
+ #TODO: need to find out what's the parent class type...
+ #yield var.append_field('class_size', f'sizeof({classtype})')
+ #c = (f'OBJECT_DECLARE_SIMPLE_TYPE({instancetype}, {lowercase},\n'
+ # f' MODULE_OBJ_NAME, ParentClassType)\n')
+ #yield self.make_patch(c)
+
+def type_infos(file: FileInfo) -> Iterable[TypeInfoVar]:
+ return file.matches_of_type(TypeInfoVar)
+
+def full_types(file: FileInfo) -> Iterable[TypeInfoVar]:
+ return [t for t in type_infos(file) if t.is_full()]
+
+def partial_types(file: FileInfo) -> Iterable[TypeInfoVar]:
+ return [t for t in type_infos(file) if not t.is_full()]
diff --git a/scripts/codeconverter/codeconverter/regexps.py b/scripts/codeconverter/codeconverter/regexps.py
new file mode 100644
index 000000000..77993cc3b
--- /dev/null
+++ b/scripts/codeconverter/codeconverter/regexps.py
@@ -0,0 +1,118 @@
+# Copyright (C) 2020 Red Hat Inc.
+#
+# Authors:
+# Eduardo Habkost <ehabkost@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2. See
+# the COPYING file in the top-level directory.
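+# Usage sketch (illustrative only; the group name and literals are
+# hypothetical): the combinators below build plain regexp strings that
+# can be passed to re.compile(), e.g.:
+#
+#   S(NAMED('kw', OR('foo', 'bar')), r'\s*=')
+#
+# evaluates to the string '(?P<kw>(?:foo|bar))\s*='.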
+"""Helpers for creation of regular expressions""" +import re + +import logging +logger = logging.getLogger(__name__) +DBG = logger.debug +INFO = logger.info +WARN = logger.warning + +def S(*regexps) -> str: + """Just a shortcut to concatenate multiple regexps more easily""" + return ''.join(regexps) + +def P(*regexps, name=None, capture=False, repeat='') -> str: + """Just add parenthesis around regexp(s), with optional name or repeat suffix""" + s = S(*regexps) + if name: + return f'(?P<{name}>{s}){repeat}' + elif capture: + return f'({s}){repeat}' + else: + return f'(?:{s}){repeat}' + +def NAMED(name, *regexps) -> str: + """Make named group using <P<name>...) syntax + + >>> NAMED('mygroup', 'xyz', 'abc') + '(?P<mygroup>xyzabc)' + """ + return P(*regexps, name=name) + +def OR(*regexps, **kwargs) -> str: + """Build (a|b|c) regexp""" + return P('|'.join(regexps), **kwargs) + +def M(*regexps, n='*', name=None) -> str: + """Add repetition qualifier to regexp(s) + + >>> M('a', 'b') + '(?:ab)*' + >>> M('a' , 'b', n='+') + '(?:ab)+' + >>> M('a' , 'b', n='{2,3}', name='name') + '(?P<name>(?:ab){2,3})' + """ + r = P(*regexps, repeat=n) + if name: + r = NAMED(name, r) + return r + +# helper to make parenthesis optional around regexp +OPTIONAL_PARS = lambda R: OR(S(r'\(\s*', R, r'\s*\)'), R) +def test_optional_pars(): + r = OPTIONAL_PARS('abc')+'$' + assert re.match(r, 'abc') + assert re.match(r, '(abc)') + assert not re.match(r, '(abcd)') + assert not re.match(r, '(abc') + assert not re.match(r, 'abc)') + + +# this disables the MULTILINE flag, so it will match at the +# beginning of the file: +RE_FILE_BEGIN = r'(?-m:^)' + +# C primitives: + +SP = r'\s*' + +RE_COMMENT = r'//[^\n]*$|/\*([^*]|\*[^/])*\*/' +RE_COMMENTS = M(RE_COMMENT + SP) + +RE_IDENTIFIER = r'[a-zA-Z_][a-zA-Z0-9_]*(?![a-zA-Z0-9])' +RE_STRING = r'\"([^\"\\]|\\[a-z\"])*\"' +RE_NUMBER = r'[0-9]+|0x[0-9a-fA-F]+' + +# space or escaped newlines: +CPP_SPACE = OR(r'\s', r'\\\n', repeat='+') + +RE_PATH = '[a-zA-Z0-9/_.-]+' + +RE_INCLUDEPATH = OR(S(r'\"', RE_PATH, r'\"'), + S(r'<', RE_PATH, r'>')) + +RE_INCLUDE = S(r'^[ \t]*#[ \t]*include[ \t]+', NAMED('includepath', RE_INCLUDEPATH), r'[ \t]*\n') +RE_SIMPLEDEFINE = S(r'^[ \t]*#[ \t]*define[ \t]+', RE_IDENTIFIER, r'[ \t]*\n') + +RE_STRUCT_TYPE = S(r'struct\s+', RE_IDENTIFIER) +RE_TYPE = OR(RE_IDENTIFIER, RE_STRUCT_TYPE) + +RE_MACRO_CONCAT = M(S(OR(RE_IDENTIFIER, RE_STRING), SP), n='{2,}') + +RE_SIMPLE_VALUE = OR(RE_IDENTIFIER, RE_STRING, RE_NUMBER) + +RE_FUN_CALL = S(RE_IDENTIFIER, r'\s*\(\s*', RE_SIMPLE_VALUE, r'\s*\)') +RE_SIZEOF = S(r'sizeof\s*\(\s*', NAMED('sizeoftype', RE_TYPE), r'\s*\)') + +RE_ADDRESS = S(r'&\s*', RE_IDENTIFIER) + +RE_ARRAY_ITEM = S(r'{\s*', NAMED('arrayitem', M(RE_SIMPLE_VALUE, n='?')), r'\s*}\s*,?') +RE_ARRAY_CAST = S(r'\(\s*', RE_IDENTIFIER, r'\s*\[\s*\]\)') +RE_ARRAY_ITEMS = M(S(RE_ARRAY_ITEM, SP)) +RE_ARRAY = S(M(RE_ARRAY_CAST, n='?'), r'\s*{\s*', + NAMED('arrayitems', RE_ARRAY_ITEMS), + r'}') + +# NOTE: this covers a very small subset of valid expressions + +RE_EXPRESSION = OR(RE_SIZEOF, RE_FUN_CALL, RE_MACRO_CONCAT, RE_SIMPLE_VALUE, + RE_ARRAY, RE_ADDRESS) + diff --git a/scripts/codeconverter/codeconverter/test_patching.py b/scripts/codeconverter/codeconverter/test_patching.py new file mode 100644 index 000000000..71dfbd47e --- /dev/null +++ b/scripts/codeconverter/codeconverter/test_patching.py @@ -0,0 +1,104 @@ +# Copyright (C) 2020 Red Hat Inc. 
+# +# Authors: +# Eduardo Habkost <ehabkost@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2. See +# the COPYING file in the top-level directory. +from tempfile import NamedTemporaryFile +from .patching import FileInfo, FileMatch, Patch, FileList +from .regexps import * + +class BasicPattern(FileMatch): + regexp = '[abc]{3}' + + @property + def name(self): + return self.group(0) + + def replacement(self) -> str: + # replace match with the middle character repeated 5 times + return self.group(0)[1].upper()*5 + +def test_pattern_patching(): + of = NamedTemporaryFile('wt') + of.writelines(['one line\n', + 'this pattern will be patched: defbbahij\n', + 'third line\n', + 'another pattern: jihaabfed']) + of.flush() + + files = FileList() + f = FileInfo(files, of.name) + f.load() + matches = f.matches_of_type(BasicPattern) + assert len(matches) == 2 + p2 = matches[1] + + # manually add patch, to see if .append() works: + f.patches.append(p2.append('XXX')) + + # apply all patches: + f.gen_patches(matches) + patched = f.get_patched_content() + assert patched == ('one line\n'+ + 'this pattern will be patched: defBBBBBhij\n'+ + 'third line\n'+ + 'another pattern: jihAAAAAXXXfed') + +class Function(FileMatch): + regexp = S(r'BEGIN\s+', NAMED('name', RE_IDENTIFIER), r'\n', + r'(.*\n)*?END\n') + +class Statement(FileMatch): + regexp = S(r'^\s*', NAMED('name', RE_IDENTIFIER), r'\(\)\n') + +def test_container_match(): + of = NamedTemporaryFile('wt') + of.writelines(['statement1()\n', + 'statement2()\n', + 'BEGIN function1\n', + ' statement3()\n', + ' statement4()\n', + 'END\n', + 'BEGIN function2\n', + ' statement5()\n', + ' statement6()\n', + 'END\n', + 'statement7()\n']) + of.flush() + + files = FileList() + f = FileInfo(files, of.name) + f.load() + assert len(f.matches_of_type(Function)) == 2 + print(' '.join(m.name for m in f.matches_of_type(Statement))) + assert len(f.matches_of_type(Statement)) == 7 + + f1 = f.find_match(Function, 'function1') + f2 = f.find_match(Function, 'function2') + st1 = f.find_match(Statement, 'statement1') + st2 = f.find_match(Statement, 'statement2') + st3 = f.find_match(Statement, 'statement3') + st4 = f.find_match(Statement, 'statement4') + st5 = f.find_match(Statement, 'statement5') + st6 = f.find_match(Statement, 'statement6') + st7 = f.find_match(Statement, 'statement7') + + assert not f1.contains(st1) + assert not f1.contains(st2) + assert not f1.contains(st2) + assert f1.contains(st3) + assert f1.contains(st4) + assert not f1.contains(st5) + assert not f1.contains(st6) + assert not f1.contains(st7) + + assert not f2.contains(st1) + assert not f2.contains(st2) + assert not f2.contains(st2) + assert not f2.contains(st3) + assert not f2.contains(st4) + assert f2.contains(st5) + assert f2.contains(st6) + assert not f2.contains(st7) diff --git a/scripts/codeconverter/codeconverter/test_regexps.py b/scripts/codeconverter/codeconverter/test_regexps.py new file mode 100644 index 000000000..a445634d8 --- /dev/null +++ b/scripts/codeconverter/codeconverter/test_regexps.py @@ -0,0 +1,282 @@ +# Copyright (C) 2020 Red Hat Inc. +# +# Authors: +# Eduardo Habkost <ehabkost@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2. See +# the COPYING file in the top-level directory. 
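+# The test_* functions below are plain assert-based tests; assuming
+# pytest is available, they can be collected and run with e.g.:
+#
+#   python3 -m pytest codeconverter/test_regexps.py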
+from .regexps import * +from .qom_macros import * +from .qom_type_info import * + +def test_res() -> None: + def fullmatch(regexp, s): + return re.fullmatch(regexp, s, re.MULTILINE) + + assert fullmatch(RE_IDENTIFIER, 'sizeof') + assert fullmatch(RE_IDENTIFIER, 'X86CPU') + assert fullmatch(RE_FUN_CALL, 'sizeof(X86CPU)') + assert fullmatch(RE_IDENTIFIER, 'X86_CPU_TYPE_NAME') + assert fullmatch(RE_SIMPLE_VALUE, '"base"') + print(RE_FUN_CALL) + assert fullmatch(RE_FUN_CALL, 'X86_CPU_TYPE_NAME("base")') + print(RE_TI_FIELD_INIT) + assert fullmatch(RE_TI_FIELD_INIT, '.name = X86_CPU_TYPE_NAME("base"),\n') + + + assert fullmatch(RE_MACRO_CONCAT, 'TYPE_ASPEED_GPIO "-ast2600"') + assert fullmatch(RE_EXPRESSION, 'TYPE_ASPEED_GPIO "-ast2600"') + + print(RE_MACRO_DEFINE) + assert re.search(RE_MACRO_DEFINE, r''' + #define OFFSET_CHECK(c) \ + do { \ + if (!(c)) { \ + goto bad_offset; \ + } \ + } while (0) + ''', re.MULTILINE) + + print(RE_CHECK_MACRO) + print(CPP_SPACE) + assert not re.match(RE_CHECK_MACRO, r''' + #define OFFSET_CHECK(c) \ + do { \ + if (!(c)) { \ + goto bad_offset; \ + } \ + } while (0)''', re.MULTILINE) + + print(RE_CHECK_MACRO) + assert fullmatch(RE_CHECK_MACRO, r'''#define PCI_DEVICE(obj) \ + OBJECT_CHECK(PCIDevice, (obj), TYPE_PCI_DEVICE) +''') + assert fullmatch(RE_CHECK_MACRO, r'''#define COLLIE_MACHINE(obj) \ + OBJECT_CHECK(CollieMachineState, obj, TYPE_COLLIE_MACHINE) +''') + + print(RE_TYPEINFO_START) + assert re.search(RE_TYPEINFO_START, r''' + cc->open = qmp_chardev_open_file; +} + +static const TypeInfo char_file_type_info = { + .name = TYPE_CHARDEV_FILE, +#ifdef _WIN32 + .parent = TYPE_CHARDEV_WIN, +''', re.MULTILINE) + assert re.search(RE_TYPEINFO_START, r''' + TypeInfo ti = { + .name = armsse_variants[i].name, + .parent = TYPE_ARMSSE, + .class_init = armsse_class_init, + .class_data = (void *)&armsse_variants[i], + };''', re.MULTILINE) + + print(RE_ARRAY_ITEM) + assert fullmatch(RE_ARRAY_ITEM, '{ TYPE_HOTPLUG_HANDLER },') + assert fullmatch(RE_ARRAY_ITEM, '{ TYPE_ACPI_DEVICE_IF },') + assert fullmatch(RE_ARRAY_ITEM, '{ }') + assert fullmatch(RE_ARRAY_CAST, '(InterfaceInfo[])') + assert fullmatch(RE_ARRAY, '''(InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { TYPE_ACPI_DEVICE_IF }, + { } + }''') + print(RE_COMMENT) + assert fullmatch(RE_COMMENT, r'''/* multi-line + * comment + */''') + + print(RE_TI_FIELDS) + assert fullmatch(RE_TI_FIELDS, + r'''/* could be TYPE_SYS_BUS_DEVICE (or LPC etc) */ + .parent = TYPE_DEVICE, +''') + assert fullmatch(RE_TI_FIELDS, r'''.name = TYPE_TPM_CRB, + /* could be TYPE_SYS_BUS_DEVICE (or LPC etc) */ + .parent = TYPE_DEVICE, + .instance_size = sizeof(CRBState), + .class_init = tpm_crb_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_TPM_IF }, + { } + } +''') + assert fullmatch(RE_TI_FIELDS + SP + RE_COMMENTS, + r'''.name = TYPE_PALM_MISC_GPIO, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(PalmMiscGPIOState), + .instance_init = palm_misc_gpio_init, + /* + * No class init required: device has no internal state so does not + * need to set up reset or vmstate, and has no realize method. 
+ */''') + + print(TypeInfoVar.regexp) + test_empty = 'static const TypeInfo x86_base_cpu_type_info = {\n'+\ + '};\n'; + assert fullmatch(TypeInfoVar.regexp, test_empty) + + test_simple = r''' + static const TypeInfo x86_base_cpu_type_info = { + .name = X86_CPU_TYPE_NAME("base"), + .parent = TYPE_X86_CPU, + .class_init = x86_cpu_base_class_init, + }; + ''' + assert re.search(TypeInfoVar.regexp, test_simple, re.MULTILINE) + + test_interfaces = r''' + static const TypeInfo acpi_ged_info = { + .name = TYPE_ACPI_GED, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(AcpiGedState), + .instance_init = acpi_ged_initfn, + .class_init = acpi_ged_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { TYPE_ACPI_DEVICE_IF }, + { } + } + }; + ''' + assert re.search(TypeInfoVar.regexp, test_interfaces, re.MULTILINE) + + test_comments = r''' + static const TypeInfo palm_misc_gpio_info = { + .name = TYPE_PALM_MISC_GPIO, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(PalmMiscGPIOState), + .instance_init = palm_misc_gpio_init, + /* + * No class init required: device has no internal state so does not + * need to set up reset or vmstate, and has no realize method. + */ + }; + ''' + assert re.search(TypeInfoVar.regexp, test_comments, re.MULTILINE) + + test_comments = r''' + static const TypeInfo tpm_crb_info = { + .name = TYPE_TPM_CRB, + /* could be TYPE_SYS_BUS_DEVICE (or LPC etc) */ + .parent = TYPE_DEVICE, + .instance_size = sizeof(CRBState), + .class_init = tpm_crb_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_TPM_IF }, + { } + } + }; + ''' + assert re.search(TypeInfoVar.regexp, test_comments, re.MULTILINE) + +def test_struct_re(): + print('---') + print(RE_STRUCT_TYPEDEF) + assert re.search(RE_STRUCT_TYPEDEF, r''' +typedef struct TCGState { + AccelState parent_obj; + + bool mttcg_enabled; + unsigned long tb_size; +} TCGState; +''', re.MULTILINE) + + assert re.search(RE_STRUCT_TYPEDEF, r''' +typedef struct { + ISADevice parent_obj; + + QEMUSoundCard card; + uint32_t freq; + uint32_t port; + int ticking[2]; + int enabled; + int active; + int bufpos; +#ifdef DEBUG + int64_t exp[2]; +#endif + int16_t *mixbuf; + uint64_t dexp[2]; + SWVoiceOut *voice; + int left, pos, samples; + QEMUAudioTimeStamp ats; + FM_OPL *opl; + PortioList port_list; +} AdlibState; +''', re.MULTILINE) + + false_positive = r''' +typedef struct dma_pagetable_entry { + int32_t frame; + int32_t owner; +} A B C D E; +struct foo { + int x; +} some_variable; +''' + assert not re.search(RE_STRUCT_TYPEDEF, false_positive, re.MULTILINE) + +def test_initial_includes(): + print(InitialIncludes.regexp) + c = ''' +#ifndef HW_FLASH_H +#define HW_FLASH_H + +/* NOR flash devices */ + +#include "qom/object.h" +#include "exec/hwaddr.h" + +/* pflash_cfi01.c */ +''' + print(repr(list(m.groupdict() for m in InitialIncludes.finditer(c)))) + m = InitialIncludes.domatch(c) + assert m + print(repr(m.group(0))) + assert m.group(0).endswith('#include "exec/hwaddr.h"\n') + + c = '''#ifndef QEMU_VIRTIO_9P_H +#define QEMU_VIRTIO_9P_H + +#include "standard-headers/linux/virtio_9p.h" +#include "hw/virtio/virtio.h" +#include "9p.h" + + +''' + print(repr(list(m.groupdict() for m in InitialIncludes.finditer(c)))) + m = InitialIncludes.domatch(c) + assert m + print(repr(m.group(0))) + assert m.group(0).endswith('#include "9p.h"\n') + + c = '''#include "qom/object.h" +/* + * QEMU ES1370 emulation +... 
+ */ + +/* #define DEBUG_ES1370 */ +/* #define VERBOSE_ES1370 */ +#define SILENT_ES1370 + +#include "qemu/osdep.h" +#include "hw/audio/soundhw.h" +#include "audio/audio.h" +#include "hw/pci/pci.h" +#include "migration/vmstate.h" +#include "qemu/module.h" +#include "sysemu/dma.h" + +/* Missing stuff: + SCTRL_P[12](END|ST)INC +''' + print(repr(list(m.groupdict() for m in InitialIncludes.finditer(c)))) + m = InitialIncludes.domatch(c) + assert m + print(repr(m.group(0))) + assert m.group(0).endswith('#include "sysemu/dma.h"\n') + diff --git a/scripts/codeconverter/codeconverter/utils.py b/scripts/codeconverter/codeconverter/utils.py new file mode 100644 index 000000000..760ab7eec --- /dev/null +++ b/scripts/codeconverter/codeconverter/utils.py @@ -0,0 +1,72 @@ +# Copyright (C) 2020 Red Hat Inc. +# +# Authors: +# Eduardo Habkost <ehabkost@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2. See +# the COPYING file in the top-level directory. +from typing import * + +import logging +logger = logging.getLogger(__name__) +DBG = logger.debug +INFO = logger.info +WARN = logger.warning + +T = TypeVar('T') +def opt_compare(a: T, b: T) -> bool: + """Compare two values, ignoring mismatches if one of them is None""" + return (a is None) or (b is None) or (a == b) + +def merge(a: T, b: T) -> T: + """Merge two values if they matched using opt_compare()""" + assert opt_compare(a, b) + if a is None: + return b + else: + return a + +def test_comp_merge(): + assert opt_compare(None, 1) == True + assert opt_compare(2, None) == True + assert opt_compare(1, 1) == True + assert opt_compare(1, 2) == False + + assert merge(None, None) is None + assert merge(None, 10) == 10 + assert merge(10, None) == 10 + assert merge(10, 10) == 10 + + +LineNumber = NewType('LineNumber', int) +ColumnNumber = NewType('ColumnNumber', int) +class LineAndColumn(NamedTuple): + line: int + col: int + + def __str__(self): + return '%d:%d' % (self.line, self.col) + +def line_col(s, position: int) -> LineAndColumn: + """Return line and column for a char position in string + + Character position starts in 0, but lines and columns start in 1. + """ + before = s[:position] + lines = before.split('\n') + line = len(lines) + col = len(lines[-1]) + 1 + return LineAndColumn(line, col) + +def test_line_col(): + assert line_col('abc\ndefg\nhijkl', 0) == (1, 1) + assert line_col('abc\ndefg\nhijkl', 2) == (1, 3) + assert line_col('abc\ndefg\nhijkl', 3) == (1, 4) + assert line_col('abc\ndefg\nhijkl', 4) == (2, 1) + assert line_col('abc\ndefg\nhijkl', 10) == (3, 2) + +def not_optional(arg: Optional[T]) -> T: + assert arg is not None + return arg + +__all__ = ['not_optional', 'opt_compare', 'merge', 'line_col', 'LineAndColumn']
\ No newline at end of file diff --git a/scripts/codeconverter/converter.py b/scripts/codeconverter/converter.py new file mode 100755 index 000000000..75cb515d9 --- /dev/null +++ b/scripts/codeconverter/converter.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +# QEMU library +# +# Copyright (C) 2020 Red Hat Inc. +# +# Authors: +# Eduardo Habkost <ehabkost@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2. See +# the COPYING file in the top-level directory. +# +import sys +import argparse +import os +import os.path +import re +from typing import * + +from codeconverter.patching import FileInfo, match_class_dict, FileList +import codeconverter.qom_macros +from codeconverter.qom_type_info import TI_FIELDS, type_infos, TypeInfoVar + +import logging +logger = logging.getLogger(__name__) +DBG = logger.debug +INFO = logger.info +WARN = logger.warning + +def process_all_files(parser: argparse.ArgumentParser, args: argparse.Namespace) -> None: + DBG("filenames: %r", args.filenames) + + files = FileList() + files.extend(FileInfo(files, fn, args.force) for fn in args.filenames) + for f in files: + DBG('opening %s', f.filename) + f.load() + + if args.table: + fields = ['filename', 'variable_name'] + TI_FIELDS + print('\t'.join(fields)) + for f in files: + for t in f.matches_of_type(TypeInfoVar): + assert isinstance(t, TypeInfoVar) + values = [f.filename, t.name] + \ + [t.get_raw_initializer_value(f) + for f in TI_FIELDS] + DBG('values: %r', values) + assert all('\t' not in v for v in values) + values = [v.replace('\n', ' ').replace('"', '') for v in values] + print('\t'.join(values)) + return + + match_classes = match_class_dict() + if not args.patterns: + parser.error("--pattern is required") + + classes = [p for arg in args.patterns + for p in re.split(r'[\s,]', arg) + if p.strip()] + for c in classes: + if c not in match_classes \ + or not match_classes[c].regexp: + print("Invalid pattern name: %s" % (c), file=sys.stderr) + print("Valid patterns:", file=sys.stderr) + print(PATTERN_HELP, file=sys.stderr) + sys.exit(1) + + DBG("classes: %r", classes) + files.patch_content(max_passes=args.passes, class_names=classes) + + for f in files: + #alltypes.extend(f.type_infos) + #full_types.extend(f.full_types()) + + if not args.dry_run: + if args.inplace: + f.patch_inplace() + if args.diff: + f.show_diff() + if not args.diff and not args.inplace: + f.write_to_file(sys.stdout) + sys.stdout.flush() + + +PATTERN_HELP = ('\n'.join(" %s: %s" % (n, str(c.__doc__).strip()) + for (n,c) in sorted(match_class_dict().items()) + if c.has_replacement_rule())) + +def main() -> None: + p = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter) + p.add_argument('filenames', nargs='+') + p.add_argument('--passes', type=int, default=1, + help="Number of passes (0 means unlimited)") + p.add_argument('--pattern', required=True, action='append', + default=[], dest='patterns', + help="Pattern to scan for") + p.add_argument('--inplace', '-i', action='store_true', + help="Patch file in place") + p.add_argument('--dry-run', action='store_true', + help="Don't patch files or print patching results") + p.add_argument('--force', '-f', action='store_true', + help="Perform changes even if not completely safe") + p.add_argument('--diff', action='store_true', + help="Print diff output on stdout") + p.add_argument('--debug', '-d', action='store_true', + help="Enable debugging") + p.add_argument('--verbose', '-v', action='store_true', + help="Verbose logging on stderr") + 
p.add_argument('--table', action='store_true', + help="Print CSV table of type information") + p.add_argument_group("Valid pattern names", + PATTERN_HELP) + args = p.parse_args() + + loglevel = (logging.DEBUG if args.debug + else logging.INFO if args.verbose + else logging.WARN) + logging.basicConfig(format='%(levelname)s: %(message)s', level=loglevel) + DBG("args: %r", args) + process_all_files(p, args) + +if __name__ == '__main__': + main()
\ No newline at end of file diff --git a/scripts/coverity-scan/COMPONENTS.md new file mode 100644 index 000000000..183f26a32 --- /dev/null +++ b/scripts/coverity-scan/COMPONENTS.md @@ -0,0 +1,148 @@ +This is the list of currently configured Coverity components:
+
+alpha
+ ~ (/qemu)?((/include)?/hw/alpha/.*|/target/alpha/.*)
+
+arm
+ ~ (/qemu)?((/include)?/hw/arm/.*|(/include)?/hw/.*/(arm|allwinner-a10|bcm28|digic|exynos|imx|omap|stellaris|pxa2xx|versatile|zynq|cadence).*|/hw/net/xgmac.c|/hw/ssi/xilinx_spips.c|/target/arm/.*)
+
+avr
+ ~ (/qemu)?((/include)?/hw/avr/.*|/target/avr/.*)
+
+cris
+ ~ (/qemu)?((/include)?/hw/cris/.*|/target/cris/.*)
+
+hexagon
+ ~ (/qemu)?(/target/hexagon/.*)
+
+hppa
+ ~ (/qemu)?((/include)?/hw/hppa/.*|/target/hppa/.*)
+
+i386
+ ~ (/qemu)?((/include)?/hw/i386/.*|/target/i386/.*|/hw/intc/[^/]*apic[^/]*\.c)
+
+m68k
+ ~ (/qemu)?((/include)?/hw/m68k/.*|/target/m68k/.*|(/include)?/hw(/.*)?/mcf.*)
+
+microblaze
+ ~ (/qemu)?((/include)?/hw/microblaze/.*|/target/microblaze/.*)
+
+mips
+ ~ (/qemu)?((/include)?/hw/mips/.*|/target/mips/.*)
+
+nios2
+ ~ (/qemu)?((/include)?/hw/nios2/.*|/target/nios2/.*)
+
+ppc
+ ~ (/qemu)?((/include)?/hw/ppc/.*|/target/ppc/.*|/hw/pci-host/(uninorth.*|dec.*|prep.*|ppc.*)|/hw/misc/macio/.*|(/include)?/hw/.*/(xics|openpic|spapr).*)
+
+riscv
+ ~ (/qemu)?((/include)?/hw/riscv/.*|/target/riscv/.*)
+
+rx
+ ~ (/qemu)?((/include)?/hw/rx/.*|/target/rx/.*)
+
+s390
+ ~ (/qemu)?((/include)?/hw/s390x/.*|/target/s390x/.*|/hw/.*/s390_.*)
+
+sh4
+ ~ (/qemu)?((/include)?/hw/sh4/.*|/target/sh4/.*)
+
+sparc
+ ~ (/qemu)?((/include)?/hw/sparc(64)?.*|/target/sparc/.*|/hw/.*/grlib.*|/hw/display/cg3.c)
+
+tilegx
+ ~ (/qemu)?(/target/tilegx/.*)
+
+tricore
+ ~ (/qemu)?((/include)?/hw/tricore/.*|/target/tricore/.*)
+
+9pfs
+ ~ (/qemu)?(/hw/9pfs/.*|/fsdev/.*)
+
+audio
+ ~ (/qemu)?((/include)?/(audio|hw/audio)/.*)
+
+block
+ ~ (/qemu)?(/block.*|(/include)?(/hw)?/(block|storage-daemon)/.*|(/include)?/hw/ide/.*|/qemu-(img|io).*|/util/(aio|async|thread-pool).*)
+
+char
+ ~ (/qemu)?(/qemu-char\.c|/include/sysemu/char\.h|(/include)?/hw/char/.*)
+
+capstone
+ ~ (/qemu)?(/capstone/.*)
+
+crypto
+ ~ (/qemu)?((/include)?/crypto/.*|/hw/.*/crypto.*)
+
+disas
+ ~ (/qemu)?((/include)?/disas.*)
+
+fpu
+ ~ (/qemu)?((/include)?(/fpu|/libdecnumber)/.*)
+
+io
+ ~ (/qemu)?((/include)?/io/.*)
+
+ipmi
+ ~ (/qemu)?((/include)?/hw/ipmi/.*)
+
+libvixl
+ ~ (/qemu)?(/disas/libvixl/.*)
+
+migration
+ ~ (/qemu)?((/include)?/migration/.*)
+
+monitor
+ ~ (/qemu)?(/qapi.*|/qobject/.*|/monitor\..*|/[hq]mp\..*)
+
+nbd
+ ~ (/qemu)?(/nbd/.*|/include/block/nbd.*|/qemu-nbd\.c)
+
+net
+ ~ (/qemu)?((/include)?(/hw)?/(net|rdma)/.*)
+
+pci
+ ~ (/qemu)?(/hw/pci.*|/include/hw/pci.*)
+
+qemu-ga
+ ~ (/qemu)?(/qga/.*)
+
+scsi
+ ~ (/qemu)?(/scsi/.*|/hw/scsi/.*|/include/hw/scsi/.*)
+
+slirp
+ ~ (/qemu)?(/.*slirp.*)
+
+tcg
+ ~ (/qemu)?(/accel/tcg/.*|/replay/.*|/(.*/)?softmmu.*)
+
+trace
+ ~ (/qemu)?(/.*trace.*\.[ch])
+
+ui
+ ~ (/qemu)?((/include)?(/ui|/hw/display|/hw/input)/.*)
+
+usb
+ ~ (/qemu)?(/hw/usb/.*|/include/hw/usb/.*)
+
+user
+ ~ (/qemu)?(/linux-user/.*|/bsd-user/.*|/user-exec\.c|/thunk\.c|/include/exec/user/.*)
+
+util
+ ~ (/qemu)?(/util/.*|/include/qemu/.*)
+
+xen
+ ~ (/qemu)?(.*/xen.*)
+
+virtiofsd
+ ~ (/qemu)?(/tools/virtiofsd/.*)
+
+(headers)
+ ~ (/qemu)?(/include/.*)
+
+testlibs
+ ~ (/qemu)?(/tests/qtest(/libqos/.*|/libqtest.*))
+
+tests
+ ~ (/qemu)?(/tests/.*) diff --git a/scripts/coverity-scan/coverity-scan.docker new
file mode 100644 index 000000000..ecff6ac5b --- /dev/null +++ b/scripts/coverity-scan/coverity-scan.docker @@ -0,0 +1,128 @@ +# syntax=docker/dockerfile:1.0.0-experimental +# +# Docker setup for running the "Coverity Scan" tools over the source +# tree and uploading them to the website, as per +# https://scan.coverity.com/projects/qemu/builds/new +# We do this on a fixed config (currently Fedora 30 with a known +# set of dependencies and a configure command that enables a specific +# set of options) so that random changes don't result in our accidentally +# dropping some files from the scan. +# +# We don't build on top of the fedora.docker file because we don't +# want to accidentally change or break the scan config when that +# is updated. + +# The work of actually doing the build is handled by the +# run-coverity-scan script. + +FROM fedora:30 +ENV PACKAGES \ + alsa-lib-devel \ + bc \ + brlapi-devel \ + bzip2 \ + bzip2-devel \ + ccache \ + clang \ + curl \ + cyrus-sasl-devel \ + dbus-daemon \ + device-mapper-multipath-devel \ + findutils \ + gcc \ + gcc-c++ \ + gettext \ + git \ + glib2-devel \ + glusterfs-api-devel \ + gnutls-devel \ + gtk3-devel \ + hostname \ + libaio-devel \ + libasan \ + libattr-devel \ + libblockdev-mpath-devel \ + libcap-devel \ + libcap-ng-devel \ + libcurl-devel \ + libepoxy-devel \ + libfdt-devel \ + libgbm-devel \ + libiscsi-devel \ + libjpeg-devel \ + libpmem-devel \ + libnfs-devel \ + libpng-devel \ + librbd-devel \ + libseccomp-devel \ + libssh-devel \ + libubsan \ + libudev-devel \ + libusbx-devel \ + libxml2-devel \ + libzstd-devel \ + llvm \ + lzo-devel \ + make \ + mingw32-bzip2 \ + mingw32-curl \ + mingw32-glib2 \ + mingw32-gmp \ + mingw32-gnutls \ + mingw32-gtk3 \ + mingw32-libjpeg-turbo \ + mingw32-libpng \ + mingw32-libtasn1 \ + mingw32-nettle \ + mingw32-nsis \ + mingw32-pixman \ + mingw32-pkg-config \ + mingw32-SDL2 \ + mingw64-bzip2 \ + mingw64-curl \ + mingw64-glib2 \ + mingw64-gmp \ + mingw64-gnutls \ + mingw64-gtk3 \ + mingw64-libjpeg-turbo \ + mingw64-libpng \ + mingw64-libtasn1 \ + mingw64-nettle \ + mingw64-pixman \ + mingw64-pkg-config \ + mingw64-SDL2 \ + ncurses-devel \ + nettle-devel \ + numactl-devel \ + perl \ + perl-Test-Harness \ + pixman-devel \ + pulseaudio-libs-devel \ + python3 \ + python3-sphinx \ + PyYAML \ + rdma-core-devel \ + SDL2-devel \ + snappy-devel \ + sparse \ + spice-server-devel \ + systemd-devel \ + systemtap-sdt-devel \ + tar \ + usbredir-devel \ + virglrenderer-devel \ + vte291-devel \ + wget \ + which \ + xen-devel \ + xfsprogs-devel \ + zlib-devel +ENV QEMU_CONFIGURE_OPTS --python=/usr/bin/python3 + +RUN dnf install -y $PACKAGES +RUN rpm -q $PACKAGES | sort > /packages.txt +ENV PATH $PATH:/usr/libexec/python3-sphinx/ +ENV COVERITY_TOOL_BASE=/coverity-tools +COPY coverity_tool.tgz coverity_tool.tgz +RUN mkdir -p /coverity-tools/coverity_tool && cd /coverity-tools/coverity_tool && tar xf /coverity_tool.tgz +COPY run-coverity-scan run-coverity-scan diff --git a/scripts/coverity-scan/model.c b/scripts/coverity-scan/model.c new file mode 100644 index 000000000..9d4fba53d --- /dev/null +++ b/scripts/coverity-scan/model.c @@ -0,0 +1,371 @@ +/* Coverity Scan model + * + * Copyright (C) 2014 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com> + * Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or, at your + * option, any later version. See the COPYING file in the top-level directory. 
+ */ + + +/* + * This is the source code for our Coverity user model file. The + * purpose of user models is to increase scanning accuracy by explaining + * code Coverity can't see (out of tree libraries) or doesn't + * sufficiently understand. Better accuracy means both fewer false + * positives and more true defects. Memory leaks in particular. + * + * - A model file can't import any header files. Some built-in primitives are + * available but not wchar_t, NULL etc. + * - Modeling doesn't need full structs and typedefs. Rudimentary structs + * and similar types are sufficient. + * - An uninitialized local variable signifies that the variable could be + * any value. + * + * The model file must be uploaded by an admin in the analysis settings of + * http://scan.coverity.com/projects/378 + */ + +#define NULL ((void *)0) + +typedef unsigned char uint8_t; +typedef char int8_t; +typedef unsigned int uint32_t; +typedef int int32_t; +typedef long ssize_t; +typedef unsigned long long uint64_t; +typedef long long int64_t; +typedef _Bool bool; + +typedef struct va_list_str *va_list; + +/* exec.c */ + +typedef struct AddressSpace AddressSpace; +typedef struct MemoryRegionCache MemoryRegionCache; +typedef uint64_t hwaddr; +typedef uint32_t MemTxResult; +typedef struct MemTxAttrs {} MemTxAttrs; + +static void __bufwrite(uint8_t *buf, ssize_t len) +{ + int first, last; + __coverity_negative_sink__(len); + if (len == 0) return; + buf[0] = first; + buf[len-1] = last; + __coverity_writeall__(buf); +} + +static void __bufread(uint8_t *buf, ssize_t len) +{ + __coverity_negative_sink__(len); + if (len == 0) return; + int first = buf[0]; + int last = buf[len-1]; +} + +MemTxResult address_space_read_cached(MemoryRegionCache *cache, hwaddr addr, + MemTxAttrs attrs, + void *buf, int len) +{ + MemTxResult result; + // TODO: investigate impact of treating reads as producing + // tainted data, with __coverity_tainted_data_argument__(buf). + __bufwrite(buf, len); + return result; +} + +MemTxResult address_space_write_cached(MemoryRegionCache *cache, hwaddr addr, + MemTxAttrs attrs, + const void *buf, int len) +{ + MemTxResult result; + __bufread(buf, len); + return result; +} + +MemTxResult address_space_rw_cached(MemoryRegionCache *cache, hwaddr addr, + MemTxAttrs attrs, + void *buf, int len, bool is_write) +{ + if (is_write) { + return address_space_write_cached(cache, addr, attrs, buf, len); + } else { + return address_space_read_cached(cache, addr, attrs, buf, len); + } +} + +MemTxResult address_space_read(AddressSpace *as, hwaddr addr, + MemTxAttrs attrs, + void *buf, int len) +{ + MemTxResult result; + // TODO: investigate impact of treating reads as producing + // tainted data, with __coverity_tainted_data_argument__(buf). + __bufwrite(buf, len); + return result; +} + +MemTxResult address_space_write(AddressSpace *as, hwaddr addr, + MemTxAttrs attrs, + const void *buf, int len) +{ + MemTxResult result; + __bufread(buf, len); + return result; +} + +MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, + MemTxAttrs attrs, + void *buf, int len, bool is_write) +{ + if (is_write) { + return address_space_write(as, addr, attrs, buf, len); + } else { + return address_space_read(as, addr, attrs, buf, len); + } +} + +/* Tainting */ + +typedef struct {} name2keysym_t; +static int get_keysym(const name2keysym_t *table, + const char *name) +{ + int result; + if (result > 0) { + __coverity_tainted_string_sanitize_content__(name); + return result; + } else { + return 0; + } +} + +/* Replay data is considered trusted. 
*/ +uint8_t replay_get_byte(void) +{ + uint8_t byte; + return byte; +} + + +/* + * GLib memory allocation functions. + * + * Note that we ignore the fact that g_malloc of 0 bytes returns NULL, + * and g_realloc of 0 bytes frees the pointer. + * + * Modeling this would result in Coverity flagging a lot of memory + * allocations as potentially returning NULL, and asking us to check + * whether the result of the allocation is NULL or not. However, the + * resulting pointer should never be dereferenced anyway, and in fact + * it is not in the vast majority of cases. + * + * If a dereference did happen, this would suppress a defect report + * for an actual null pointer dereference. But it's too unlikely to + * be worth wading through the false positives, and with some luck + * we'll get a buffer overflow reported anyway. + */ + +/* + * Allocation primitives, cannot return NULL + * See also Coverity's library/generic/libc/all/all.c + */ + +void *g_malloc_n(size_t nmemb, size_t size) +{ + void *ptr; + + __coverity_negative_sink__(nmemb); + __coverity_negative_sink__(size); + ptr = __coverity_alloc__(nmemb * size); + if (!ptr) { + __coverity_panic__(); + } + __coverity_mark_as_uninitialized_buffer__(ptr); + __coverity_mark_as_afm_allocated__(ptr, AFM_free); + return ptr; +} + +void *g_malloc0_n(size_t nmemb, size_t size) +{ + void *ptr; + + __coverity_negative_sink__(nmemb); + __coverity_negative_sink__(size); + ptr = __coverity_alloc__(nmemb * size); + if (!ptr) { + __coverity_panic__(); + } + __coverity_writeall0__(ptr); + __coverity_mark_as_afm_allocated__(ptr, AFM_free); + return ptr; +} + +void *g_realloc_n(void *ptr, size_t nmemb, size_t size) +{ + __coverity_negative_sink__(nmemb); + __coverity_negative_sink__(size); + __coverity_escape__(ptr); + ptr = __coverity_alloc__(nmemb * size); + if (!ptr) { + __coverity_panic__(); + } + /* + * Memory beyond the old size isn't actually initialized. Can't + * model that. 
See Coverity's realloc() model + */ + __coverity_writeall__(ptr); + __coverity_mark_as_afm_allocated__(ptr, AFM_free); + return ptr; +} + +void g_free(void *ptr) +{ + __coverity_free__(ptr); + __coverity_mark_as_afm_freed__(ptr, AFM_free); +} + +/* + * Derive the g_try_FOO_n() from the g_FOO_n() by adding indeterminate + * out of memory conditions + */ + +void *g_try_malloc_n(size_t nmemb, size_t size) +{ + int nomem; + + if (nomem) { + return NULL; + } + return g_malloc_n(nmemb, size); +} + +void *g_try_malloc0_n(size_t nmemb, size_t size) +{ + int nomem; + + if (nomem) { + return NULL; + } + return g_malloc0_n(nmemb, size); +} + +void *g_try_realloc_n(void *ptr, size_t nmemb, size_t size) +{ + int nomem; + + if (nomem) { + return NULL; + } + return g_realloc_n(ptr, nmemb, size); +} + +/* Derive the g_FOO() from the g_FOO_n() */ + +void *g_malloc(size_t size) +{ + void *ptr; + + __coverity_negative_sink__(size); + ptr = __coverity_alloc__(size); + if (!ptr) { + __coverity_panic__(); + } + __coverity_mark_as_uninitialized_buffer__(ptr); + __coverity_mark_as_afm_allocated__(ptr, AFM_free); + return ptr; +} + +void *g_malloc0(size_t size) +{ + void *ptr; + + __coverity_negative_sink__(size); + ptr = __coverity_alloc__(size); + if (!ptr) { + __coverity_panic__(); + } + __coverity_writeall0__(ptr); + __coverity_mark_as_afm_allocated__(ptr, AFM_free); + return ptr; +} + +void *g_realloc(void *ptr, size_t size) +{ + __coverity_negative_sink__(size); + __coverity_escape__(ptr); + ptr = __coverity_alloc__(size); + if (!ptr) { + __coverity_panic__(); + } + /* + * Memory beyond the old size isn't actually initialized. Can't + * model that. See Coverity's realloc() model + */ + __coverity_writeall__(ptr); + __coverity_mark_as_afm_allocated__(ptr, AFM_free); + return ptr; +} + +void *g_try_malloc(size_t size) +{ + int nomem; + + if (nomem) { + return NULL; + } + return g_malloc(size); +} + +void *g_try_malloc0(size_t size) +{ + int nomem; + + if (nomem) { + return NULL; + } + return g_malloc0(size); +} + +void *g_try_realloc(void *ptr, size_t size) +{ + int nomem; + + if (nomem) { + return NULL; + } + return g_realloc(ptr, size); +} + +/* Other glib functions */ + +typedef struct pollfd GPollFD; + +int poll(); + +int g_poll (GPollFD *fds, unsigned nfds, int timeout) +{ + return poll(fds, nfds, timeout); +} + +typedef struct _GIOChannel GIOChannel; +GIOChannel *g_io_channel_unix_new(int fd) +{ + GIOChannel *c = g_malloc0(sizeof(GIOChannel)); + __coverity_escape__(fd); + return c; +} + +void g_assertion_message_expr(const char *domain, + const char *file, + int line, + const char *func, + const char *expr) +{ + __coverity_panic__(); +} diff --git a/scripts/coverity-scan/run-coverity-scan b/scripts/coverity-scan/run-coverity-scan new file mode 100755 index 000000000..7395bbfad --- /dev/null +++ b/scripts/coverity-scan/run-coverity-scan @@ -0,0 +1,433 @@ +#!/bin/sh -e + +# Upload a created tarball to Coverity Scan, as per +# https://scan.coverity.com/projects/qemu/builds/new + +# This work is licensed under the terms of the GNU GPL version 2, +# or (at your option) any later version. +# See the COPYING file in the top-level directory. +# +# Copyright (c) 2017-2020 Linaro Limited +# Written by Peter Maydell + +# Note that this script will automatically download and +# run the (closed-source) coverity build tools, so don't +# use it if you don't trust them! + +# This script assumes that you're running it from a QEMU source +# tree, and that tree is a fresh clean one, because we do an in-tree +# build. 
(This is necessary so that the filenames that the Coverity +# Scan server sees are relative paths that match up with the component +# regular expressions it uses; an out-of-tree build won't work for this.) +# The host machine should have as many of QEMU's dependencies +# installed as possible, for maximum coverity coverage. + +# To do an upload you need to be a maintainer in the Coverity online +# service, and you will need to know the "Coverity token", which is a +# secret 8 digit hex string. You can find that from the web UI in the +# project settings, if you have maintainer access there. + +# Command line options: +# --dry-run : run the tools, but don't actually do the upload +# --docker : create and work inside a container +# --docker-engine : specify the container engine to use (docker/podman/auto); +# implies --docker +# --update-tools-only : update the cached copy of the tools, but don't run them +# --no-update-tools : do not update the cached copy of the tools +# --tokenfile : file to read Coverity token from +# --version ver : specify version being analyzed (default: ask git) +# --description desc : specify description of this version (default: ask git) +# --srcdir : QEMU source tree to analyze (default: current working dir) +# --results-tarball : path to copy the results tarball to (default: don't +# copy it anywhere, just upload it) +# --src-tarball : tarball to untar into src dir (default: none); this +# is intended mainly for internal use by the Docker support +# +# User-specifiable environment variables: +# COVERITY_TOKEN -- Coverity token (default: looks at your +# coverity.token config) +# COVERITY_EMAIL -- the email address to use for uploads (default: +# looks at your git coverity.email or user.email config) +# COVERITY_BUILD_CMD -- make command (default: 'make -jN' where N is +# number of CPUs as determined by 'nproc') +# COVERITY_TOOL_BASE -- set to directory to put coverity tools +# (default: /tmp/coverity-tools) +# +# You must specify the token, either by environment variable or by +# putting it in a file and using --tokenfile. Everything else has +# a reasonable default if this is run from a git tree. + +check_upload_permissions() { + # Check whether we can do an upload to the server; will exit the script + # with status 1 if the check failed (usually a bad token); + # will exit the script with status 0 if the check indicated that we + # can't upload yet (ie we are at quota) + # Assumes that COVERITY_TOKEN, PROJNAME and DRYRUN have been initialized. + + echo "Checking upload permissions..." + + if ! up_perm="$(wget https://scan.coverity.com/api/upload_permitted --post-data "token=$COVERITY_TOKEN&project=$PROJNAME" -q -O -)"; then + echo "Coverity Scan API access denied: bad token?" + exit 1 + fi + + # Really up_perm is a JSON response with either + # {upload_permitted:true} or {next_upload_permitted_at:<date>} + # We do some hacky string parsing instead of properly parsing it. + case "$up_perm" in + *upload_permitted*true*) + echo "Coverity Scan: upload permitted" + ;; + *next_upload_permitted_at*) + if [ "$DRYRUN" = yes ]; then + echo "Coverity Scan: upload quota reached, continuing dry run" + else + echo "Coverity Scan: upload quota reached; stopping here" + # Exit success as this isn't a build error. + exit 0 + fi + ;; + *) + echo "Coverity Scan upload check: unexpected result $up_perm" + exit 1 + ;; + esac +} + + +build_docker_image() { + # build docker container including the coverity-scan tools + echo "Building docker container..." 
+ # TODO: This re-unpacks the tools every time, rather than caching + # and reusing the image produced by the COPY of the .tgz file. + # Not sure why. + tests/docker/docker.py --engine ${DOCKER_ENGINE} build \ + -t coverity-scanner -f scripts/coverity-scan/coverity-scan.docker \ + --extra-files scripts/coverity-scan/run-coverity-scan \ + "$COVERITY_TOOL_BASE"/coverity_tool.tgz +} + +update_coverity_tools () { + # Check for whether we need to download the Coverity tools + # (either because we don't have a copy, or because it's out of date) + # Assumes that COVERITY_TOOL_BASE, COVERITY_TOKEN and PROJNAME are set. + + mkdir -p "$COVERITY_TOOL_BASE" + cd "$COVERITY_TOOL_BASE" + + echo "Checking for new version of coverity build tools..." + wget https://scan.coverity.com/download/linux64 --post-data "token=$COVERITY_TOKEN&project=$PROJNAME&md5=1" -O coverity_tool.md5.new + + if ! cmp -s coverity_tool.md5 coverity_tool.md5.new; then + # out of date md5 or no md5: download new build tool + # blow away the old build tool + echo "Downloading coverity build tools..." + rm -rf coverity_tool coverity_tool.tgz + wget https://scan.coverity.com/download/linux64 --post-data "token=$COVERITY_TOKEN&project=$PROJNAME" -O coverity_tool.tgz + if ! (cat coverity_tool.md5.new; echo " coverity_tool.tgz") | md5sum -c --status; then + echo "Downloaded tarball didn't match md5sum!" + exit 1 + fi + + if [ "$DOCKER" != yes ]; then + # extract the new one, keeping it corralled in a 'coverity_tool' directory + echo "Unpacking coverity build tools..." + mkdir -p coverity_tool + cd coverity_tool + tar xf ../coverity_tool.tgz + cd .. + mv coverity_tool.md5.new coverity_tool.md5 + fi + fi + rm -f coverity_tool.md5.new + cd "$SRCDIR" + + if [ "$DOCKER" = yes ]; then + build_docker_image + fi +} + + +# Check user-provided environment variables and arguments +DRYRUN=no +UPDATE=yes +DOCKER=no + +while [ "$#" -ge 1 ]; do + case "$1" in + --dry-run) + shift + DRYRUN=yes + ;; + --no-update-tools) + shift + UPDATE=no + ;; + --update-tools-only) + shift + UPDATE=only + ;; + --version) + shift + if [ $# -eq 0 ]; then + echo "--version needs an argument" + exit 1 + fi + VERSION="$1" + shift + ;; + --description) + shift + if [ $# -eq 0 ]; then + echo "--description needs an argument" + exit 1 + fi + DESCRIPTION="$1" + shift + ;; + --tokenfile) + shift + if [ $# -eq 0 ]; then + echo "--tokenfile needs an argument" + exit 1 + fi + COVERITY_TOKEN="$(cat "$1")" + shift + ;; + --srcdir) + shift + if [ $# -eq 0 ]; then + echo "--srcdir needs an argument" + exit 1 + fi + SRCDIR="$1" + shift + ;; + --results-tarball) + shift + if [ $# -eq 0 ]; then + echo "--results-tarball needs an argument" + exit 1 + fi + RESULTSTARBALL="$1" + shift + ;; + --src-tarball) + shift + if [ $# -eq 0 ]; then + echo "--src-tarball needs an argument" + exit 1 + fi + SRCTARBALL="$1" + shift + ;; + --docker) + DOCKER=yes + DOCKER_ENGINE=auto + shift + ;; + --docker-engine) + shift + if [ $# -eq 0 ]; then + echo "--docker-engine needs an argument" + exit 1 + fi + DOCKER=yes + DOCKER_ENGINE="$1" + shift + ;; + *) + echo "Unexpected argument '$1'" + exit 1 + ;; + esac +done + +if [ -z "$COVERITY_TOKEN" ]; then + COVERITY_TOKEN="$(git config coverity.token)" +fi +if [ -z "$COVERITY_TOKEN" ]; then + echo "COVERITY_TOKEN environment variable not set" + exit 1 +fi + +if [ -z "$COVERITY_BUILD_CMD" ]; then + NPROC=$(nproc) + COVERITY_BUILD_CMD="make -j$NPROC" + echo "COVERITY_BUILD_CMD: using default '$COVERITY_BUILD_CMD'" +fi + +if [ -z "$COVERITY_TOOL_BASE" ]; then + echo 
"COVERITY_TOOL_BASE: using default /tmp/coverity-tools" + COVERITY_TOOL_BASE=/tmp/coverity-tools +fi + +if [ -z "$SRCDIR" ]; then + SRCDIR="$PWD" +fi + +PROJNAME=QEMU +TARBALL=cov-int.tar.xz + +if [ "$UPDATE" = only ]; then + # Just do the tools update; we don't need to check whether + # we are in a source tree or have upload rights for this, + # so do it before some of the command line and source tree checks. + + if [ "$DOCKER" = yes ] && [ ! -z "$SRCTARBALL" ]; then + echo --update-tools-only --docker is incompatible with --src-tarball. + exit 1 + fi + + update_coverity_tools + exit 0 +fi + +if [ ! -e "$SRCDIR" ]; then + mkdir "$SRCDIR" +fi + +cd "$SRCDIR" + +if [ ! -z "$SRCTARBALL" ]; then + echo "Untarring source tarball into $SRCDIR..." + tar xvf "$SRCTARBALL" +fi + +echo "Checking this is a QEMU source tree..." +if ! [ -e "$SRCDIR/VERSION" ]; then + echo "Not in a QEMU source tree?" + exit 1 +fi + +# Fill in defaults used by the non-update-only process +if [ -z "$VERSION" ]; then + VERSION="$(git describe --always HEAD)" +fi + +if [ -z "$DESCRIPTION" ]; then + DESCRIPTION="$(git rev-parse HEAD)" +fi + +if [ -z "$COVERITY_EMAIL" ]; then + COVERITY_EMAIL="$(git config coverity.email)" +fi +if [ -z "$COVERITY_EMAIL" ]; then + COVERITY_EMAIL="$(git config user.email)" +fi + +# Otherwise, continue with the full build and upload process. + +check_upload_permissions + +if [ "$UPDATE" != no ]; then + update_coverity_tools +fi + +# Run ourselves inside docker if that's what the user wants +if [ "$DOCKER" = yes ]; then + # Put the Coverity token into a temporary file that only + # we have read access to, and then pass it to docker build + # using a volume. A volume is enough for the token not to + # leak into the Docker image. + umask 077 + SECRETDIR=$(mktemp -d) + if [ -z "$SECRETDIR" ]; then + echo "Failed to create temporary directory" + exit 1 + fi + trap 'rm -rf "$SECRETDIR"' INT TERM EXIT + echo "Created temporary directory $SECRETDIR" + SECRET="$SECRETDIR/token" + echo "$COVERITY_TOKEN" > "$SECRET" + echo "Archiving sources to be analyzed..." + ./scripts/archive-source.sh "$SECRETDIR/qemu-sources.tgz" + ARGS="--no-update-tools" + if [ "$DRYRUN" = yes ]; then + ARGS="$ARGS --dry-run" + fi + echo "Running scanner..." + # If we need to capture the output tarball, get the inner run to + # save it to the secrets directory so we can copy it out before the + # directory is cleaned up. + if [ ! -z "$RESULTSTARBALL" ]; then + ARGS="$ARGS --results-tarball /work/cov-int.tar.xz" + fi + # Arrange for this docker run to get access to the sources with -v. + # We pass through all the configuration from the outer script to the inner. + export COVERITY_EMAIL COVERITY_BUILD_CMD + tests/docker/docker.py run -it --env COVERITY_EMAIL --env COVERITY_BUILD_CMD \ + -v "$SECRETDIR:/work" coverity-scanner \ + ./run-coverity-scan --version "$VERSION" \ + --description "$DESCRIPTION" $ARGS --tokenfile /work/token \ + --srcdir /qemu --src-tarball /work/qemu-sources.tgz + if [ ! -z "$RESULTSTARBALL" ]; then + echo "Copying results tarball to $RESULTSTARBALL..." + cp "$SECRETDIR/cov-int.tar.xz" "$RESULTSTARBALL" + fi + echo "Docker work complete." + exit 0 +fi + +TOOLBIN="$(cd "$COVERITY_TOOL_BASE" && echo $PWD/coverity_tool/cov-analysis-*/bin)" + +if ! test -x "$TOOLBIN/cov-build"; then + echo "Couldn't find cov-build in the coverity build-tool directory??" + exit 1 +fi + +export PATH="$TOOLBIN:$PATH" + +cd "$SRCDIR" + +echo "Nuking build directory..." +rm -rf +build +mkdir +build +cd +build + +echo "Configuring..." 
+# We configure with a fixed set of enables here to ensure that we don't
+# accidentally reduce the scope of the analysis by doing the build on
+# a system that's missing a dependency that we need to build part of
+# the codebase.
+../configure --disable-modules --enable-sdl --enable-gtk \
+    --enable-opengl --enable-vte --enable-gnutls \
+    --enable-nettle --enable-curses --enable-curl \
+    --audio-drv-list=oss,alsa,sdl,pa --enable-virtfs \
+    --enable-vnc --enable-vnc-sasl --enable-vnc-jpeg --enable-vnc-png \
+    --enable-xen --enable-brlapi \
+    --enable-linux-aio --enable-attr \
+    --enable-cap-ng --enable-trace-backends=log --enable-spice --enable-rbd \
+    --enable-xfsctl --enable-libusb --enable-usb-redir \
+    --enable-libiscsi --enable-libnfs --enable-seccomp \
+    --enable-tpm --enable-libssh --enable-lzo --enable-snappy --enable-bzip2 \
+    --enable-numa --enable-rdma --enable-smartcard --enable-virglrenderer \
+    --enable-mpath --enable-libxml2 --enable-glusterfs \
+    --enable-virtfs --enable-zstd
+
+echo "Running cov-build..."
+rm -rf cov-int
+mkdir cov-int
+cov-build --dir cov-int $COVERITY_BUILD_CMD
+
+echo "Creating results tarball..."
+tar cvf - cov-int | xz > "$TARBALL"
+
+if [ ! -z "$RESULTSTARBALL" ]; then
+    echo "Copying results tarball to $RESULTSTARBALL..."
+    cp "$TARBALL" "$RESULTSTARBALL"
+fi
+
+echo "Uploading results tarball..."
+
+if [ "$DRYRUN" = yes ]; then
+    echo "Dry run only, not uploading $TARBALL"
+    exit 0
+fi
+
+curl --form token="$COVERITY_TOKEN" --form email="$COVERITY_EMAIL" \
+    --form file=@"$TARBALL" --form version="$VERSION" \
+    --form description="$DESCRIPTION" \
+    https://scan.coverity.com/builds?project="$PROJNAME"
+
+echo "Done." diff --git a/scripts/cpu-x86-uarch-abi.py b/scripts/cpu-x86-uarch-abi.py new file mode 100644 index 000000000..08acc52a8 --- /dev/null +++ b/scripts/cpu-x86-uarch-abi.py @@ -0,0 +1,194 @@ +#!/usr/bin/python3
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# A script to generate a CSV file showing the x86_64 ABI
+# compatibility levels for each CPU model.
+#
+
+from qemu import qmp
+import sys
+
+if len(sys.argv) != 2:
+    print("syntax: %s QMP-SOCK\n\n" % __file__ +
+          "Where QMP-SOCK points to a QEMU process such as\n\n" +
+          " # qemu-system-x86_64 -qmp unix:/tmp/qmp,server,nowait " +
+          "-display none -accel kvm", file=sys.stderr)
+    sys.exit(1)
+
+# Mandatory CPUID features for each microarch ABI level
+levels = [
+    [ # x86-64 baseline
+        "cmov",
+        "cx8",
+        "fpu",
+        "fxsr",
+        "mmx",
+        "syscall",
+        "sse",
+        "sse2",
+    ],
+    [ # x86-64-v2
+        "cx16",
+        "lahf-lm",
+        "popcnt",
+        "pni",
+        "sse4.1",
+        "sse4.2",
+        "ssse3",
+    ],
+    [ # x86-64-v3
+        "avx",
+        "avx2",
+        "bmi1",
+        "bmi2",
+        "f16c",
+        "fma",
+        "abm",
+        "movbe",
+    ],
+    [ # x86-64-v4
+        "avx512f",
+        "avx512bw",
+        "avx512cd",
+        "avx512dq",
+        "avx512vl",
+    ],
+]
+
+# Assumes externally launched process such as
+#
+# qemu-system-x86_64 -qmp unix:/tmp/qmp,server,nowait -display none -accel kvm
+#
+# Note different results will be obtained with TCG, as
+# TCG masks out certain features otherwise present in
+# the CPU model definitions, as does KVM.
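+#
+# Illustrative invocation (hypothetical socket path), with the CSV
+# written to stdout:
+#
+#   ./cpu-x86-uarch-abi.py /tmp/qmp > x86-64-abi-levels.csv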
+
+
+sock = sys.argv[1]
+shell = qmp.QEMUMonitorProtocol(sock)
+shell.connect()
+
+models = shell.cmd("query-cpu-definitions")
+
+# These QMP props don't correspond to CPUID features,
+# so ignore them
+skip = [
+    "family",
+    "min-level",
+    "min-xlevel",
+    "vendor",
+    "model",
+    "model-id",
+    "stepping",
+]
+
+names = []
+
+for model in models["return"]:
+    if "alias-of" in model:
+        continue
+    names.append(model["name"])
+
+models = {}
+
+for name in sorted(names):
+    cpu = shell.cmd("query-cpu-model-expansion",
+                    { "type": "static",
+                      "model": { "name": name }})
+
+    got = {}
+    for (feature, present) in cpu["return"]["model"]["props"].items():
+        if present and feature not in skip:
+            got[feature] = True
+
+    if name in ["host", "max", "base"]:
+        continue
+
+    models[name] = {
+        # Dict of all present features in this CPU model
+        "features": got,
+
+        # Whether each x86-64 ABI level is satisfied
+        "levels": [False, False, False, False],
+
+        # Number of extra CPUID features compared to the x86-64 ABI level
+        "distance":[-1, -1, -1, -1],
+
+        # CPUID features present in model, but not in ABI level
+        "delta":[[], [], [], []],
+
+        # CPUID features in ABI level but not present in model
+        "missing": [[], [], [], []],
+    }
+
+
+# Calculate whether the CPU models satisfy each ABI level
+for name in models.keys():
+    for level in range(len(levels)):
+        got = set(models[name]["features"])
+        want = set(levels[level])
+        missing = want - got
+        match = True
+        if len(missing) > 0:
+            match = False
+        models[name]["levels"][level] = match
+        models[name]["missing"][level] = missing
+
+# Cache list of CPU models satisfying each ABI level
+abi_models = [
+    [],
+    [],
+    [],
+    [],
+]
+
+for name in models.keys():
+    for level in range(len(levels)):
+        if models[name]["levels"][level]:
+            abi_models[level].append(name)
+
+
+for level in range(len(abi_models)):
+    # Find the union of features in all CPU models satisfying this ABI
+    allfeatures = {}
+    for name in abi_models[level]:
+        for feat in models[name]["features"]:
+            allfeatures[feat] = True
+
+    # Find the intersection of features in all CPU models satisfying this ABI
+    commonfeatures = []
+    for feat in allfeatures:
+        present = True
+        for name in models.keys():
+            if not models[name]["levels"][level]:
+                continue
+            if feat not in models[name]["features"]:
+                present = False
+        if present:
+            commonfeatures.append(feat)
+
+    # Determine how many extra features are present compared to the lowest
+    # common denominator
+    for name in models.keys():
+        if not models[name]["levels"][level]:
+            continue
+
+        delta = set(models[name]["features"].keys()) - set(commonfeatures)
+        models[name]["distance"][level] = len(delta)
+        models[name]["delta"][level] = delta
+
+def print_uarch_abi_csv():
+    print("# Automatically generated from '%s'" % __file__)
+    print("Model,baseline,v2,v3,v4")
+    for name in models.keys():
+        print(name, end="")
+        for level in range(len(levels)):
+            if models[name]["levels"][level]:
+                print(",✅", end="")
+            else:
+                print(",", end="")
+        print()
+
+print_uarch_abi_csv() diff --git a/scripts/decodetree.py b/scripts/decodetree.py new file mode 100644 index 000000000..a03dc6b5e --- /dev/null +++ b/scripts/decodetree.py @@ -0,0 +1,1424 @@ +#!/usr/bin/env python3
+# Copyright (c) 2018 Linaro Limited
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see <http://www.gnu.org/licenses/>. +# + +# +# Generate a decoding tree from a specification file. +# See the syntax and semantics in docs/devel/decodetree.rst. +# + +import io +import os +import re +import sys +import getopt + +insnwidth = 32 +bitop_width = 32 +insnmask = 0xffffffff +variablewidth = False +fields = {} +arguments = {} +formats = {} +allpatterns = [] +anyextern = False + +translate_prefix = 'trans' +translate_scope = 'static ' +input_file = '' +output_file = None +output_fd = None +insntype = 'uint32_t' +decode_function = 'decode' + +# An identifier for C. +re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*' + +# Identifiers for Arguments, Fields, Formats and Patterns. +re_arg_ident = '&[a-zA-Z0-9_]*' +re_fld_ident = '%[a-zA-Z0-9_]*' +re_fmt_ident = '@[a-zA-Z0-9_]*' +re_pat_ident = '[a-zA-Z0-9_]*' + +def error_with_file(file, lineno, *args): + """Print an error message from file:line and args and exit.""" + global output_file + global output_fd + + prefix = '' + if file: + prefix += f'{file}:' + if lineno: + prefix += f'{lineno}:' + if prefix: + prefix += ' ' + print(prefix, end='error: ', file=sys.stderr) + print(*args, file=sys.stderr) + + if output_file and output_fd: + output_fd.close() + os.remove(output_file) + exit(1) +# end error_with_file + + +def error(lineno, *args): + error_with_file(input_file, lineno, *args) +# end error + + +def output(*args): + global output_fd + for a in args: + output_fd.write(a) + + +def output_autogen(): + output('/* This file is autogenerated by scripts/decodetree.py. */\n\n') + + +def str_indent(c): + """Return a string with C spaces""" + return ' ' * c + + +def str_fields(fields): + """Return a string uniquely identifying FIELDS""" + r = '' + for n in sorted(fields.keys()): + r += '_' + n + return r[1:] + + +def whex(val): + """Return a hex string for val padded for insnwidth""" + global insnwidth + return f'0x{val:0{insnwidth // 4}x}' + + +def whexC(val): + """Return a hex string for val padded for insnwidth, + and with the proper suffix for a C constant.""" + suffix = '' + if val >= 0x100000000: + suffix = 'ull' + elif val >= 0x80000000: + suffix = 'u' + return whex(val) + suffix + + +def str_match_bits(bits, mask): + """Return a string pretty-printing BITS/MASK""" + global insnwidth + + i = 1 << (insnwidth - 1) + space = 0x01010100 + r = '' + while i != 0: + if i & mask: + if i & bits: + r += '1' + else: + r += '0' + else: + r += '.' 
+ if i & space: + r += ' ' + i >>= 1 + return r + + +def is_pow2(x): + """Return true iff X is equal to a power of 2.""" + return (x & (x - 1)) == 0 + + +def ctz(x): + """Return the number of times 2 factors into X.""" + assert x != 0 + r = 0 + while ((x >> r) & 1) == 0: + r += 1 + return r + + +def is_contiguous(bits): + if bits == 0: + return -1 + shift = ctz(bits) + if is_pow2((bits >> shift) + 1): + return shift + else: + return -1 + + +def eq_fields_for_args(flds_a, arg): + if len(flds_a) != len(arg.fields): + return False + # Only allow inference on default types + for t in arg.types: + if t != 'int': + return False + for k, a in flds_a.items(): + if k not in arg.fields: + return False + return True + + +def eq_fields_for_fmts(flds_a, flds_b): + if len(flds_a) != len(flds_b): + return False + for k, a in flds_a.items(): + if k not in flds_b: + return False + b = flds_b[k] + if a.__class__ != b.__class__ or a != b: + return False + return True + + +class Field: + """Class representing a simple instruction field""" + def __init__(self, sign, pos, len): + self.sign = sign + self.pos = pos + self.len = len + self.mask = ((1 << len) - 1) << pos + + def __str__(self): + if self.sign: + s = 's' + else: + s = '' + return str(self.pos) + ':' + s + str(self.len) + + def str_extract(self): + global bitop_width + s = 's' if self.sign else '' + return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})' + + def __eq__(self, other): + return self.sign == other.sign and self.mask == other.mask + + def __ne__(self, other): + return not self.__eq__(other) +# end Field + + +class MultiField: + """Class representing a compound instruction field""" + def __init__(self, subs, mask): + self.subs = subs + self.sign = subs[0].sign + self.mask = mask + + def __str__(self): + return str(self.subs) + + def str_extract(self): + global bitop_width + ret = '0' + pos = 0 + for f in reversed(self.subs): + ext = f.str_extract() + if pos == 0: + ret = ext + else: + ret = f'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})' + pos += f.len + return ret + + def __ne__(self, other): + if len(self.subs) != len(other.subs): + return True + for a, b in zip(self.subs, other.subs): + if a.__class__ != b.__class__ or a != b: + return True + return False + + def __eq__(self, other): + return not self.__ne__(other) +# end MultiField + + +class ConstField: + """Class representing an argument field with constant value""" + def __init__(self, value): + self.value = value + self.mask = 0 + self.sign = value < 0 + + def __str__(self): + return str(self.value) + + def str_extract(self): + return str(self.value) + + def __cmp__(self, other): + return self.value - other.value +# end ConstField + + +class FunctionField: + """Class representing a field passed through a function""" + def __init__(self, func, base): + self.mask = base.mask + self.sign = base.sign + self.base = base + self.func = func + + def __str__(self): + return self.func + '(' + str(self.base) + ')' + + def str_extract(self): + return self.func + '(ctx, ' + self.base.str_extract() + ')' + + def __eq__(self, other): + return self.func == other.func and self.base == other.base + + def __ne__(self, other): + return not self.__eq__(other) +# end FunctionField + + +class ParameterField: + """Class representing a pseudo-field read from a function""" + def __init__(self, func): + self.mask = 0 + self.sign = 0 + self.func = func + + def __str__(self): + return self.func + + def str_extract(self): + return self.func + '(ctx)' + + def __eq__(self, other): + 
return self.func == other.func + + def __ne__(self, other): + return not self.__eq__(other) +# end ParameterField + + +class Arguments: + """Class representing the extracted fields of a format""" + def __init__(self, nm, flds, types, extern): + self.name = nm + self.extern = extern + self.fields = flds + self.types = types + + def __str__(self): + return self.name + ' ' + str(self.fields) + + def struct_name(self): + return 'arg_' + self.name + + def output_def(self): + if not self.extern: + output('typedef struct {\n') + for (n, t) in zip(self.fields, self.types): + output(f' {t} {n};\n') + output('} ', self.struct_name(), ';\n\n') +# end Arguments + + +class General: + """Common code between instruction formats and instruction patterns""" + def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): + self.name = name + self.file = input_file + self.lineno = lineno + self.base = base + self.fixedbits = fixb + self.fixedmask = fixm + self.undefmask = udfm + self.fieldmask = fldm + self.fields = flds + self.width = w + + def __str__(self): + return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask) + + def str1(self, i): + return str_indent(i) + self.__str__() +# end General + + +class Format(General): + """Class representing an instruction format""" + + def extract_name(self): + global decode_function + return decode_function + '_extract_' + self.name + + def output_extract(self): + output('static void ', self.extract_name(), '(DisasContext *ctx, ', + self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n') + for n, f in self.fields.items(): + output(' a->', n, ' = ', f.str_extract(), ';\n') + output('}\n\n') +# end Format + + +class Pattern(General): + """Class representing an instruction pattern""" + + def output_decl(self): + global translate_scope + global translate_prefix + output('typedef ', self.base.base.struct_name(), + ' arg_', self.name, ';\n') + output(translate_scope, 'bool ', translate_prefix, '_', self.name, + '(DisasContext *ctx, arg_', self.name, ' *a);\n') + + def output_code(self, i, extracted, outerbits, outermask): + global translate_prefix + ind = str_indent(i) + arg = self.base.base.name + output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n') + if not extracted: + output(ind, self.base.extract_name(), + '(ctx, &u.f_', arg, ', insn);\n') + for n, f in self.fields.items(): + output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n') + output(ind, 'if (', translate_prefix, '_', self.name, + '(ctx, &u.f_', arg, ')) return true;\n') + + # Normal patterns do not have children. + def build_tree(self): + return + def prop_masks(self): + return + def prop_format(self): + return + def prop_width(self): + return + +# end Pattern + + +class MultiPattern(General): + """Class representing a set of instruction patterns""" + + def __init__(self, lineno): + self.file = input_file + self.lineno = lineno + self.pats = [] + self.base = None + self.fixedbits = 0 + self.fixedmask = 0 + self.undefmask = 0 + self.width = None + + def __str__(self): + r = 'group' + if self.fixedbits is not None: + r += ' ' + str_match_bits(self.fixedbits, self.fixedmask) + return r + + def output_decl(self): + for p in self.pats: + p.output_decl() + + def prop_masks(self): + global insnmask + + fixedmask = insnmask + undefmask = insnmask + + # Collect fixedmask/undefmask for all of the children. 
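+ # (The bitwise AND keeps only the bits that every child pattern
+ # also treats as fixed/undefined.)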
+ for p in self.pats: + p.prop_masks() + fixedmask &= p.fixedmask + undefmask &= p.undefmask + + # Widen fixedmask until all fixedbits match + repeat = True + fixedbits = 0 + while repeat and fixedmask != 0: + fixedbits = None + for p in self.pats: + thisbits = p.fixedbits & fixedmask + if fixedbits is None: + fixedbits = thisbits + elif fixedbits != thisbits: + fixedmask &= ~(fixedbits ^ thisbits) + break + else: + repeat = False + + self.fixedbits = fixedbits + self.fixedmask = fixedmask + self.undefmask = undefmask + + def build_tree(self): + for p in self.pats: + p.build_tree() + + def prop_format(self): + for p in self.pats: + p.build_tree() + + def prop_width(self): + width = None + for p in self.pats: + p.prop_width() + if width is None: + width = p.width + elif width != p.width: + error_with_file(self.file, self.lineno, + 'width mismatch in patterns within braces') + self.width = width + +# end MultiPattern + + +class IncMultiPattern(MultiPattern): + """Class representing an overlapping set of instruction patterns""" + + def output_code(self, i, extracted, outerbits, outermask): + global translate_prefix + ind = str_indent(i) + for p in self.pats: + if outermask != p.fixedmask: + innermask = p.fixedmask & ~outermask + innerbits = p.fixedbits & ~outermask + output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n') + output(ind, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n') + p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask) + output(ind, '}\n') + else: + p.output_code(i, extracted, p.fixedbits, p.fixedmask) +#end IncMultiPattern + + +class Tree: + """Class representing a node in a decode tree""" + + def __init__(self, fm, tm): + self.fixedmask = fm + self.thismask = tm + self.subs = [] + self.base = None + + def str1(self, i): + ind = str_indent(i) + r = ind + whex(self.fixedmask) + if self.format: + r += ' ' + self.format.name + r += ' [\n' + for (b, s) in self.subs: + r += ind + f' {whex(b)}:\n' + r += s.str1(i + 4) + '\n' + r += ind + ']' + return r + + def __str__(self): + return self.str1(0) + + def output_code(self, i, extracted, outerbits, outermask): + ind = str_indent(i) + + # If we identified all nodes below have the same format, + # extract the fields now. + if not extracted and self.base: + output(ind, self.base.extract_name(), + '(ctx, &u.f_', self.base.base.name, ', insn);\n') + extracted = True + + # Attempt to aid the compiler in producing compact switch statements. + # If the bits in the mask are contiguous, extract them. + sh = is_contiguous(self.thismask) + if sh > 0: + # Propagate SH down into the local functions. 
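+ # (The sh=sh default argument captures the current value of sh at
+ # definition time, so the nested helpers need no closure over it.)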
+ def str_switch(b, sh=sh): + return f'(insn >> {sh}) & {b >> sh:#x}' + + def str_case(b, sh=sh): + return hex(b >> sh) + else: + def str_switch(b): + return f'insn & {whexC(b)}' + + def str_case(b): + return whexC(b) + + output(ind, 'switch (', str_switch(self.thismask), ') {\n') + for b, s in sorted(self.subs): + assert (self.thismask & ~s.fixedmask) == 0 + innermask = outermask | self.thismask + innerbits = outerbits | b + output(ind, 'case ', str_case(b), ':\n') + output(ind, ' /* ', + str_match_bits(innerbits, innermask), ' */\n') + s.output_code(i + 4, extracted, innerbits, innermask) + output(ind, ' break;\n') + output(ind, '}\n') +# end Tree + + +class ExcMultiPattern(MultiPattern): + """Class representing a non-overlapping set of instruction patterns""" + + def output_code(self, i, extracted, outerbits, outermask): + # Defer everything to our decomposed Tree node + self.tree.output_code(i, extracted, outerbits, outermask) + + @staticmethod + def __build_tree(pats, outerbits, outermask): + # Find the intersection of all remaining fixedmask. + innermask = ~outermask & insnmask + for i in pats: + innermask &= i.fixedmask + + if innermask == 0: + # Edge condition: One pattern covers the entire insnmask + if len(pats) == 1: + t = Tree(outermask, innermask) + t.subs.append((0, pats[0])) + return t + + text = 'overlapping patterns:' + for p in pats: + text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p) + error_with_file(pats[0].file, pats[0].lineno, text) + + fullmask = outermask | innermask + + # Sort each element of pats into the bin selected by the mask. + bins = {} + for i in pats: + fb = i.fixedbits & innermask + if fb in bins: + bins[fb].append(i) + else: + bins[fb] = [i] + + # We must recurse if any bin has more than one element or if + # the single element in the bin has not been fully matched. + t = Tree(fullmask, innermask) + + for b, l in bins.items(): + s = l[0] + if len(l) > 1 or s.fixedmask & ~fullmask != 0: + s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask) + t.subs.append((b, s)) + + return t + + def build_tree(self): + super().prop_format() + self.tree = self.__build_tree(self.pats, self.fixedbits, + self.fixedmask) + + @staticmethod + def __prop_format(tree): + """Propagate Format objects into the decode tree""" + + # Depth first search. + for (b, s) in tree.subs: + if isinstance(s, Tree): + ExcMultiPattern.__prop_format(s) + + # If all entries in SUBS have the same format, then + # propagate that into the tree. + f = None + for (b, s) in tree.subs: + if f is None: + f = s.base + if f is None: + return + if f is not s.base: + return + tree.base = f + + def prop_format(self): + super().prop_format() + self.__prop_format(self.tree) + +# end ExcMultiPattern + + +def parse_field(lineno, name, toks): + """Parse one instruction field from TOKS at LINENO""" + global fields + global insnwidth + + # A "simple" field will have only one entry; + # a "multifield" will have several. 
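+ # For example, "3:5" is a single 5-bit extract at bit position 3,
+ # while "10:s1 2:1 0:1" concatenates three extracts into one
+ # signed value.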
+ subs = [] + width = 0 + func = None + for t in toks: + if re.match('^!function=', t): + if func: + error(lineno, 'duplicate function') + func = t.split('=') + func = func[1] + continue + + if re.fullmatch('[0-9]+:s[0-9]+', t): + # Signed field extract + subtoks = t.split(':s') + sign = True + elif re.fullmatch('[0-9]+:[0-9]+', t): + # Unsigned field extract + subtoks = t.split(':') + sign = False + else: + error(lineno, f'invalid field token "{t}"') + po = int(subtoks[0]) + le = int(subtoks[1]) + if po + le > insnwidth: + error(lineno, f'field {t} too large') + f = Field(sign, po, le) + subs.append(f) + width += le + + if width > insnwidth: + error(lineno, 'field too large') + if len(subs) == 0: + if func: + f = ParameterField(func) + else: + error(lineno, 'field with no value') + else: + if len(subs) == 1: + f = subs[0] + else: + mask = 0 + for s in subs: + if mask & s.mask: + error(lineno, 'field components overlap') + mask |= s.mask + f = MultiField(subs, mask) + if func: + f = FunctionField(func, f) + + if name in fields: + error(lineno, 'duplicate field', name) + fields[name] = f +# end parse_field + + +def parse_arguments(lineno, name, toks): + """Parse one argument set from TOKS at LINENO""" + global arguments + global re_C_ident + global anyextern + + flds = [] + types = [] + extern = False + for n in toks: + if re.fullmatch('!extern', n): + extern = True + anyextern = True + continue + if re.fullmatch(re_C_ident + ':' + re_C_ident, n): + (n, t) = n.split(':') + elif re.fullmatch(re_C_ident, n): + t = 'int' + else: + error(lineno, f'invalid argument set token "{n}"') + if n in flds: + error(lineno, f'duplicate argument "{n}"') + flds.append(n) + types.append(t) + + if name in arguments: + error(lineno, 'duplicate argument set', name) + arguments[name] = Arguments(name, flds, types, extern) +# end parse_arguments + + +def lookup_field(lineno, name): + global fields + if name in fields: + return fields[name] + error(lineno, 'undefined field', name) + + +def add_field(lineno, flds, new_name, f): + if new_name in flds: + error(lineno, 'duplicate field', new_name) + flds[new_name] = f + return flds + + +def add_field_byname(lineno, flds, new_name, old_name): + return add_field(lineno, flds, new_name, lookup_field(lineno, old_name)) + + +def infer_argument_set(flds): + global arguments + global decode_function + + for arg in arguments.values(): + if eq_fields_for_args(flds, arg): + return arg + + name = decode_function + str(len(arguments)) + arg = Arguments(name, flds.keys(), ['int'] * len(flds), False) + arguments[name] = arg + return arg + + +def infer_format(arg, fieldmask, flds, width): + global arguments + global formats + global decode_function + + const_flds = {} + var_flds = {} + for n, c in flds.items(): + if c is ConstField: + const_flds[n] = c + else: + var_flds[n] = c + + # Look for an existing format with the same argument set and fields + for fmt in formats.values(): + if arg and fmt.base != arg: + continue + if fieldmask != fmt.fieldmask: + continue + if width != fmt.width: + continue + if not eq_fields_for_fmts(flds, fmt.fields): + continue + return (fmt, const_flds) + + name = decode_function + '_Fmt_' + str(len(formats)) + if not arg: + arg = infer_argument_set(flds) + + fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width) + formats[name] = fmt + + return (fmt, const_flds) +# end infer_format + + +def parse_generic(lineno, parent_pat, name, toks): + """Parse one instruction format from TOKS at LINENO""" + global fields + global arguments + global formats + 
+    global fields
+    global arguments
+    global formats
+    global allpatterns
+    global re_arg_ident
+    global re_fld_ident
+    global re_fmt_ident
+    global re_C_ident
+    global insnwidth
+    global insnmask
+    global variablewidth
+
+    is_format = parent_pat is None
+
+    fixedmask = 0
+    fixedbits = 0
+    undefmask = 0
+    width = 0
+    flds = {}
+    arg = None
+    fmt = None
+    for t in toks:
+        # '&Foo' gives a format an explicit argument set.
+        if re.fullmatch(re_arg_ident, t):
+            tt = t[1:]
+            if arg:
+                error(lineno, 'multiple argument sets')
+            if tt in arguments:
+                arg = arguments[tt]
+            else:
+                error(lineno, 'undefined argument set', t)
+            continue
+
+        # '@Foo' gives a pattern an explicit format.
+        if re.fullmatch(re_fmt_ident, t):
+            tt = t[1:]
+            if fmt:
+                error(lineno, 'multiple formats')
+            if tt in formats:
+                fmt = formats[tt]
+            else:
+                error(lineno, 'undefined format', t)
+            continue
+
+        # '%Foo' imports a field.
+        if re.fullmatch(re_fld_ident, t):
+            tt = t[1:]
+            flds = add_field_byname(lineno, flds, tt, tt)
+            continue
+
+        # 'Foo=%Bar' imports a field with a different name.
+        if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
+            (fname, iname) = t.split('=%')
+            flds = add_field_byname(lineno, flds, fname, iname)
+            continue
+
+        # 'Foo=number' sets an argument field to a constant value
+        if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
+            (fname, value) = t.split('=')
+            value = int(value)
+            flds = add_field(lineno, flds, fname, ConstField(value))
+            continue
+
+        # A pattern of 0s, 1s, dots and dashes indicates required zeros,
+        # required ones, or don't-cares.
+        if re.fullmatch('[01.-]+', t):
+            shift = len(t)
+            fms = t.replace('0', '1')
+            fms = fms.replace('.', '0')
+            fms = fms.replace('-', '0')
+            fbs = t.replace('.', '0')
+            fbs = fbs.replace('-', '0')
+            ubm = t.replace('1', '0')
+            ubm = ubm.replace('.', '0')
+            ubm = ubm.replace('-', '1')
+            fms = int(fms, 2)
+            fbs = int(fbs, 2)
+            ubm = int(ubm, 2)
+            fixedbits = (fixedbits << shift) | fbs
+            fixedmask = (fixedmask << shift) | fms
+            undefmask = (undefmask << shift) | ubm
+        # Otherwise, fieldname:fieldwidth
+        elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
+            (fname, flen) = t.split(':')
+            sign = False
+            if flen[0] == 's':
+                sign = True
+                flen = flen[1:]
+            shift = int(flen, 10)
+            if shift + width > insnwidth:
+                error(lineno, f'field {fname} exceeds insnwidth')
+            f = Field(sign, insnwidth - width - shift, shift)
+            flds = add_field(lineno, flds, fname, f)
+            fixedbits <<= shift
+            fixedmask <<= shift
+            undefmask <<= shift
+        else:
+            error(lineno, f'invalid token "{t}"')
+        width += shift
+
+    if variablewidth and width < insnwidth and width % 8 == 0:
+        shift = insnwidth - width
+        fixedbits <<= shift
+        fixedmask <<= shift
+        undefmask <<= shift
+        undefmask |= (1 << shift) - 1
+
+    # We should have filled in all of the bits of the instruction.
+    elif not (is_format and width == 0) and width != insnwidth:
+        error(lineno, f'definition has {width} bits')
+
+    # Do not check for fields overlapping fields; one valid usage
+    # is to be able to duplicate fields via import.
+    fieldmask = 0
+    for f in flds.values():
+        fieldmask |= f.mask
+
+    # Fix up what we've parsed to match either a format or a pattern.
+    if is_format:
+        # Formats cannot reference formats.
+        if fmt:
+            error(lineno, 'format referencing format')
+        # If an argument set is given, then there should be no fields
+        # without a place to store them.
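+        # For example (hypothetical names), "@f ra:5 &args" is rejected
+        # when the &args argument set has no "ra" member.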
+        if arg:
+            for f in flds.keys():
+                if f not in arg.fields:
+                    error(lineno, f'field {f} not in argument set {arg.name}')
+        else:
+            arg = infer_argument_set(flds)
+        if name in formats:
+            error(lineno, 'duplicate format name', name)
+        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
+                     undefmask, fieldmask, flds, width)
+        formats[name] = fmt
+    else:
+        # Patterns can reference a format ...
+        if fmt:
+            # ... but not an argument set simultaneously
+            if arg:
+                error(lineno, 'pattern specifies both format and argument set')
+            if fixedmask & fmt.fixedmask:
+                error(lineno, 'pattern fixed bits overlap format fixed bits')
+            if width != fmt.width:
+                error(lineno, 'pattern uses format of different width')
+            fieldmask |= fmt.fieldmask
+            fixedbits |= fmt.fixedbits
+            fixedmask |= fmt.fixedmask
+            undefmask |= fmt.undefmask
+        else:
+            (fmt, flds) = infer_format(arg, fieldmask, flds, width)
+        arg = fmt.base
+        for f in flds.keys():
+            if f not in arg.fields:
+                error(lineno, f'field {f} not in argument set {arg.name}')
+            if f in fmt.fields.keys():
+                error(lineno, f'field {f} set by format and pattern')
+        for f in arg.fields:
+            if f not in flds.keys() and f not in fmt.fields.keys():
+                error(lineno, f'field {f} not initialized')
+        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
+                      undefmask, fieldmask, flds, width)
+        parent_pat.pats.append(pat)
+        allpatterns.append(pat)
+
+    # Validate the masks that we have assembled.
+    if fieldmask & fixedmask:
+        error(lineno, 'fieldmask overlaps fixedmask ',
+              f'({whex(fieldmask)} & {whex(fixedmask)})')
+    if fieldmask & undefmask:
+        error(lineno, 'fieldmask overlaps undefmask ',
+              f'({whex(fieldmask)} & {whex(undefmask)})')
+    if fixedmask & undefmask:
+        error(lineno, 'fixedmask overlaps undefmask ',
+              f'({whex(fixedmask)} & {whex(undefmask)})')
+    if not is_format:
+        allbits = fieldmask | fixedmask | undefmask
+        if allbits != insnmask:
+            error(lineno, 'bits left unspecified ',
+                  f'({whex(allbits ^ insnmask)})')
+# end parse_generic
+
+
+def parse_file(f, parent_pat):
+    """Parse all of the patterns within a file"""
+    global re_arg_ident
+    global re_fld_ident
+    global re_fmt_ident
+    global re_pat_ident
+
+    # Read all of the lines of the file.  Concatenate lines
+    # ending in backslash; discard empty lines and comments.
+    toks = []
+    lineno = 0
+    nesting = 0
+    nesting_pats = []
+
+    for line in f:
+        lineno += 1
+
+        # Expand and strip spaces, to find indent.
+        line = line.rstrip()
+        line = line.expandtabs()
+        len1 = len(line)
+        line = line.lstrip()
+        len2 = len(line)
+
+        # Discard comments
+        end = line.find('#')
+        if end >= 0:
+            line = line[:end]
+
+        t = line.split()
+        if len(toks) != 0:
+            # Next line after continuation
+            toks.extend(t)
+        else:
+            # Allow completely blank lines.
+            if len1 == 0:
+                continue
+            indent = len1 - len2
+            # Empty line due to comment.
+            if len(t) == 0:
+                # Indentation must be correct, even for comment lines.
+                if indent != nesting:
+                    error(lineno, 'indentation ', indent, ' != ', nesting)
+                continue
+            start_lineno = lineno
+            toks = t
+
+        # Continuation?
+        if toks[-1] == '\\':
+            toks.pop()
+            continue
+
+        name = toks[0]
+        del toks[0]
+
+        # End nesting?
+        if name == '}' or name == ']':
+            if len(toks) != 0:
+                error(start_lineno, 'extra tokens after close brace')
+
+            # Make sure { } and [ ] nest properly.
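+            # e.g. a group opened with '{' (an IncMultiPattern) must be
+            # closed by '}', and one opened with '[' by ']'.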
+ if (name == '}') != isinstance(parent_pat, IncMultiPattern): + error(lineno, 'mismatched close brace') + + try: + parent_pat = nesting_pats.pop() + except: + error(lineno, 'extra close brace') + + nesting -= 2 + if indent != nesting: + error(lineno, 'indentation ', indent, ' != ', nesting) + + toks = [] + continue + + # Everything else should have current indentation. + if indent != nesting: + error(start_lineno, 'indentation ', indent, ' != ', nesting) + + # Start nesting? + if name == '{' or name == '[': + if len(toks) != 0: + error(start_lineno, 'extra tokens after open brace') + + if name == '{': + nested_pat = IncMultiPattern(start_lineno) + else: + nested_pat = ExcMultiPattern(start_lineno) + parent_pat.pats.append(nested_pat) + nesting_pats.append(parent_pat) + parent_pat = nested_pat + + nesting += 2 + toks = [] + continue + + # Determine the type of object needing to be parsed. + if re.fullmatch(re_fld_ident, name): + parse_field(start_lineno, name[1:], toks) + elif re.fullmatch(re_arg_ident, name): + parse_arguments(start_lineno, name[1:], toks) + elif re.fullmatch(re_fmt_ident, name): + parse_generic(start_lineno, None, name[1:], toks) + elif re.fullmatch(re_pat_ident, name): + parse_generic(start_lineno, parent_pat, name, toks) + else: + error(lineno, f'invalid token "{name}"') + toks = [] + + if nesting != 0: + error(lineno, 'missing close brace') +# end parse_file + + +class SizeTree: + """Class representing a node in a size decode tree""" + + def __init__(self, m, w): + self.mask = m + self.subs = [] + self.base = None + self.width = w + + def str1(self, i): + ind = str_indent(i) + r = ind + whex(self.mask) + ' [\n' + for (b, s) in self.subs: + r += ind + f' {whex(b)}:\n' + r += s.str1(i + 4) + '\n' + r += ind + ']' + return r + + def __str__(self): + return self.str1(0) + + def output_code(self, i, extracted, outerbits, outermask): + ind = str_indent(i) + + # If we need to load more bytes to test, do so now. + if extracted < self.width: + output(ind, f'insn = {decode_function}_load_bytes', + f'(ctx, insn, {extracted // 8}, {self.width // 8});\n') + extracted = self.width + + # Attempt to aid the compiler in producing compact switch statements. + # If the bits in the mask are contiguous, extract them. + sh = is_contiguous(self.mask) + if sh > 0: + # Propagate SH down into the local functions. + def str_switch(b, sh=sh): + return f'(insn >> {sh}) & {b >> sh:#x}' + + def str_case(b, sh=sh): + return hex(b >> sh) + else: + def str_switch(b): + return f'insn & {whexC(b)}' + + def str_case(b): + return whexC(b) + + output(ind, 'switch (', str_switch(self.mask), ') {\n') + for b, s in sorted(self.subs): + innermask = outermask | self.mask + innerbits = outerbits | b + output(ind, 'case ', str_case(b), ':\n') + output(ind, ' /* ', + str_match_bits(innerbits, innermask), ' */\n') + s.output_code(i + 4, extracted, innerbits, innermask) + output(ind, '}\n') + output(ind, 'return insn;\n') +# end SizeTree + +class SizeLeaf: + """Class representing a leaf node in a size decode tree""" + + def __init__(self, m, w): + self.mask = m + self.width = w + + def str1(self, i): + return str_indent(i) + whex(self.mask) + + def __str__(self): + return self.str1(0) + + def output_code(self, i, extracted, outerbits, outermask): + global decode_function + ind = str_indent(i) + + # If we need to load more bytes, do so now. 
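+        # For instance, with extracted == 16 and width == 32 this emits,
+        # assuming the default decoder name:
+        #   insn = decode_load_bytes(ctx, insn, 2, 4);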
+ if extracted < self.width: + output(ind, f'insn = {decode_function}_load_bytes', + f'(ctx, insn, {extracted // 8}, {self.width // 8});\n') + extracted = self.width + output(ind, 'return insn;\n') +# end SizeLeaf + + +def build_size_tree(pats, width, outerbits, outermask): + global insnwidth + + # Collect the mask of bits that are fixed in this width + innermask = 0xff << (insnwidth - width) + innermask &= ~outermask + minwidth = None + onewidth = True + for i in pats: + innermask &= i.fixedmask + if minwidth is None: + minwidth = i.width + elif minwidth != i.width: + onewidth = False; + if minwidth < i.width: + minwidth = i.width + + if onewidth: + return SizeLeaf(innermask, minwidth) + + if innermask == 0: + if width < minwidth: + return build_size_tree(pats, width + 8, outerbits, outermask) + + pnames = [] + for p in pats: + pnames.append(p.name + ':' + p.file + ':' + str(p.lineno)) + error_with_file(pats[0].file, pats[0].lineno, + f'overlapping patterns size {width}:', pnames) + + bins = {} + for i in pats: + fb = i.fixedbits & innermask + if fb in bins: + bins[fb].append(i) + else: + bins[fb] = [i] + + fullmask = outermask | innermask + lens = sorted(bins.keys()) + if len(lens) == 1: + b = lens[0] + return build_size_tree(bins[b], width + 8, b | outerbits, fullmask) + + r = SizeTree(innermask, width) + for b, l in bins.items(): + s = build_size_tree(l, width, b | outerbits, fullmask) + r.subs.append((b, s)) + return r +# end build_size_tree + + +def prop_size(tree): + """Propagate minimum widths up the decode size tree""" + + if isinstance(tree, SizeTree): + min = None + for (b, s) in tree.subs: + width = prop_size(s) + if min is None or min > width: + min = width + assert min >= tree.width + tree.width = min + else: + min = tree.width + return min +# end prop_size + + +def main(): + global arguments + global formats + global allpatterns + global translate_scope + global translate_prefix + global output_fd + global output_file + global input_file + global insnwidth + global insntype + global insnmask + global decode_function + global bitop_width + global variablewidth + global anyextern + + decode_scope = 'static ' + + long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=', + 'static-decode=', 'varinsnwidth='] + try: + (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts) + except getopt.GetoptError as err: + error(0, err) + for o, a in opts: + if o in ('-o', '--output'): + output_file = a + elif o == '--decode': + decode_function = a + decode_scope = '' + elif o == '--static-decode': + decode_function = a + elif o == '--translate': + translate_prefix = a + translate_scope = '' + elif o in ('-w', '--insnwidth', '--varinsnwidth'): + if o == '--varinsnwidth': + variablewidth = True + insnwidth = int(a) + if insnwidth == 16: + insntype = 'uint16_t' + insnmask = 0xffff + elif insnwidth == 64: + insntype = 'uint64_t' + insnmask = 0xffffffffffffffff + bitop_width = 64 + elif insnwidth != 32: + error(0, 'cannot handle insns of width', insnwidth) + else: + assert False, 'unhandled option' + + if len(args) < 1: + error(0, 'missing input file') + + toppat = ExcMultiPattern(0) + + for filename in args: + input_file = filename + f = open(filename, 'rt', encoding='utf-8') + parse_file(f, toppat) + f.close() + + # We do not want to compute masks for toppat, because those masks + # are used as a starting point for build_tree. For toppat, we must + # insist that decode begins from naught. 
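+    # (Masks are therefore propagated for each of toppat's children
+    # individually, below.)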
+ for i in toppat.pats: + i.prop_masks() + + toppat.build_tree() + toppat.prop_format() + + if variablewidth: + for i in toppat.pats: + i.prop_width() + stree = build_size_tree(toppat.pats, 8, 0, 0) + prop_size(stree) + + if output_file: + output_fd = open(output_file, 'wt', encoding='utf-8') + else: + output_fd = io.TextIOWrapper(sys.stdout.buffer, + encoding=sys.stdout.encoding, + errors="ignore") + + output_autogen() + for n in sorted(arguments.keys()): + f = arguments[n] + f.output_def() + + # A single translate function can be invoked for different patterns. + # Make sure that the argument sets are the same, and declare the + # function only once. + # + # If we're sharing formats, we're likely also sharing trans_* functions, + # but we can't tell which ones. Prevent issues from the compiler by + # suppressing redundant declaration warnings. + if anyextern: + output("#pragma GCC diagnostic push\n", + "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n", + "#ifdef __clang__\n" + "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n", + "#endif\n\n") + + out_pats = {} + for i in allpatterns: + if i.name in out_pats: + p = out_pats[i.name] + if i.base.base != p.base.base: + error(0, i.name, ' has conflicting argument sets') + else: + i.output_decl() + out_pats[i.name] = i + output('\n') + + if anyextern: + output("#pragma GCC diagnostic pop\n\n") + + for n in sorted(formats.keys()): + f = formats[n] + f.output_extract() + + output(decode_scope, 'bool ', decode_function, + '(DisasContext *ctx, ', insntype, ' insn)\n{\n') + + i4 = str_indent(4) + + if len(allpatterns) != 0: + output(i4, 'union {\n') + for n in sorted(arguments.keys()): + f = arguments[n] + output(i4, i4, f.struct_name(), ' f_', f.name, ';\n') + output(i4, '} u;\n\n') + toppat.output_code(4, False, 0, 0) + + output(i4, 'return false;\n') + output('}\n') + + if variablewidth: + output('\n', decode_scope, insntype, ' ', decode_function, + '_load(DisasContext *ctx)\n{\n', + ' ', insntype, ' insn = 0;\n\n') + stree.output_code(4, 0, 0, 0) + output('}\n') + + if output_file: + output_fd.close() +# end main + + +if __name__ == '__main__': + main() diff --git a/scripts/device-crash-test b/scripts/device-crash-test new file mode 100755 index 000000000..7fbd99158 --- /dev/null +++ b/scripts/device-crash-test @@ -0,0 +1,591 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2017 Red Hat Inc +# +# Author: +# Eduardo Habkost <ehabkost@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Run QEMU with all combinations of -machine and -device types, +check for crashes and unexpected errors. 
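+
+A typical invocation (binary path is illustrative) looks like:
+
+    ./scripts/device-crash-test -t machine=q35 ./qemu-system-x86_64
+
+which limits the generated test cases to the q35 machine type.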
+""" + +import os +import sys +import glob +import logging +import traceback +import re +import random +import argparse +from itertools import chain + +sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python')) +from qemu.machine import QEMUMachine +from qemu.aqmp import ConnectError + +logger = logging.getLogger('device-crash-test') +dbg = logger.debug + + +# Purposes of the following rule list: +# * Avoiding verbose log messages when we find known non-fatal +# (exitcode=1) errors +# * Avoiding fatal errors when we find known crashes +# * Skipping machines/devices that are known not to work out of +# the box, when running in --quick mode +# +# Keeping the rule list updated is desirable, but not required, +# because unexpected cases where QEMU exits with exitcode=1 will +# just trigger a INFO message. + +# Valid error rule keys: +# * accel: regexp, full match only +# * machine: regexp, full match only +# * device: regexp, full match only +# * log: regexp, partial match allowed +# * exitcode: if not present, defaults to 1. If None, matches any exitcode +# * warn: if True, matching failures will be logged as warnings +# * expected: if True, QEMU is expected to always fail every time +# when testing the corresponding test case +# * loglevel: log level of log output when there's a match. +ERROR_RULE_LIST = [ + # Machines that won't work out of the box: + # MACHINE | ERROR MESSAGE + {'machine':'niagara', 'expected':True}, # Unable to load a firmware for -M niagara + {'machine':'boston', 'expected':True}, # Please provide either a -kernel or -bios argument + {'machine':'leon3_generic', 'expected':True}, # Can't read bios image (null) + + # devices that don't work out of the box because they require extra options to "-device DEV": + # DEVICE | ERROR MESSAGE + {'device':'.*-(i386|x86_64)-cpu', 'expected':True}, # CPU socket-id is not set + {'device':'icp', 'expected':True}, # icp_realize: required link 'xics' not found: Property '.xics' not found + {'device':'ics', 'expected':True}, # ics_base_realize: required link 'xics' not found: Property '.xics' not found + # "-device ide-cd" does work on more recent QEMU versions, so it doesn't have expected=True + {'device':'ide-cd'}, # No drive specified + {'device':'ide-hd', 'expected':True}, # No drive specified + {'device':'ipmi-bmc-extern', 'expected':True}, # IPMI external bmc requires chardev attribute + {'device':'isa-debugcon', 'expected':True}, # Can't create serial device, empty char device + {'device':'isa-ipmi-bt', 'expected':True}, # IPMI device requires a bmc attribute to be set + {'device':'isa-ipmi-kcs', 'expected':True}, # IPMI device requires a bmc attribute to be set + {'device':'isa-parallel', 'expected':True}, # Can't create serial device, empty char device + {'device':'ivshmem-doorbell', 'expected':True}, # You must specify a 'chardev' + {'device':'ivshmem-plain', 'expected':True}, # You must specify a 'memdev' + {'device':'loader', 'expected':True}, # please include valid arguments + {'device':'nand', 'expected':True}, # Unsupported NAND block size 0x1 + {'device':'nvdimm', 'expected':True}, # 'memdev' property is not set + {'device':'nvme', 'expected':True}, # Device initialization failed + {'device':'pc-dimm', 'expected':True}, # 'memdev' property is not set + {'device':'pci-bridge', 'expected':True}, # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0. + {'device':'pci-bridge-seat', 'expected':True}, # Bridge chassis not specified. 
Each bridge is required to be assigned a unique chassis id > 0. + {'device':'pxb', 'expected':True}, # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0. + {'device':'scsi-block', 'expected':True}, # drive property not set + {'device':'scsi-generic', 'expected':True}, # drive property not set + {'device':'scsi-hd', 'expected':True}, # drive property not set + {'device':'spapr-pci-host-bridge', 'expected':True}, # BUID not specified for PHB + {'device':'spapr-rng', 'expected':True}, # spapr-rng needs an RNG backend! + {'device':'spapr-vty', 'expected':True}, # chardev property not set + {'device':'tpm-tis', 'expected':True}, # tpm_tis: backend driver with id (null) could not be found + {'device':'unimplemented-device', 'expected':True}, # property 'size' not specified or zero + {'device':'usb-braille', 'expected':True}, # Property chardev is required + {'device':'usb-mtp', 'expected':True}, # rootdir property must be configured + {'device':'usb-redir', 'expected':True}, # Parameter 'chardev' is missing + {'device':'usb-serial', 'expected':True}, # Property chardev is required + {'device':'usb-storage', 'expected':True}, # drive property not set + {'device':'vfio-amd-xgbe', 'expected':True}, # -device vfio-amd-xgbe: vfio error: wrong host device name + {'device':'vfio-calxeda-xgmac', 'expected':True}, # -device vfio-calxeda-xgmac: vfio error: wrong host device name + {'device':'vfio-pci', 'expected':True}, # No provided host device + {'device':'vfio-pci-igd-lpc-bridge', 'expected':True}, # VFIO dummy ISA/LPC bridge must have address 1f.0 + {'device':'vhost-scsi.*', 'expected':True}, # vhost-scsi: missing wwpn + {'device':'vhost-vsock-device', 'expected':True}, # guest-cid property must be greater than 2 + {'device':'vhost-vsock-pci', 'expected':True}, # guest-cid property must be greater than 2 + {'device':'virtio-9p-ccw', 'expected':True}, # 9pfs device couldn't find fsdev with the id = NULL + {'device':'virtio-9p-device', 'expected':True}, # 9pfs device couldn't find fsdev with the id = NULL + {'device':'virtio-9p-pci', 'expected':True}, # 9pfs device couldn't find fsdev with the id = NULL + {'device':'virtio-blk-ccw', 'expected':True}, # drive property not set + {'device':'virtio-blk-device', 'expected':True}, # drive property not set + {'device':'virtio-blk-device', 'expected':True}, # drive property not set + {'device':'virtio-blk-pci', 'expected':True}, # drive property not set + {'device':'virtio-crypto-ccw', 'expected':True}, # 'cryptodev' parameter expects a valid object + {'device':'virtio-crypto-device', 'expected':True}, # 'cryptodev' parameter expects a valid object + {'device':'virtio-crypto-pci', 'expected':True}, # 'cryptodev' parameter expects a valid object + {'device':'virtio-input-host-device', 'expected':True}, # evdev property is required + {'device':'virtio-input-host-pci', 'expected':True}, # evdev property is required + {'device':'xen-pvdevice', 'expected':True}, # Device ID invalid, it must always be supplied + {'device':'vhost-vsock-ccw', 'expected':True}, # guest-cid property must be greater than 2 + {'device':'zpci', 'expected':True}, # target must be defined + {'device':'pnv-(occ|icp|lpc)', 'expected':True}, # required link 'xics' not found: Property '.xics' not found + {'device':'powernv-cpu-.*', 'expected':True}, # pnv_core_realize: required link 'xics' not found: Property '.xics' not found + + # ioapic devices are already created by pc and will fail: + {'machine':'q35|pc.*', 'device':'kvm-ioapic', 'expected':True}, # 
Only 1 ioapics allowed + {'machine':'q35|pc.*', 'device':'ioapic', 'expected':True}, # Only 1 ioapics allowed + + # "spapr-cpu-core needs a pseries machine" + {'machine':'(?!pseries).*', 'device':'.*-spapr-cpu-core', 'expected':True}, + + # KVM-specific devices shouldn't be tried without accel=kvm: + {'accel':'(?!kvm).*', 'device':'kvmclock', 'expected':True}, + + # xen-specific machines and devices: + {'accel':'(?!xen).*', 'machine':'xen.*', 'expected':True}, + {'accel':'(?!xen).*', 'device':'xen-.*', 'expected':True}, + + # this fails on some machine-types, but not all, so they don't have expected=True: + {'device':'vmgenid'}, # vmgenid requires DMA write support in fw_cfg, which this machine type does not provide + + # Silence INFO messages for errors that are common on multiple + # devices/machines: + {'log':r"No '[\w-]+' bus found for device '[\w-]+'"}, + {'log':r"images* must be given with the 'pflash' parameter"}, + {'log':r"(Guest|ROM|Flash|Kernel) image must be specified"}, + {'log':r"[cC]ould not load [\w ]+ (BIOS|bios) '[\w-]+\.bin'"}, + {'log':r"Couldn't find rom image '[\w-]+\.bin'"}, + {'log':r"speed mismatch trying to attach usb device"}, + {'log':r"Can't create a second ISA bus"}, + {'log':r"duplicate fw_cfg file name"}, + # sysbus-related error messages: most machines reject most dynamic sysbus devices: + {'log':r"Option '-device [\w.,-]+' cannot be handled by this machine"}, + {'log':r"Device [\w.,-]+ is not supported by this machine yet"}, + {'log':r"Device [\w.,-]+ can not be dynamically instantiated"}, + {'log':r"Platform Bus: Can not fit MMIO region of size "}, + # other more specific errors we will ignore: + {'device':'.*-spapr-cpu-core', 'log':r"CPU core type should be"}, + {'log':r"MSI(-X)? is not supported by interrupt controller"}, + {'log':r"pxb-pcie? devices cannot reside on a PCIe? bus"}, + {'log':r"Ignoring smp_cpus value"}, + {'log':r"sd_init failed: Drive 'sd0' is already in use because it has been automatically connected to another device"}, + {'log':r"This CPU requires a smaller page size than the system is using"}, + {'log':r"MSI-X support is mandatory in the S390 architecture"}, + {'log':r"rom check and register reset failed"}, + {'log':r"Unable to initialize GIC, CPUState for CPU#0 not valid"}, + {'log':r"Multiple VT220 operator consoles are not supported"}, + {'log':r"core 0 already populated"}, + {'log':r"could not find stage1 bootloader"}, + + # other exitcode=1 failures not listed above will just generate INFO messages: + {'exitcode':1, 'loglevel':logging.INFO}, + + # everything else (including SIGABRT and SIGSEGV) will be a fatal error: + {'exitcode':None, 'fatal':True, 'loglevel':logging.FATAL}, +] + + +def errorRuleTestCaseMatch(rule, t): + """Check if a test case specification can match a error rule + + This only checks if a error rule is a candidate match + for a given test case, it won't check if the test case + results/output match the rule. See ruleListResultMatch(). 
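+
+    For example, the (hypothetical) rule {'device': 'ide-.*'} is a
+    candidate match for the test case {'device': 'ide-hd'}.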
+ """ + return (('machine' not in rule or + 'machine' not in t or + re.match(rule['machine'] + '$', t['machine'])) and + ('accel' not in rule or + 'accel' not in t or + re.match(rule['accel'] + '$', t['accel'])) and + ('device' not in rule or + 'device' not in t or + re.match(rule['device'] + '$', t['device']))) + + +def ruleListCandidates(t): + """Generate the list of candidates that can match a test case""" + for i, rule in enumerate(ERROR_RULE_LIST): + if errorRuleTestCaseMatch(rule, t): + yield (i, rule) + + +def findExpectedResult(t): + """Check if there's an expected=True error rule for a test case + + Returns (i, rule) tuple, where i is the index in + ERROR_RULE_LIST and rule is the error rule itself. + """ + for i, rule in ruleListCandidates(t): + if rule.get('expected'): + return (i, rule) + + +def ruleListResultMatch(rule, r): + """Check if test case results/output match a error rule + + It is valid to call this function only if + errorRuleTestCaseMatch() is True for the rule (e.g. on + rules returned by ruleListCandidates()) + """ + assert errorRuleTestCaseMatch(rule, r['testcase']) + return ((rule.get('exitcode', 1) is None or + r['exitcode'] == rule.get('exitcode', 1)) and + ('log' not in rule or + re.search(rule['log'], r['log'], re.MULTILINE))) + + +def checkResultRuleList(r): + """Look up error rule for a given test case result + + Returns (i, rule) tuple, where i is the index in + ERROR_RULE_LIST and rule is the error rule itself. + """ + for i, rule in ruleListCandidates(r['testcase']): + if ruleListResultMatch(rule, r): + return i, rule + + raise Exception("this should never happen") + + +def qemuOptsEscape(s): + """Escape option value QemuOpts""" + return s.replace(",", ",,") + + +def formatTestCase(t): + """Format test case info as "key=value key=value" for prettier logging output""" + return ' '.join('%s=%s' % (k, v) for k, v in t.items()) + + +def qomListTypeNames(vm, **kwargs): + """Run qom-list-types QMP command, return type names""" + types = vm.command('qom-list-types', **kwargs) + return [t['name'] for t in types] + + +def infoQDM(vm): + """Parse 'info qdm' output""" + args = {'command-line': 'info qdm'} + devhelp = vm.command('human-monitor-command', **args) + for l in devhelp.split('\n'): + l = l.strip() + if l == '' or l.endswith(':'): + continue + d = {'name': re.search(r'name "([^"]+)"', l).group(1), + 'no-user': (re.search(', no-user', l) is not None)} + yield d + + +class QemuBinaryInfo(object): + def __init__(self, binary, devtype): + if devtype is None: + devtype = 'device' + + self.binary = binary + self._machine_info = {} + + dbg("devtype: %r", devtype) + args = ['-S', '-machine', 'none,accel=kvm:tcg'] + dbg("querying info for QEMU binary: %s", binary) + vm = QEMUMachine(binary=binary, args=args) + vm.launch() + try: + self.alldevs = set(qomListTypeNames(vm, implements=devtype, abstract=False)) + # there's no way to query DeviceClass::user_creatable using QMP, + # so use 'info qdm': + self.no_user_devs = set([d['name'] for d in infoQDM(vm, ) if d['no-user']]) + self.machines = list(m['name'] for m in vm.command('query-machines')) + self.user_devs = self.alldevs.difference(self.no_user_devs) + self.kvm_available = vm.command('query-kvm')['enabled'] + finally: + vm.shutdown() + + def machineInfo(self, machine): + """Query for information on a specific machine-type + + Results are cached internally, in case the same machine- + type is queried multiple times. 
+ """ + if machine in self._machine_info: + return self._machine_info[machine] + + mi = {} + args = ['-S', '-machine', '%s' % (machine)] + dbg("querying machine info for binary=%s machine=%s", self.binary, machine) + vm = QEMUMachine(binary=self.binary, args=args) + try: + vm.launch() + mi['runnable'] = True + except Exception: + dbg("exception trying to run binary=%s machine=%s", self.binary, machine, exc_info=sys.exc_info()) + dbg("log: %r", vm.get_log()) + mi['runnable'] = False + + vm.shutdown() + self._machine_info[machine] = mi + return mi + + +BINARY_INFO = {} + + +def getBinaryInfo(args, binary): + if binary not in BINARY_INFO: + BINARY_INFO[binary] = QemuBinaryInfo(binary, args.devtype) + return BINARY_INFO[binary] + + +def checkOneCase(args, testcase): + """Check one specific case + + Returns a dictionary containing failure information on error, + or None on success + """ + binary = testcase['binary'] + accel = testcase['accel'] + machine = testcase['machine'] + device = testcase['device'] + + dbg("will test: %r", testcase) + + args = ['-S', '-machine', '%s,accel=%s' % (machine, accel), + '-device', qemuOptsEscape(device)] + cmdline = ' '.join([binary] + args) + dbg("will launch QEMU: %s", cmdline) + vm = QEMUMachine(binary=binary, args=args, qmp_timer=15) + + exc = None + exc_traceback = None + try: + vm.launch() + except Exception as this_exc: + exc = this_exc + exc_traceback = traceback.format_exc() + dbg("Exception while running test case") + finally: + vm.shutdown() + ec = vm.exitcode() + log = vm.get_log() + + if exc is not None or ec != 0: + return {'exc': exc, + 'exc_traceback':exc_traceback, + 'exitcode':ec, + 'log':log, + 'testcase':testcase, + 'cmdline':cmdline} + + +def binariesToTest(args, testcase): + if args.qemu: + r = args.qemu + else: + r = [f.path for f in os.scandir('.') + if f.name.startswith('qemu-system-') and + f.is_file() and os.access(f, os.X_OK)] + return r + + +def accelsToTest(args, testcase): + if getBinaryInfo(args, testcase['binary']).kvm_available: + yield 'kvm' + yield 'tcg' + + +def machinesToTest(args, testcase): + return getBinaryInfo(args, testcase['binary']).machines + + +def devicesToTest(args, testcase): + return getBinaryInfo(args, testcase['binary']).user_devs + + +TESTCASE_VARIABLES = [ + ('binary', binariesToTest), + ('accel', accelsToTest), + ('machine', machinesToTest), + ('device', devicesToTest), +] + + +def genCases1(args, testcases, var, fn): + """Generate new testcases for one variable + + If an existing item already has a variable set, don't + generate new items and just return it directly. This + allows the "-t" command-line option to be used to choose + a specific test case. 
+ """ + for testcase in testcases: + if var in testcase: + yield testcase.copy() + else: + for i in fn(args, testcase): + t = testcase.copy() + t[var] = i + yield t + + +def genCases(args, testcase): + """Generate test cases for all variables + """ + cases = [testcase.copy()] + for var, fn in TESTCASE_VARIABLES: + dbg("var: %r, fn: %r", var, fn) + cases = genCases1(args, cases, var, fn) + return cases + + +def casesToTest(args, testcase): + cases = genCases(args, testcase) + if args.random: + cases = list(cases) + cases = random.sample(cases, min(args.random, len(cases))) + if args.debug: + cases = list(cases) + dbg("%d test cases to test", len(cases)) + if args.shuffle: + cases = list(cases) + random.shuffle(cases) + return cases + + +def logFailure(f, level): + t = f['testcase'] + logger.log(level, "failed: %s", formatTestCase(t)) + logger.log(level, "cmdline: %s", f['cmdline']) + for l in f['log'].strip().split('\n'): + logger.log(level, "log: %s", l) + logger.log(level, "exit code: %r", f['exitcode']) + + # If the Exception is merely a QMP connect error, + # reduce the logging level for its traceback to + # improve visual clarity. + if isinstance(f.get('exc'), ConnectError): + logger.log(level, "%s.%s: %s", + type(f['exc']).__module__, + type(f['exc']).__qualname__, + str(f['exc'])) + level = logging.DEBUG + + if f['exc_traceback']: + logger.log(level, "exception:") + for l in f['exc_traceback'].split('\n'): + logger.log(level, " %s", l.rstrip('\n')) + + +def main(): + parser = argparse.ArgumentParser(description="QEMU -device crash test") + parser.add_argument('-t', metavar='KEY=VALUE', nargs='*', + help="Limit test cases to KEY=VALUE", + action='append', dest='testcases', default=[]) + parser.add_argument('-d', '--debug', action='store_true', + help='debug output') + parser.add_argument('-v', '--verbose', action='store_true', default=True, + help='verbose output') + parser.add_argument('-q', '--quiet', dest='verbose', action='store_false', + help='non-verbose output') + parser.add_argument('-r', '--random', type=int, metavar='COUNT', + help='run a random sample of COUNT test cases', + default=0) + parser.add_argument('--shuffle', action='store_true', + help='Run test cases in random order') + parser.add_argument('--dry-run', action='store_true', + help="Don't run any tests, just generate list") + parser.add_argument('-D', '--devtype', metavar='TYPE', + help="Test only device types that implement TYPE") + parser.add_argument('-Q', '--quick', action='store_true', default=True, + help="Quick mode: skip test cases that are expected to fail") + parser.add_argument('-F', '--full', action='store_false', dest='quick', + help="Full mode: test cases that are expected to fail") + parser.add_argument('--strict', action='store_true', dest='strict', + help="Treat all warnings as fatal") + parser.add_argument('qemu', nargs='*', metavar='QEMU', + help='QEMU binary to run') + args = parser.parse_args() + + if args.debug: + lvl = logging.DEBUG + elif args.verbose: + lvl = logging.INFO + else: + lvl = logging.WARN + logging.basicConfig(stream=sys.stdout, level=lvl, format='%(levelname)s: %(message)s') + + if not args.debug: + # Async QMP, when in use, is chatty about connection failures. + # This script knowingly generates a ton of connection errors. + # Silence this logger. 
+        logging.getLogger('qemu.aqmp.qmp_client').setLevel(logging.CRITICAL)
+
+    fatal_failures = []
+    wl_stats = {}
+    skipped = 0
+    total = 0
+
+    tc = {}
+    dbg("testcases: %r", args.testcases)
+    if args.testcases:
+        for t in chain(*args.testcases):
+            for kv in t.split():
+                k, v = kv.split('=', 1)
+                tc[k] = v
+
+    if len(binariesToTest(args, tc)) == 0:
+        print("No QEMU binary found", file=sys.stderr)
+        parser.print_usage(sys.stderr)
+        return 1
+
+    for t in casesToTest(args, tc):
+        logger.info("running test case: %s", formatTestCase(t))
+        total += 1
+
+        expected_match = findExpectedResult(t)
+        if (args.quick and
+                (expected_match or
+                 not getBinaryInfo(args, t['binary']).machineInfo(t['machine'])['runnable'])):
+            dbg("skipped: %s", formatTestCase(t))
+            skipped += 1
+            continue
+
+        if args.dry_run:
+            continue
+
+        try:
+            f = checkOneCase(args, t)
+        except KeyboardInterrupt:
+            break
+
+        if f:
+            i, rule = checkResultRuleList(f)
+            dbg("testcase: %r, rule list match: %r", t, rule)
+            wl_stats.setdefault(i, []).append(f)
+            level = rule.get('loglevel', logging.DEBUG)
+            logFailure(f, level)
+            if rule.get('fatal') or (args.strict and level >= logging.WARN):
+                fatal_failures.append(f)
+        else:
+            dbg("success: %s", formatTestCase(t))
+            if expected_match:
+                logger.warning("Didn't fail as expected: %s", formatTestCase(t))
+
+    logger.info("Total: %d test cases", total)
+    if skipped:
+        logger.info("Skipped %d test cases", skipped)
+
+    if args.debug:
+        stats = sorted([(len(wl_stats.get(i, [])), rule) for i, rule in
+                        enumerate(ERROR_RULE_LIST)], key=lambda x: x[0])
+        for count, rule in stats:
+            dbg("error rule stats: %d: %r", count, rule)
+
+    if fatal_failures:
+        for f in fatal_failures:
+            t = f['testcase']
+            logger.error("Fatal failure: %s", formatTestCase(t))
+        logger.error("Fatal failures on some machine/device combinations")
+        return 1
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/scripts/disas-objdump.pl b/scripts/disas-objdump.pl new file mode 100755 index 000000000..bec905f04 --- /dev/null +++ b/scripts/disas-objdump.pl @@ -0,0 +1,101 @@
+#!/usr/bin/env perl
+
+use warnings;
+
+use File::Temp qw/ tempfile /;
+use Getopt::Long;
+
+# Default to the system objdump if a cross-compiler edition is not given.
+my $aobjdump = "objdump";
+my $hobjdump = "";
+my $tobjdump = "";
+my $hmachine = "";
+my $tmachine = "";
+
+GetOptions ('O|objdump=s' => \$aobjdump,
+            'host-objdump=s' => \$hobjdump,
+            'target-objdump=s' => \$tobjdump,
+            'h|host-machine=s' => \$hmachine,
+            't|target-machine=s' => \$tmachine);
+
+# But we can't default the machines.  Sanity check that we've at least one.
+die "No host or target machine type" if !$hmachine && !$tmachine;
+
+# Reuse one temp file for all of the hunks.
+my ($outh, $outname) = tempfile();
+binmode($outh);
+END { unlink $outname; }
+
+# Pre-construct the command-lines for executing the dump.
+sub mkobjcommand ($$) {
+    my ($cmd, $mach) = @_;
+    return 0 if !$mach;
+    $cmd = $aobjdump if !$cmd;
+    return "$cmd -m $mach --disassemble-all -b binary";
+}
+
+$objdump[1] = mkobjcommand($hobjdump, $hmachine);
+$objdump[2] = mkobjcommand($tobjdump, $tmachine);
+
+# Zero-initialize current dumping state.
+my $mem = "";
+my $inobjd = 0;
+my $vma = 0;
+
+sub objcommand {
+    my $ret = $objdump[$inobjd];
+    if (!$ret) {
+        die "Host machine type not specified" if $inobjd == 1;
+        die "Target machine type not specified" if $inobjd == 2;
+        die "Internal error";
+    }
+    return $ret;
+}
+
+while (<>) {
+    # Collect the data from the relevant OBJD-* lines ...
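+    # A hunk of input looks like this (hex bytes are illustrative):
+    #   0x0000004000801000:
+    #   OBJD-T: 93070a2093070a20
+    #
+    # where the 0x... line is optional and the following blank line
+    # triggers the disassembly dump below.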
+    if (/^OBJD-H: /) {
+        die "Internal error" if $inobjd == 2;
+        $mem = $mem . pack("H*", substr($_, 8, -1));
+        $inobjd = 1;
+    } elsif (/^OBJD-T: /) {
+        die "Internal error" if $inobjd == 1;
+        $mem = $mem . pack("H*", substr($_, 8, -1));
+        $inobjd = 2;
+    }
+    # ... which will always be followed by a blank line,
+    # at which point we should produce our dump.
+    elsif ($inobjd) {
+        # Rewrite the temp file in one go; it will usually be small.
+        sysseek $outh, 0, 0;
+        truncate $outh, 0;
+        syswrite $outh, $mem;
+
+        my $cmd = objcommand();
+        $cmd = $cmd . " --adjust-vma=" . $vma if $vma;
+        $cmd = $cmd . " " . $outname;
+
+        # Pipe from objdump...
+        open IN, "-|", $cmd;
+
+        # ... copying all but the first 7 lines of boilerplate to our stdout.
+        my $i = 0;
+        while (<IN>) {
+            print if (++$i > 7);
+        }
+        close IN;
+        print "\n";
+
+        $mem = "";
+        $inobjd = 0;
+        $vma = 0;
+    }
+    # The line before "OBJD-*" will be of the form "0x<hex>+: +\n".
+    # Extract the value for passing to --adjust-vma.
+    elsif (/^(0x[0-9a-fA-F]+):\s*$/) {
+        $vma = $1;
+        print;
+    } else {
+        print;
+    }
+}
diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py new file mode 100644 index 000000000..4177261d3 --- /dev/null +++ b/scripts/dump-guest-memory.py @@ -0,0 +1,598 @@
+"""
+This python script adds a new gdb command, "dump-guest-memory". It
+should be loaded with "source dump-guest-memory.py" at the (gdb)
+prompt.
+
+Copyright (C) 2013, Red Hat, Inc.
+
+Authors:
+   Laszlo Ersek <lersek@redhat.com>
+   Janosch Frank <frankja@linux.vnet.ibm.com>
+
+This work is licensed under the terms of the GNU GPL, version 2 or later. See
+the COPYING file in the top-level directory.
+"""
+
+import ctypes
+import struct
+import sys  # used by int128_get64() for sys.byteorder
+
+try:
+    UINTPTR_T = gdb.lookup_type("uintptr_t")
+except Exception as inst:
+    raise gdb.GdbError("Symbols must be loaded prior to sourcing dump-guest-memory.\n"
+                       "Symbols may be loaded by 'attach'ing a QEMU process id or by "
+                       "'load'ing a QEMU binary.")
+
+TARGET_PAGE_SIZE = 0x1000
+TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000
+
+# Special value for e_phnum.  This indicates that the real number of
+# program headers is too large to fit into e_phnum.  Instead the real
+# value is in the field sh_info of section 0.
+PN_XNUM = 0xFFFF + +EV_CURRENT = 1 + +ELFCLASS32 = 1 +ELFCLASS64 = 2 + +ELFDATA2LSB = 1 +ELFDATA2MSB = 2 + +ET_CORE = 4 + +PT_LOAD = 1 +PT_NOTE = 4 + +EM_386 = 3 +EM_PPC = 20 +EM_PPC64 = 21 +EM_S390 = 22 +EM_AARCH = 183 +EM_X86_64 = 62 + +VMCOREINFO_FORMAT_ELF = 1 + +def le16_to_cpu(val): + return struct.unpack("<H", struct.pack("=H", val))[0] + +def le32_to_cpu(val): + return struct.unpack("<I", struct.pack("=I", val))[0] + +def le64_to_cpu(val): + return struct.unpack("<Q", struct.pack("=Q", val))[0] + +class ELF(object): + """Representation of a ELF file.""" + + def __init__(self, arch): + self.ehdr = None + self.notes = [] + self.segments = [] + self.notes_size = 0 + self.endianness = None + self.elfclass = ELFCLASS64 + + if arch == 'aarch64-le': + self.endianness = ELFDATA2LSB + self.elfclass = ELFCLASS64 + self.ehdr = get_arch_ehdr(self.endianness, self.elfclass) + self.ehdr.e_machine = EM_AARCH + + elif arch == 'aarch64-be': + self.endianness = ELFDATA2MSB + self.ehdr = get_arch_ehdr(self.endianness, self.elfclass) + self.ehdr.e_machine = EM_AARCH + + elif arch == 'X86_64': + self.endianness = ELFDATA2LSB + self.ehdr = get_arch_ehdr(self.endianness, self.elfclass) + self.ehdr.e_machine = EM_X86_64 + + elif arch == '386': + self.endianness = ELFDATA2LSB + self.elfclass = ELFCLASS32 + self.ehdr = get_arch_ehdr(self.endianness, self.elfclass) + self.ehdr.e_machine = EM_386 + + elif arch == 's390': + self.endianness = ELFDATA2MSB + self.ehdr = get_arch_ehdr(self.endianness, self.elfclass) + self.ehdr.e_machine = EM_S390 + + elif arch == 'ppc64-le': + self.endianness = ELFDATA2LSB + self.ehdr = get_arch_ehdr(self.endianness, self.elfclass) + self.ehdr.e_machine = EM_PPC64 + + elif arch == 'ppc64-be': + self.endianness = ELFDATA2MSB + self.ehdr = get_arch_ehdr(self.endianness, self.elfclass) + self.ehdr.e_machine = EM_PPC64 + + else: + raise gdb.GdbError("No valid arch type specified.\n" + "Currently supported types:\n" + "aarch64-be, aarch64-le, X86_64, 386, s390, " + "ppc64-be, ppc64-le") + + self.add_segment(PT_NOTE, 0, 0) + + def add_note(self, n_name, n_desc, n_type): + """Adds a note to the ELF.""" + + note = get_arch_note(self.endianness, len(n_name), len(n_desc)) + note.n_namesz = len(n_name) + 1 + note.n_descsz = len(n_desc) + note.n_name = n_name.encode() + note.n_type = n_type + + # Desc needs to be 4 byte aligned (although the 64bit spec + # specifies 8 byte). When defining n_desc as uint32 it will be + # automatically aligned but we need the memmove to copy the + # string into it. 
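+        # (n_desc is a ctypes uint32 array, so the bytes cannot simply be
+        # assigned; memmove copies them in place.)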
+ ctypes.memmove(note.n_desc, n_desc.encode(), len(n_desc)) + + self.notes.append(note) + self.segments[0].p_filesz += ctypes.sizeof(note) + self.segments[0].p_memsz += ctypes.sizeof(note) + + + def add_vmcoreinfo_note(self, vmcoreinfo): + """Adds a vmcoreinfo note to the ELF dump.""" + # compute the header size, and copy that many bytes from the note + header = get_arch_note(self.endianness, 0, 0) + ctypes.memmove(ctypes.pointer(header), + vmcoreinfo, ctypes.sizeof(header)) + if header.n_descsz > 1 << 20: + print('warning: invalid vmcoreinfo size') + return + # now get the full note + note = get_arch_note(self.endianness, + header.n_namesz - 1, header.n_descsz) + ctypes.memmove(ctypes.pointer(note), vmcoreinfo, ctypes.sizeof(note)) + + self.notes.append(note) + self.segments[0].p_filesz += ctypes.sizeof(note) + self.segments[0].p_memsz += ctypes.sizeof(note) + + def add_segment(self, p_type, p_paddr, p_size): + """Adds a segment to the elf.""" + + phdr = get_arch_phdr(self.endianness, self.elfclass) + phdr.p_type = p_type + phdr.p_paddr = p_paddr + phdr.p_vaddr = p_paddr + phdr.p_filesz = p_size + phdr.p_memsz = p_size + self.segments.append(phdr) + self.ehdr.e_phnum += 1 + + def to_file(self, elf_file): + """Writes all ELF structures to the passed file. + + Structure: + Ehdr + Segment 0:PT_NOTE + Segment 1:PT_LOAD + Segment N:PT_LOAD + Note 0..N + Dump contents + """ + elf_file.write(self.ehdr) + off = ctypes.sizeof(self.ehdr) + \ + len(self.segments) * ctypes.sizeof(self.segments[0]) + + for phdr in self.segments: + phdr.p_offset = off + elf_file.write(phdr) + off += phdr.p_filesz + + for note in self.notes: + elf_file.write(note) + + +def get_arch_note(endianness, len_name, len_desc): + """Returns a Note class with the specified endianness.""" + + if endianness == ELFDATA2LSB: + superclass = ctypes.LittleEndianStructure + else: + superclass = ctypes.BigEndianStructure + + len_name = len_name + 1 + + class Note(superclass): + """Represents an ELF note, includes the content.""" + + _fields_ = [("n_namesz", ctypes.c_uint32), + ("n_descsz", ctypes.c_uint32), + ("n_type", ctypes.c_uint32), + ("n_name", ctypes.c_char * len_name), + ("n_desc", ctypes.c_uint32 * ((len_desc + 3) // 4))] + return Note() + + +class Ident(ctypes.Structure): + """Represents the ELF ident array in the ehdr structure.""" + + _fields_ = [('ei_mag0', ctypes.c_ubyte), + ('ei_mag1', ctypes.c_ubyte), + ('ei_mag2', ctypes.c_ubyte), + ('ei_mag3', ctypes.c_ubyte), + ('ei_class', ctypes.c_ubyte), + ('ei_data', ctypes.c_ubyte), + ('ei_version', ctypes.c_ubyte), + ('ei_osabi', ctypes.c_ubyte), + ('ei_abiversion', ctypes.c_ubyte), + ('ei_pad', ctypes.c_ubyte * 7)] + + def __init__(self, endianness, elfclass): + self.ei_mag0 = 0x7F + self.ei_mag1 = ord('E') + self.ei_mag2 = ord('L') + self.ei_mag3 = ord('F') + self.ei_class = elfclass + self.ei_data = endianness + self.ei_version = EV_CURRENT + + +def get_arch_ehdr(endianness, elfclass): + """Returns a EHDR64 class with the specified endianness.""" + + if endianness == ELFDATA2LSB: + superclass = ctypes.LittleEndianStructure + else: + superclass = ctypes.BigEndianStructure + + class EHDR64(superclass): + """Represents the 64 bit ELF header struct.""" + + _fields_ = [('e_ident', Ident), + ('e_type', ctypes.c_uint16), + ('e_machine', ctypes.c_uint16), + ('e_version', ctypes.c_uint32), + ('e_entry', ctypes.c_uint64), + ('e_phoff', ctypes.c_uint64), + ('e_shoff', ctypes.c_uint64), + ('e_flags', ctypes.c_uint32), + ('e_ehsize', ctypes.c_uint16), + ('e_phentsize', ctypes.c_uint16), + 
('e_phnum', ctypes.c_uint16), + ('e_shentsize', ctypes.c_uint16), + ('e_shnum', ctypes.c_uint16), + ('e_shstrndx', ctypes.c_uint16)] + + def __init__(self): + super(superclass, self).__init__() + self.e_ident = Ident(endianness, elfclass) + self.e_type = ET_CORE + self.e_version = EV_CURRENT + self.e_ehsize = ctypes.sizeof(self) + self.e_phoff = ctypes.sizeof(self) + self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianness, elfclass)) + self.e_phnum = 0 + + + class EHDR32(superclass): + """Represents the 32 bit ELF header struct.""" + + _fields_ = [('e_ident', Ident), + ('e_type', ctypes.c_uint16), + ('e_machine', ctypes.c_uint16), + ('e_version', ctypes.c_uint32), + ('e_entry', ctypes.c_uint32), + ('e_phoff', ctypes.c_uint32), + ('e_shoff', ctypes.c_uint32), + ('e_flags', ctypes.c_uint32), + ('e_ehsize', ctypes.c_uint16), + ('e_phentsize', ctypes.c_uint16), + ('e_phnum', ctypes.c_uint16), + ('e_shentsize', ctypes.c_uint16), + ('e_shnum', ctypes.c_uint16), + ('e_shstrndx', ctypes.c_uint16)] + + def __init__(self): + super(superclass, self).__init__() + self.e_ident = Ident(endianness, elfclass) + self.e_type = ET_CORE + self.e_version = EV_CURRENT + self.e_ehsize = ctypes.sizeof(self) + self.e_phoff = ctypes.sizeof(self) + self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianness, elfclass)) + self.e_phnum = 0 + + # End get_arch_ehdr + if elfclass == ELFCLASS64: + return EHDR64() + else: + return EHDR32() + + +def get_arch_phdr(endianness, elfclass): + """Returns a 32 or 64 bit PHDR class with the specified endianness.""" + + if endianness == ELFDATA2LSB: + superclass = ctypes.LittleEndianStructure + else: + superclass = ctypes.BigEndianStructure + + class PHDR64(superclass): + """Represents the 64 bit ELF program header struct.""" + + _fields_ = [('p_type', ctypes.c_uint32), + ('p_flags', ctypes.c_uint32), + ('p_offset', ctypes.c_uint64), + ('p_vaddr', ctypes.c_uint64), + ('p_paddr', ctypes.c_uint64), + ('p_filesz', ctypes.c_uint64), + ('p_memsz', ctypes.c_uint64), + ('p_align', ctypes.c_uint64)] + + class PHDR32(superclass): + """Represents the 32 bit ELF program header struct.""" + + _fields_ = [('p_type', ctypes.c_uint32), + ('p_offset', ctypes.c_uint32), + ('p_vaddr', ctypes.c_uint32), + ('p_paddr', ctypes.c_uint32), + ('p_filesz', ctypes.c_uint32), + ('p_memsz', ctypes.c_uint32), + ('p_flags', ctypes.c_uint32), + ('p_align', ctypes.c_uint32)] + + # End get_arch_phdr + if elfclass == ELFCLASS64: + return PHDR64() + else: + return PHDR32() + + +def int128_get64(val): + """Returns low 64bit part of Int128 struct.""" + + try: + assert val["hi"] == 0 + return val["lo"] + except gdb.error: + u64t = gdb.lookup_type('uint64_t').array(2) + u64 = val.cast(u64t) + if sys.byteorder == 'little': + assert u64[1] == 0 + return u64[0] + else: + assert u64[0] == 0 + return u64[1] + + +def qlist_foreach(head, field_str): + """Generator for qlists.""" + + var_p = head["lh_first"] + while var_p != 0: + var = var_p.dereference() + var_p = var[field_str]["le_next"] + yield var + + +def qemu_map_ram_ptr(block, offset): + """Returns qemu vaddr for given guest physical address.""" + + return block["host"] + offset + + +def memory_region_get_ram_ptr(memory_region): + if memory_region["alias"] != 0: + return (memory_region_get_ram_ptr(memory_region["alias"].dereference()) + + memory_region["alias_offset"]) + + return qemu_map_ram_ptr(memory_region["ram_block"], 0) + + +def get_guest_phys_blocks(): + """Returns a list of ram blocks. 
+ + Each block entry contains: + 'target_start': guest block phys start address + 'target_end': guest block phys end address + 'host_addr': qemu vaddr of the block's start + """ + + guest_phys_blocks = [] + + print("guest RAM blocks:") + print("target_start target_end host_addr message " + "count") + print("---------------- ---------------- ---------------- ------- " + "-----") + + current_map_p = gdb.parse_and_eval("address_space_memory.current_map") + current_map = current_map_p.dereference() + + # Conversion to int is needed for python 3 + # compatibility. Otherwise range doesn't cast the value itself and + # breaks. + for cur in range(int(current_map["nr"])): + flat_range = (current_map["ranges"] + cur).dereference() + memory_region = flat_range["mr"].dereference() + + # we only care about RAM + if (not memory_region["ram"] or + memory_region["ram_device"] or + memory_region["nonvolatile"]): + continue + + section_size = int128_get64(flat_range["addr"]["size"]) + target_start = int128_get64(flat_range["addr"]["start"]) + target_end = target_start + section_size + host_addr = (memory_region_get_ram_ptr(memory_region) + + flat_range["offset_in_region"]) + predecessor = None + + # find continuity in guest physical address space + if len(guest_phys_blocks) > 0: + predecessor = guest_phys_blocks[-1] + predecessor_size = (predecessor["target_end"] - + predecessor["target_start"]) + + # the memory API guarantees monotonically increasing + # traversal + assert predecessor["target_end"] <= target_start + + # we want continuity in both guest-physical and + # host-virtual memory + if (predecessor["target_end"] < target_start or + predecessor["host_addr"] + predecessor_size != host_addr): + predecessor = None + + if predecessor is None: + # isolated mapping, add it to the list + guest_phys_blocks.append({"target_start": target_start, + "target_end": target_end, + "host_addr": host_addr}) + message = "added" + else: + # expand predecessor until @target_end; predecessor's + # start doesn't change + predecessor["target_end"] = target_end + message = "joined" + + print("%016x %016x %016x %-7s %5u" % + (target_start, target_end, host_addr.cast(UINTPTR_T), + message, len(guest_phys_blocks))) + + return guest_phys_blocks + + +# The leading docstring doesn't have idiomatic Python formatting. It is +# printed by gdb's "help" command (the first line is printed in the +# "help data" summary), and it should match how other help texts look in +# gdb. +class DumpGuestMemory(gdb.Command): + """Extract guest vmcore from qemu process coredump. + +The two required arguments are FILE and ARCH: +FILE identifies the target file to write the guest vmcore to. +ARCH specifies the architecture for which the core will be generated. + +This GDB command reimplements the dump-guest-memory QMP command in +python, using the representation of guest memory as captured in the qemu +coredump. The qemu process that has been dumped must have had the +command line option "-machine dump-guest-core=on" which is the default. + +For simplicity, the "paging", "begin" and "end" parameters of the QMP +command are not supported -- no attempt is made to get the guest's +internal paging structures (ie. paging=false is hard-wired), and guest +memory is always fully dumped. + +Currently aarch64-be, aarch64-le, X86_64, 386, s390, ppc64-be, +ppc64-le guests are supported. + +The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are +not written to the vmcore. 
Preparing these would require context that is +only present in the KVM host kernel module when the guest is alive. A +fake ELF note is written instead, only to keep the ELF parser of "crash" +happy. + +Dependent on how busted the qemu process was at the time of the +coredump, this command might produce unpredictable results. If qemu +deliberately called abort(), or it was dumped in response to a signal at +a halfway fortunate point, then its coredump should be in reasonable +shape and this command should mostly work.""" + + def __init__(self): + super(DumpGuestMemory, self).__init__("dump-guest-memory", + gdb.COMMAND_DATA, + gdb.COMPLETE_FILENAME) + self.elf = None + self.guest_phys_blocks = None + + def dump_init(self, vmcore): + """Prepares and writes ELF structures to core file.""" + + # Needed to make crash happy, data for more useful notes is + # not available in a qemu core. + self.elf.add_note("NONE", "EMPTY", 0) + + # We should never reach PN_XNUM for paging=false dumps, + # there's just a handful of discontiguous ranges after + # merging. + # The constant is needed to account for the PT_NOTE segment. + phdr_num = len(self.guest_phys_blocks) + 1 + assert phdr_num < PN_XNUM + + for block in self.guest_phys_blocks: + block_size = block["target_end"] - block["target_start"] + self.elf.add_segment(PT_LOAD, block["target_start"], block_size) + + self.elf.to_file(vmcore) + + def dump_iterate(self, vmcore): + """Writes guest core to file.""" + + qemu_core = gdb.inferiors()[0] + for block in self.guest_phys_blocks: + cur = block["host_addr"] + left = block["target_end"] - block["target_start"] + print("dumping range at %016x for length %016x" % + (cur.cast(UINTPTR_T), left)) + + while left > 0: + chunk_size = min(TARGET_PAGE_SIZE, left) + chunk = qemu_core.read_memory(cur, chunk_size) + vmcore.write(chunk) + cur += chunk_size + left -= chunk_size + + def phys_memory_read(self, addr, size): + qemu_core = gdb.inferiors()[0] + for block in self.guest_phys_blocks: + if block["target_start"] <= addr \ + and addr + size <= block["target_end"]: + haddr = block["host_addr"] + (addr - block["target_start"]) + return qemu_core.read_memory(haddr, size) + return None + + def add_vmcoreinfo(self): + if gdb.lookup_symbol("vmcoreinfo_realize")[0] is None: + return + vmci = 'vmcoreinfo_realize::vmcoreinfo_state' + if not gdb.parse_and_eval("%s" % vmci) \ + or not gdb.parse_and_eval("(%s)->has_vmcoreinfo" % vmci): + return + + fmt = gdb.parse_and_eval("(%s)->vmcoreinfo.guest_format" % vmci) + addr = gdb.parse_and_eval("(%s)->vmcoreinfo.paddr" % vmci) + size = gdb.parse_and_eval("(%s)->vmcoreinfo.size" % vmci) + + fmt = le16_to_cpu(fmt) + addr = le64_to_cpu(addr) + size = le32_to_cpu(size) + + if fmt != VMCOREINFO_FORMAT_ELF: + return + + vmcoreinfo = self.phys_memory_read(addr, size) + if vmcoreinfo: + self.elf.add_vmcoreinfo_note(bytes(vmcoreinfo)) + + def invoke(self, args, from_tty): + """Handles command invocation from gdb.""" + + # Unwittingly pressing the Enter key after the command should + # not dump the same multi-gig coredump to the same file. 
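+        # (By default gdb repeats the last command when the user hits
+        # Enter on an empty line; dont_repeat() disables that here.)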
+ self.dont_repeat() + + argv = gdb.string_to_argv(args) + if len(argv) != 2: + raise gdb.GdbError("usage: dump-guest-memory FILE ARCH") + + self.elf = ELF(argv[1]) + self.guest_phys_blocks = get_guest_phys_blocks() + self.add_vmcoreinfo() + + with open(argv[0], "wb") as vmcore: + self.dump_init(vmcore) + self.dump_iterate(vmcore) + +DumpGuestMemory() diff --git a/scripts/entitlement.sh b/scripts/entitlement.sh new file mode 100755 index 000000000..e2c956a3a --- /dev/null +++ b/scripts/entitlement.sh @@ -0,0 +1,33 @@ +#!/bin/sh -e +# +# Helper script for the build process to apply entitlements + +in_place=: +if [ "$1" = --install ]; then + shift + in_place=false +fi + +DST="$1" +SRC="$2" +ICON="$3" +ENTITLEMENT="$4" + +if $in_place; then + trap 'rm "$DST.tmp"' exit + cp -af "$SRC" "$DST.tmp" + SRC="$DST.tmp" +else + cd "$MESON_INSTALL_DESTDIR_PREFIX" +fi + +if test -n "$ENTITLEMENT"; then + codesign --entitlements "$ENTITLEMENT" --force -s - "$SRC" +fi + +# Add the QEMU icon to the binary on Mac OS +Rez -append "$ICON" -o "$SRC" +SetFile -a C "$SRC" + +mv -f "$SRC" "$DST" +trap '' exit diff --git a/scripts/extract-vsssdk-headers b/scripts/extract-vsssdk-headers new file mode 100755 index 000000000..9e38510f0 --- /dev/null +++ b/scripts/extract-vsssdk-headers @@ -0,0 +1,35 @@ +#! /bin/bash + +# extract-vsssdk-headers +# Author: Paolo Bonzini <pbonzini@redhat.com> + +set -e +if test $# != 1 || ! test -f "$1"; then + echo 'Usage: extract-vsssdk-headers /path/to/setup.exe' >&2 + exit 1 +fi + +if ! command -v msiextract > /dev/null; then + echo 'msiextract not found. Please install msitools.' >&2 + exit 1 +fi + +if test -e inc; then + echo '"inc" already exists.' >&2 + exit 1 +fi + +# Extract .MSI file in the .exe, looking for the OLE compound +# document signature. Extra data at the end does not matter. +export LC_ALL=C +MAGIC=$'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1' +offset=$(grep -abom1 "$MAGIC" "$1" | sed -n 's/:/\n/; P') +tmpdir=$(mktemp -d) +trap 'rm -fr -- "$tmpdir" vsssdk.msi' EXIT HUP INT QUIT ALRM TERM +tail -c +$(($offset+1)) -- "$1" > vsssdk.msi + +# Now extract the files. +msiextract -C $tmpdir vsssdk.msi +mv "$tmpdir/Program Files/Microsoft/VSSSDK72/inc" inc +echo 'Extracted SDK headers into "inc" directory.' +exit 0 diff --git a/scripts/feature_to_c.sh b/scripts/feature_to_c.sh new file mode 100644 index 000000000..b1169899c --- /dev/null +++ b/scripts/feature_to_c.sh @@ -0,0 +1,68 @@ +#!/bin/sh + +# Convert text files to compilable C arrays. +# +# Copyright (C) 2007 Free Software Foundation, Inc. +# +# This file is part of GDB. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. + +if test -z "$1"; then + echo "Usage: $0 INPUTFILE..." 
+ exit 1 +fi + +for input; do + arrayname=xml_feature_$(echo $input | sed 's,.*/,,; s/[-.]/_/g') + + ${AWK:-awk} 'BEGIN { n = 0 + printf "#include \"qemu/osdep.h\"\n" + print "static const char '$arrayname'[] = {" + for (i = 0; i < 255; i++) + _ord_[sprintf("%c", i)] = i + } { + split($0, line, ""); + printf " " + for (i = 1; i <= length($0); i++) { + c = line[i] + if (c == "'\''") { + printf "'\''\\'\'''\'', " + } else if (c == "\\") { + printf "'\''\\\\'\'', " + } else if (_ord_[c] >= 32 && _ord_[c] < 127) { + printf "'\''%s'\'', ", c + } else { + printf "'\''\\%03o'\'', ", _ord_[c] + } + if (i % 10 == 0) + printf "\n " + } + printf "'\''\\n'\'', \n" + } END { + print " 0 };" + }' < $input +done + +echo +echo "const char *const xml_builtin[][2] = {" + +for input; do + basename=$(echo $input | sed 's,.*/,,') + arrayname=xml_feature_$(echo $input | sed 's,.*/,,; s/[-.]/_/g') + echo " { \"$basename\", $arrayname }," +done + +echo " { (char *)0, (char *)0 }" +echo "};" diff --git a/scripts/fix-multiline-comments.sh b/scripts/fix-multiline-comments.sh new file mode 100755 index 000000000..c15a04127 --- /dev/null +++ b/scripts/fix-multiline-comments.sh @@ -0,0 +1,62 @@ +#! /bin/sh +# +# Fix multiline comments to match docs/devel/style.rst +# +# Copyright (C) 2018 Red Hat, Inc. +# +# Author: Paolo Bonzini +# +# Usage: scripts/fix-multiline-comments.sh [-i] FILE... +# +# -i edits the file in place (requires gawk 4.1.0). +# +# Set the AWK environment variable to choose the awk interpreter to use +# (default 'awk') + +if test "$1" = -i; then + # gawk extension + inplace="-i inplace" + shift +fi +${AWK-awk} $inplace 'BEGIN { indent = -1 } +{ + line = $0 + # apply a star to the indent on lines after the first + if (indent != -1) { + if (line == "") { + line = sp " *" + } else if (substr(line, 1, indent + 2) == sp " ") { + line = sp " *" substr(line, indent + 3) + } + } + + is_lead = (line ~ /^[ \t]*\/\*/) + is_trail = (line ~ /\*\//) + if (is_lead && !is_trail) { + # grab the indent at the start of a comment, but not for + # single-line comments + match(line, /^[ \t]*\/\*/) + indent = RLENGTH - 2 + sp = substr(line, 1, indent) + } + + # the regular expression filters out lone /*, /**, or */ + if (indent != -1 && !(line ~ /^[ \t]*(\/\*+|\*\/)[ \t]*$/)) { + if (is_lead) { + # split the leading /* or /** on a separate line + match(line, /^[ \t]*\/\*+/) + lead = substr(line, 1, RLENGTH) + match(line, /^[ \t]*\/\*+[ \t]*/) + line = lead "\n" sp " *" substr(line, RLENGTH) + } + if (is_trail) { + # split the trailing */ on a separate line + match(line, /[ \t]*\*\//) + line = substr(line, 1, RSTART - 1) "\n" sp " */" + } + } + if (is_trail) { + indent = -1 + } + print line +}' "$@" diff --git a/scripts/gensyscalls.sh b/scripts/gensyscalls.sh new file mode 100755 index 000000000..8fb450e3c --- /dev/null +++ b/scripts/gensyscalls.sh @@ -0,0 +1,102 @@ +#!/bin/sh +# +# Update syscall_nr.h files from linux headers asm-generic/unistd.h +# +# This code is licensed under the GPL version 2 or later. See +# the COPYING file in the top-level directory. 
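+#
+# Usage: gensyscalls.sh <path-to-linux-source-tree> [output-directory]
+# (as the code below shows, the output directory defaults to the
+# current working directory when the second argument is omitted)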
+# + +linux="$1" +output="$2" + +TMP=$(mktemp -d) + +if [ "$linux" = "" ] ; then + echo "Needs path to linux source tree" 1>&2 + exit 1 +fi + +if [ "$output" = "" ] ; then + output="$PWD" +fi + +upper() +{ + echo "$1" | tr "[:lower:]" "[:upper:]" | tr "[:punct:]" "_" +} + +qemu_arch() +{ + case "$1" in + arm64) + echo "aarch64" + ;; + *) + echo "$1" + ;; + esac +} + +read_includes() +{ + arch=$1 + bits=$2 + + cpp -P -nostdinc -fdirectives-only \ + -D_UAPI_ASM_$(upper ${arch})_BITSPERLONG_H \ + -D__BITS_PER_LONG=${bits} \ + -I${linux}/arch/${arch}/include/uapi/ \ + -I${linux}/include/uapi \ + -I${TMP} \ + "${linux}/arch/${arch}/include/uapi/asm/unistd.h" +} + +filter_defines() +{ + grep -e "#define __NR_" -e "#define __NR3264" +} + +rename_defines() +{ + sed "s/ __NR_/ TARGET_NR_/g;s/(__NR_/(TARGET_NR_/g" +} + +evaluate_values() +{ + sed "s/#define TARGET_NR_/QEMU TARGET_NR_/" | \ + cpp -P -nostdinc | \ + sed "s/^QEMU /#define /" +} + +generate_syscall_nr() +{ + arch=$1 + bits=$2 + file="$3" + guard="$(upper LINUX_USER_$(qemu_arch $arch)_$(basename "$file"))" + + (echo "/*" + echo " * This file contains the system call numbers." + echo " * Do not modify." + echo " * This file is generated by scripts/gensyscalls.sh" + echo " */" + echo "#ifndef ${guard}" + echo "#define ${guard}" + echo + read_includes $arch $bits | filter_defines | rename_defines | \ + evaluate_values | sort -n -k 3 + echo + echo "#endif /* ${guard} */") > "$file" +} + +mkdir "$TMP/asm" +> "$TMP/asm/bitsperlong.h" + +generate_syscall_nr arm64 64 "$output/linux-user/aarch64/syscall_nr.h" +generate_syscall_nr nios2 32 "$output/linux-user/nios2/syscall_nr.h" +generate_syscall_nr openrisc 32 "$output/linux-user/openrisc/syscall_nr.h" + +generate_syscall_nr riscv 32 "$output/linux-user/riscv/syscall32_nr.h" +generate_syscall_nr riscv 64 "$output/linux-user/riscv/syscall64_nr.h" +generate_syscall_nr hexagon 32 "$output/linux-user/hexagon/syscall_nr.h" +rm -fr "$TMP" diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl new file mode 100755 index 000000000..e5499b94b --- /dev/null +++ b/scripts/get_maintainer.pl @@ -0,0 +1,2144 @@ +#!/usr/bin/env perl +# (c) 2007, Joe Perches <joe@perches.com> +# created from checkpatch.pl +# +# Print selected MAINTAINERS information for +# the files modified in a patch or for a file +# +# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch> +# perl scripts/get_maintainer.pl [OPTIONS] -f <file> +# +# Licensed under the terms of the GNU GPL License version 2 + +use strict; +use warnings; + +my $P = $0; +my $V = '0.26'; + +use Getopt::Long qw(:config no_auto_abbrev); + +my $lk_path = "./"; +my $email = 1; +my $email_usename = 1; +my $email_maintainer = 1; +my $email_reviewer = 1; +my $email_list = 1; +my $email_subscriber_list = 0; +my $email_git = 0; +my $email_git_all_signature_types = 0; +my $email_git_blame = 0; +my $email_git_blame_signatures = 1; +my $email_git_fallback = 1; +my $email_git_min_signatures = 1; +my $email_git_max_maintainers = 5; +my $email_git_min_percent = 5; +my $email_git_since = "1-year-ago"; +my $email_hg_since = "-365"; +my $interactive = 0; +my $email_remove_duplicates = 1; +my $email_use_mailmap = 1; +my $output_multiline = 1; +my $output_separator = ", "; +my $output_roles = 0; +my $output_rolestats = 1; +my $scm = 0; +my $web = 0; +my $subsystem = 0; +my $status = 0; +my $keywords = 1; +my $sections = 0; +my $file_emails = 0; +my $from_filename = 0; +my $pattern_depth = 0; +my $version = 0; +my $help = 0; + +my $vcs_used = 0; + +my $exit = 0; + +my 
%commit_author_hash; +my %commit_signer_hash; + +# Signature types of people who are either +# a) responsible for the code in question, or +# b) familiar enough with it to give relevant feedback +my @signature_tags = (); +push(@signature_tags, "Signed-off-by:"); +push(@signature_tags, "Reviewed-by:"); +push(@signature_tags, "Acked-by:"); + +my $signature_pattern = "\(" . join("|", @signature_tags) . "\)"; + +# rfc822 email address - preloaded methods go here. +my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])"; +my $rfc822_char = '[\\000-\\377]'; + +# VCS command support: class-like functions and strings + +my %VCS_cmds; + +my %VCS_cmds_git = ( + "execute_cmd" => \&git_execute_cmd, + "available" => '(which("git") ne "") && (-e ".git")', + "find_signers_cmd" => + "git log --no-color --follow --since=\$email_git_since " . + '--format="GitCommit: %H%n' . + 'GitAuthor: %an <%ae>%n' . + 'GitDate: %aD%n' . + 'GitSubject: %s%n' . + '%b%n"' . + " -- \$file", + "find_commit_signers_cmd" => + "git log --no-color " . + '--format="GitCommit: %H%n' . + 'GitAuthor: %an <%ae>%n' . + 'GitDate: %aD%n' . + 'GitSubject: %s%n' . + '%b%n"' . + " -1 \$commit", + "find_commit_author_cmd" => + "git log --no-color " . + '--format="GitCommit: %H%n' . + 'GitAuthor: %an <%ae>%n' . + 'GitDate: %aD%n' . + 'GitSubject: %s%n"' . + " -1 \$commit", + "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file", + "blame_file_cmd" => "git blame -l \$file", + "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})", + "blame_commit_pattern" => "^([0-9a-f]+) ", + "author_pattern" => "^GitAuthor: (.*)", + "subject_pattern" => "^GitSubject: (.*)", +); + +my %VCS_cmds_hg = ( + "execute_cmd" => \&hg_execute_cmd, + "available" => '(which("hg") ne "") && (-d ".hg")', + "find_signers_cmd" => + "hg log --date=\$email_hg_since " . + "--template='HgCommit: {node}\\n" . + "HgAuthor: {author}\\n" . + "HgSubject: {desc}\\n'" . + " -- \$file", + "find_commit_signers_cmd" => + "hg log " . + "--template='HgSubject: {desc}\\n'" . + " -r \$commit", + "find_commit_author_cmd" => + "hg log " . + "--template='HgCommit: {node}\\n" . + "HgAuthor: {author}\\n" . + "HgSubject: {desc|firstline}\\n'" . + " -r \$commit", + "blame_range_cmd" => "", # not supported + "blame_file_cmd" => "hg blame -n \$file", + "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})", + "blame_commit_pattern" => "^([ 0-9a-f]+):", + "author_pattern" => "^HgAuthor: (.*)", + "subject_pattern" => "^HgSubject: (.*)", +); + +my $conf = which_conf(".get_maintainer.conf"); +if (-f $conf) { + my @conf_args; + open(my $conffile, '<', "$conf") + or warn "$P: Can't find a readable .get_maintainer.conf file $!\n"; + + while (<$conffile>) { + my $line = $_; + + $line =~ s/\s*\n?$//g; + $line =~ s/^\s*//g; + $line =~ s/\s+/ /g; + + next if ($line =~ m/^\s*#/); + next if ($line =~ m/^\s*$/); + + my @words = split(" ", $line); + foreach my $word (@words) { + last if ($word =~ m/^#/); + push (@conf_args, $word); + } + } + close($conffile); + unshift(@ARGV, @conf_args) if @conf_args; +} + +if (!GetOptions( + 'email!' => \$email, + 'git!' => \$email_git, + 'git-all-signature-types!' => \$email_git_all_signature_types, + 'git-blame!' => \$email_git_blame, + 'git-blame-signatures!' => \$email_git_blame_signatures, + 'git-fallback!' 
=> \$email_git_fallback, + 'git-min-signatures=i' => \$email_git_min_signatures, + 'git-max-maintainers=i' => \$email_git_max_maintainers, + 'git-min-percent=i' => \$email_git_min_percent, + 'git-since=s' => \$email_git_since, + 'hg-since=s' => \$email_hg_since, + 'i|interactive!' => \$interactive, + 'remove-duplicates!' => \$email_remove_duplicates, + 'mailmap!' => \$email_use_mailmap, + 'm!' => \$email_maintainer, + 'r!' => \$email_reviewer, + 'n!' => \$email_usename, + 'l!' => \$email_list, + 's!' => \$email_subscriber_list, + 'multiline!' => \$output_multiline, + 'roles!' => \$output_roles, + 'rolestats!' => \$output_rolestats, + 'separator=s' => \$output_separator, + 'subsystem!' => \$subsystem, + 'status!' => \$status, + 'scm!' => \$scm, + 'web!' => \$web, + 'pattern-depth=i' => \$pattern_depth, + 'k|keywords!' => \$keywords, + 'sections!' => \$sections, + 'fe|file-emails!' => \$file_emails, + 'f|file' => \$from_filename, + 'v|version' => \$version, + 'h|help|usage' => \$help, + )) { + die "$P: invalid argument - use --help if necessary\n"; +} + +if ($help != 0) { + usage(); + exit 0; +} + +if ($version != 0) { + print("${P} ${V}\n"); + exit 0; +} + +if (-t STDIN && !@ARGV) { + # We're talking to a terminal, but have no command line arguments. + die "$P: missing patchfile or -f file - use --help if necessary\n"; +} + +$output_multiline = 0 if ($output_separator ne ", "); +$output_rolestats = 1 if ($interactive); +$output_roles = 1 if ($output_rolestats); + +if ($sections) { + $email = 0; + $email_list = 0; + $scm = 0; + $status = 0; + $subsystem = 0; + $web = 0; + $keywords = 0; + $interactive = 0; +} else { + my $selections = $email + $scm + $status + $subsystem + $web; + if ($selections == 0) { + die "$P: Missing required option: email, scm, status, subsystem or web\n"; + } +} + +if ($email && + ($email_maintainer + $email_reviewer + + $email_list + $email_subscriber_list + + $email_git + $email_git_blame) == 0) { + die "$P: Please select at least 1 email option\n"; +} + +if (!top_of_tree($lk_path)) { + die "$P: The current directory does not appear to be " + . "a QEMU source tree.\n"; +} + +## Read MAINTAINERS for type/value pairs + +my @typevalue = (); +my %keyword_hash; + +open (my $maint, '<', "${lk_path}MAINTAINERS") + or die "$P: Can't open MAINTAINERS: $!\n"; +while (<$maint>) { + my $line = $_; + + if ($line =~ m/^(.):\s*(.*)/) { + my $type = $1; + my $value = $2; + + ##Filename pattern matching + if ($type eq "F" || $type eq "X") { + $value =~ s@\.@\\\.@g; ##Convert . to \. + $value =~ s/\*/\.\*/g; ##Convert * to .* + $value =~ s/\?/\./g; ##Convert ? to . 
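+	    ##e.g. the pattern "docs/*.rst" becomes the regex "docs/.*\.rst"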
+ ##if pattern is a directory and it lacks a trailing slash, add one + if ((-d $value)) { + $value =~ s@([^/])$@$1/@; + } + } elsif ($type eq "K") { + $keyword_hash{@typevalue} = $value; + } + push(@typevalue, "$type:$value"); + } elsif (!/^(\s)*$/) { + $line =~ s/\n$//g; + push(@typevalue, $line); + } +} +close($maint); + + +# +# Read mail address map +# + +my $mailmap; + +read_mailmap(); + +sub read_mailmap { + $mailmap = { + names => {}, + addresses => {} + }; + + return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap")); + + open(my $mailmap_file, '<', "${lk_path}.mailmap") + or warn "$P: Can't open .mailmap: $!\n"; + + while (<$mailmap_file>) { + s/#.*$//; #strip comments + s/^\s+|\s+$//g; #trim + + next if (/^\s*$/); #skip empty lines + #entries have one of the following formats: + # name1 <mail1> + # <mail1> <mail2> + # name1 <mail1> <mail2> + # name1 <mail1> name2 <mail2> + # (see man git-shortlog) + + if (/^([^<]+)<([^>]+)>$/) { + my $real_name = $1; + my $address = $2; + + $real_name =~ s/\s+$//; + ($real_name, $address) = parse_email("$real_name <$address>"); + $mailmap->{names}->{$address} = $real_name; + + } elsif (/^<([^>]+)>\s*<([^>]+)>$/) { + my $real_address = $1; + my $wrong_address = $2; + + $mailmap->{addresses}->{$wrong_address} = $real_address; + + } elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) { + my $real_name = $1; + my $real_address = $2; + my $wrong_address = $3; + + $real_name =~ s/\s+$//; + ($real_name, $real_address) = + parse_email("$real_name <$real_address>"); + $mailmap->{names}->{$wrong_address} = $real_name; + $mailmap->{addresses}->{$wrong_address} = $real_address; + + } elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) { + my $real_name = $1; + my $real_address = $2; + my $wrong_name = $3; + my $wrong_address = $4; + + $real_name =~ s/\s+$//; + ($real_name, $real_address) = + parse_email("$real_name <$real_address>"); + + $wrong_name =~ s/\s+$//; + ($wrong_name, $wrong_address) = + parse_email("$wrong_name <$wrong_address>"); + + my $wrong_email = format_email($wrong_name, $wrong_address, 1); + $mailmap->{names}->{$wrong_email} = $real_name; + $mailmap->{addresses}->{$wrong_email} = $real_address; + } + } + close($mailmap_file); +} + +## use the filenames on the command line or find the filenames in the patchfiles + +my @files = (); +my @range = (); +my @keyword_tvi = (); +my @file_emails = (); + +if (!@ARGV) { + push(@ARGV, "&STDIN"); +} + +foreach my $file (@ARGV) { + if ($file ne "&STDIN") { + ##if $file is a directory and it lacks a trailing slash, add one + if ((-d $file)) { + $file =~ s@([^/])$@$1/@; + } elsif (!(stat $file)) { + die "$P: file '${file}' not found: $!\n"; + } + } + if ($from_filename) { + push(@files, $file); + if ($file ne "MAINTAINERS" && -f $file && ($keywords || $file_emails)) { + open(my $f, '<', $file) + or die "$P: Can't open $file: $!\n"; + my $text = do { local($/) ; <$f> }; + close($f); + if ($keywords) { + foreach my $line (keys %keyword_hash) { + if ($text =~ m/$keyword_hash{$line}/x) { + push(@keyword_tvi, $line); + } + } + } + if ($file_emails) { + my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g; + push(@file_emails, clean_file_emails(@poss_addr)); + } + } + } else { + my $file_cnt = @files; + my $lastfile; + + open(my $patch, "< $file") + or die "$P: Can't open $file: $!\n"; + + # We can check arbitrary information before the patch + # like the commit message, mail headers, etc... 
+ # This allows us to match arbitrary keywords against any part + # of a git format-patch generated file (subject tags, etc...) + + my $patch_prefix = ""; #Parsing the intro + + while (<$patch>) { + my $patch_line = $_; + if (m/^\+\+\+\s+(\S+)/) { + my $filename = $1; + $filename =~ s@^[^/]*/@@; + $filename =~ s@\n@@; + $lastfile = $filename; + push(@files, $filename); + $patch_prefix = "^[+-].*"; #Now parsing the actual patch + } elsif (m/^\@\@ -(\d+),(\d+)/) { + if ($email_git_blame) { + push(@range, "$lastfile:$1:$2"); + } + } elsif ($keywords) { + foreach my $line (keys %keyword_hash) { + if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) { + push(@keyword_tvi, $line); + } + } + } + } + close($patch); + + if ($file_cnt == @files) { + warn "$P: file '${file}' doesn't appear to be a patch. " + . "Add -f to options?\n"; + } + @files = sort_and_uniq(@files); + } +} + +@file_emails = uniq(@file_emails); + +my %email_hash_name; +my %email_hash_address; +my @email_to = (); +my %hash_list_to; +my @list_to = (); +my @scm = (); +my @web = (); +my @subsystem = (); +my @status = (); +my %deduplicate_name_hash = (); +my %deduplicate_address_hash = (); + +my @maintainers = get_maintainers(); + +if (@maintainers) { + @maintainers = merge_email(@maintainers); + output(@maintainers); +} + +if ($scm) { + @scm = uniq(@scm); + output(@scm); +} + +if ($status) { + @status = uniq(@status); + output(@status); +} + +if ($subsystem) { + @subsystem = uniq(@subsystem); + output(@subsystem); +} + +if ($web) { + @web = uniq(@web); + output(@web); +} + +exit($exit); + +sub range_is_maintained { + my ($start, $end) = @_; + + for (my $i = $start; $i < $end; $i++) { + my $line = $typevalue[$i]; + if ($line =~ m/^(.):\s*(.*)/) { + my $type = $1; + my $value = $2; + if ($type eq 'S') { + if ($value =~ /(maintain|support)/i) { + return 1; + } + } + } + } + return 0; +} + +sub range_has_maintainer { + my ($start, $end) = @_; + + for (my $i = $start; $i < $end; $i++) { + my $line = $typevalue[$i]; + if ($line =~ m/^(.):\s*(.*)/) { + my $type = $1; + my $value = $2; + if ($type eq 'M') { + return 1; + } + } + } + return 0; +} + +sub get_maintainers { + %email_hash_name = (); + %email_hash_address = (); + %commit_author_hash = (); + %commit_signer_hash = (); + @email_to = (); + %hash_list_to = (); + @list_to = (); + @scm = (); + @web = (); + @subsystem = (); + @status = (); + %deduplicate_name_hash = (); + %deduplicate_address_hash = (); + if ($email_git_all_signature_types) { + $signature_pattern = "(.+?)[Bb][Yy]:"; + } else { + $signature_pattern = "\(" . join("|", @signature_tags) . 
"\)"; + } + + # Find responsible parties + + my %exact_pattern_match_hash = (); + + foreach my $file (@files) { + + my %hash; + my $tvi = find_first_section(); + while ($tvi < @typevalue) { + my $start = find_starting_index($tvi); + my $end = find_ending_index($tvi); + my $exclude = 0; + my $i; + + #Do not match excluded file patterns + + for ($i = $start; $i < $end; $i++) { + my $line = $typevalue[$i]; + if ($line =~ m/^(.):\s*(.*)/) { + my $type = $1; + my $value = $2; + if ($type eq 'X') { + if (file_match_pattern($file, $value)) { + $exclude = 1; + last; + } + } + } + } + + if (!$exclude) { + for ($i = $start; $i < $end; $i++) { + my $line = $typevalue[$i]; + if ($line =~ m/^(.):\s*(.*)/) { + my $type = $1; + my $value = $2; + if ($type eq 'F') { + if (file_match_pattern($file, $value)) { + my $value_pd = ($value =~ tr@/@@); + my $file_pd = ($file =~ tr@/@@); + $value_pd++ if (substr($value,-1,1) ne "/"); + $value_pd = -1 if ($value =~ /^\.\*/); + if ($value_pd >= $file_pd && + range_is_maintained($start, $end) && + range_has_maintainer($start, $end)) { + $exact_pattern_match_hash{$file} = 1; + } + if ($pattern_depth == 0 || + (($file_pd - $value_pd) < $pattern_depth)) { + $hash{$tvi} = $value_pd; + } + } + } + } + } + } + $tvi = $end + 1; + } + + foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) { + add_categories($line); + if ($sections) { + my $i; + my $start = find_starting_index($line); + my $end = find_ending_index($line); + for ($i = $start; $i < $end; $i++) { + my $line = $typevalue[$i]; + if ($line =~ /^[FX]:/) { ##Restore file patterns + $line =~ s/([^\\])\.([^\*])/$1\?$2/g; + $line =~ s/([^\\])\.$/$1\?/g; ##Convert . back to ? + $line =~ s/\\\./\./g; ##Convert \. to . + $line =~ s/\.\*/\*/g; ##Convert .* to * + } + $line =~ s/^([A-Z]):/$1:\t/g; + print("$line\n"); + } + print("\n"); + } + } + } + + if ($keywords) { + @keyword_tvi = sort_and_uniq(@keyword_tvi); + foreach my $line (@keyword_tvi) { + add_categories($line); + } + } + + foreach my $email (@email_to, @list_to) { + $email->[0] = deduplicate_email($email->[0]); + } + + if ($email) { + if (! $interactive) { + $email_git_fallback = 0 if @email_to > 0 || $email_git || $email_git_blame; + if ($email_git_fallback) { + print STDERR "get_maintainer.pl: No maintainers found, printing recent contributors.\n"; + print STDERR "get_maintainer.pl: Do not blindly cc: them on patches! 
Use common sense.\n"; + print STDERR "\n"; + } + } + + foreach my $file (@files) { + if ($email_git || ($email_git_fallback && + !$exact_pattern_match_hash{$file})) { + vcs_file_signoffs($file); + } + if ($email_git_blame) { + vcs_file_blame($file); + } + } + + foreach my $email (@file_emails) { + my ($name, $address) = parse_email($email); + + my $tmp_email = format_email($name, $address, $email_usename); + push_email_address($tmp_email, ''); + add_role($tmp_email, 'in file'); + } + } + + my @to = (); + if ($email || $email_list) { + if ($email) { + @to = (@to, @email_to); + } + if ($email_list) { + @to = (@to, @list_to); + } + } + + if ($interactive) { + @to = interactive_get_maintainers(\@to); + } + + return @to; +} + +sub file_match_pattern { + my ($file, $pattern) = @_; + if (substr($pattern, -1) eq "/") { + if ($file =~ m@^$pattern@) { + return 1; + } + } else { + if ($file =~ m@^$pattern@) { + my $s1 = ($file =~ tr@/@@); + my $s2 = ($pattern =~ tr@/@@); + if ($s1 == $s2) { + return 1; + } + } + } + return 0; +} + +sub usage { + print <<EOT; +usage: $P [options] patchfile + $P [options] -f file|directory +version: $V + +MAINTAINER field selection options: + --email => print email address(es) if any + --git => include recent git \*-by: signers + --git-all-signature-types => include signers regardless of signature type + or use only ${signature_pattern} signers (default: $email_git_all_signature_types) + --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback) + --git-min-signatures => number of signatures required (default: $email_git_min_signatures) + --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers) + --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent) + --git-blame => use git blame to find modified commits for patch or file + --git-since => git history to use (default: $email_git_since) + --hg-since => hg history to use (default: $email_hg_since) + --interactive => display a menu (mostly useful if used with the --git option) + --m => include maintainer(s) if any + --r => include reviewer(s) if any + --n => include name 'Full Name <addr\@domain.tld>' + --l => include list(s) if any + --s => include subscriber only list(s) if any + --remove-duplicates => minimize duplicate email names/addresses + --roles => show roles (status:subsystem, git-signer, list, etc...) 
+ --rolestats => show roles and statistics (commits/total_commits, %) + --file-emails => add email addresses found in -f file (default: 0 (off)) + --scm => print SCM tree(s) if any + --status => print status if any + --subsystem => print subsystem name if any + --web => print website(s) if any + +Output type options: + --separator [, ] => separator for multiple entries on 1 line + using --separator also sets --nomultiline if --separator is not [, ] + --multiline => print 1 entry per line + +Other options: + --pattern-depth => Number of pattern directory traversals (default: 0 (all)) + --keywords => scan patch for keywords (default: $keywords) + --sections => print all of the subsystem sections with pattern matches + --mailmap => use .mailmap file (default: $email_use_mailmap) + --version => show version + --help => show this help information + +Default options: + [--email --nogit --git-fallback --m --r --n --l --multiline --pattern-depth=0 + --remove-duplicates --rolestats] + +Notes: + Using "-f directory" may give unexpected results: + Used with "--git", git signators for _all_ files in and below + directory are examined as git recurses directories. + Any specified X: (exclude) pattern matches are _not_ ignored. + Used with "--nogit", directory is used as a pattern match, + no individual file within the directory or subdirectory + is matched. + Used with "--git-blame", does not iterate all files in directory + Using "--git-blame" is slow and may add old committers and authors + that are no longer active maintainers to the output. + Using "--roles" or "--rolestats" with git send-email --cc-cmd or any + other automated tools that expect only ["name"] <email address> + may not work because of additional output after <email address>. + Using "--rolestats" and "--git-blame" shows the #/total=% commits, + not the percentage of the entire file authored. # of commits is + not a good measure of amount of code authored. 1 major commit may + contain a thousand lines, 5 trivial commits may modify a single line. + If git is not installed, but mercurial (hg) is installed and an .hg + repository exists, the following options apply to mercurial: + --git, + --git-min-signatures, --git-max-maintainers, --git-min-percent, and + --git-blame + Use --hg-since not --git-since to control date selection + File ".get_maintainer.conf", if it exists in the QEMU source root + directory, can change whatever get_maintainer defaults are desired. + Entries in this file can be any command line argument. + This file is prepended to any additional command line arguments. + Multiple lines and # comments are allowed. 
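+  Examples (patch and file names are illustrative):
+    perl scripts/get_maintainer.pl 0001-some-fix.patch
+    perl scripts/get_maintainer.pl -f MAINTAINERS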
+EOT +} + +sub top_of_tree { + my ($lk_path) = @_; + + if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") { + $lk_path .= "/"; + } + if ( (-f "${lk_path}COPYING") + && (-f "${lk_path}MAINTAINERS") + && (-f "${lk_path}Makefile") + && (-d "${lk_path}docs") + && (-f "${lk_path}VERSION") + && (-d "${lk_path}linux-user/") + && (-d "${lk_path}softmmu/")) { + return 1; + } + return 0; +} + +sub parse_email { + my ($formatted_email) = @_; + + my $name = ""; + my $address = ""; + + if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) { + $name = $1; + $address = $2; + } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) { + $address = $1; + } elsif ($formatted_email =~ /^(.+\@\S*).*$/) { + $address = $1; + } + + $name =~ s/^\s+|\s+$//g; + $name =~ s/^\"|\"$//g; + $address =~ s/^\s+|\s+$//g; + + if ($name =~ /[^\w \-]/i) { ##has "must quote" chars + $name =~ s/(?<!\\)"/\\"/g; ##escape quotes + $name = "\"$name\""; + } + + return ($name, $address); +} + +sub format_email { + my ($name, $address, $usename) = @_; + + my $formatted_email; + + $name =~ s/^\s+|\s+$//g; + $name =~ s/^\"|\"$//g; + $address =~ s/^\s+|\s+$//g; + + if ($name =~ /[^\w \-]/i) { ##has "must quote" chars + $name =~ s/(?<!\\)"/\\"/g; ##escape quotes + $name = "\"$name\""; + } + + if ($usename) { + if ("$name" eq "") { + $formatted_email = "$address"; + } else { + $formatted_email = "$name <$address>"; + } + } else { + $formatted_email = $address; + } + + return $formatted_email; +} + +sub find_first_section { + my $index = 0; + + while ($index < @typevalue) { + my $tv = $typevalue[$index]; + if (($tv =~ m/^(.):\s*(.*)/)) { + last; + } + $index++; + } + + return $index; +} + +sub find_starting_index { + my ($index) = @_; + + while ($index > 0) { + my $tv = $typevalue[$index]; + if (!($tv =~ m/^(.):\s*(.*)/)) { + last; + } + $index--; + } + + return $index; +} + +sub find_ending_index { + my ($index) = @_; + + while ($index < @typevalue) { + my $tv = $typevalue[$index]; + if (!($tv =~ m/^(.):\s*(.*)/)) { + last; + } + $index++; + } + + return $index; +} + +sub get_subsystem_name { + my ($index) = @_; + + my $start = find_starting_index($index); + + my $subsystem = $typevalue[$start]; + if (length($subsystem) > 20) { + $subsystem = substr($subsystem, 0, 17); + $subsystem =~ s/\s*$//; + $subsystem = $subsystem . "..."; + } + return $subsystem; +} + +sub get_maintainer_role { + my ($index) = @_; + + my $i; + my $start = find_starting_index($index); + my $end = find_ending_index($index); + + my $role = "unknown"; + my $subsystem = get_subsystem_name($index); + + for ($i = $start + 1; $i < $end; $i++) { + my $tv = $typevalue[$i]; + if ($tv =~ m/^(.):\s*(.*)/) { + my $ptype = $1; + my $pvalue = $2; + if ($ptype eq "S") { + $role = $pvalue; + } + } + } + + $role = lc($role); + if ($role eq "supported") { + $role = "supporter"; + } elsif ($role eq "maintained") { + $role = "maintainer"; + } elsif ($role eq "odd fixes") { + $role = "odd fixer"; + } elsif ($role eq "orphan") { + $role = "orphan minder"; + } elsif ($role eq "obsolete") { + $role = "obsolete minder"; + } elsif ($role eq "buried alive in reporters") { + $role = "chief penguin"; + } + + return $role . ":" . 
$subsystem; +} + +sub get_list_role { + my ($index) = @_; + + my $subsystem = get_subsystem_name($index); + + if ($subsystem eq "THE REST") { + $subsystem = ""; + } + + return $subsystem; +} + +sub add_categories { + my ($index) = @_; + + my $i; + my $start = find_starting_index($index); + my $end = find_ending_index($index); + + push(@subsystem, $typevalue[$start]); + + for ($i = $start + 1; $i < $end; $i++) { + my $tv = $typevalue[$i]; + if ($tv =~ m/^(.):\s*(.*)/) { + my $ptype = $1; + my $pvalue = $2; + if ($ptype eq "L") { + my $list_address = $pvalue; + my $list_additional = ""; + my $list_role = get_list_role($i); + + if ($list_role ne "") { + $list_role = ":" . $list_role; + } + if ($list_address =~ m/([^\s]+)\s+(.*)$/) { + $list_address = $1; + $list_additional = $2; + } + if ($list_additional =~ m/subscribers-only/) { + if ($email_subscriber_list) { + if (!$hash_list_to{lc($list_address)}) { + $hash_list_to{lc($list_address)} = 1; + push(@list_to, [$list_address, + "subscriber list${list_role}"]); + } + } + } else { + if ($email_list) { + if (!$hash_list_to{lc($list_address)}) { + $hash_list_to{lc($list_address)} = 1; + if ($list_additional =~ m/moderated/) { + push(@list_to, [$list_address, + "moderated list${list_role}"]); + } else { + push(@list_to, [$list_address, + "open list${list_role}"]); + } + } + } + } + } elsif ($ptype eq "M") { + my ($name, $address) = parse_email($pvalue); + if ($name eq "") { + if ($i > 0) { + my $tv = $typevalue[$i - 1]; + if ($tv =~ m/^(.):\s*(.*)/) { + if ($1 eq "P") { + $name = $2; + $pvalue = format_email($name, $address, $email_usename); + } + } + } + } + if ($email_maintainer) { + my $role = get_maintainer_role($i); + push_email_addresses($pvalue, $role); + } + } elsif ($ptype eq "R") { + my ($name, $address) = parse_email($pvalue); + if ($name eq "") { + if ($i > 0) { + my $tv = $typevalue[$i - 1]; + if ($tv =~ m/^(.):\s*(.*)/) { + if ($1 eq "P") { + $name = $2; + $pvalue = format_email($name, $address, $email_usename); + } + } + } + } + if ($email_reviewer) { + my $subsystem = get_subsystem_name($i); + push_email_addresses($pvalue, "reviewer:$subsystem"); + } + } elsif ($ptype eq "T") { + push(@scm, $pvalue); + } elsif ($ptype eq "W") { + push(@web, $pvalue); + } elsif ($ptype eq "S") { + push(@status, $pvalue); + } + } + } +} + +sub email_inuse { + my ($name, $address) = @_; + + return 1 if (($name eq "") && ($address eq "")); + return 1 if (($name ne "") && exists($email_hash_name{lc($name)})); + return 1 if (($address ne "") && exists($email_hash_address{lc($address)})); + + return 0; +} + +sub push_email_address { + my ($line, $role) = @_; + + my ($name, $address) = parse_email($line); + + if ($address eq "") { + return 0; + } + + if (!$email_remove_duplicates) { + push(@email_to, [format_email($name, $address, $email_usename), $role]); + } elsif (!email_inuse($name, $address)) { + push(@email_to, [format_email($name, $address, $email_usename), $role]); + $email_hash_name{lc($name)}++ if ($name ne ""); + $email_hash_address{lc($address)}++; + } + + return 1; +} + +sub push_email_addresses { + my ($address, $role) = @_; + + my @address_list = (); + + if (rfc822_valid($address)) { + push_email_address($address, $role); + } elsif (@address_list = rfc822_validlist($address)) { + my $array_count = shift(@address_list); + while (my $entry = shift(@address_list)) { + push_email_address($entry, $role); + } + } else { + if (!push_email_address($address, $role)) { + warn("Invalid MAINTAINERS address: '" . $address . 
"'\n"); + } + } +} + +sub add_role { + my ($line, $role) = @_; + + my ($name, $address) = parse_email($line); + my $email = format_email($name, $address, $email_usename); + + foreach my $entry (@email_to) { + if ($email_remove_duplicates) { + my ($entry_name, $entry_address) = parse_email($entry->[0]); + if (($name eq $entry_name || $address eq $entry_address) + && ($role eq "" || !($entry->[1] =~ m/$role/)) + ) { + if ($entry->[1] eq "") { + $entry->[1] = "$role"; + } else { + $entry->[1] = "$entry->[1],$role"; + } + } + } else { + if ($email eq $entry->[0] + && ($role eq "" || !($entry->[1] =~ m/$role/)) + ) { + if ($entry->[1] eq "") { + $entry->[1] = "$role"; + } else { + $entry->[1] = "$entry->[1],$role"; + } + } + } + } +} + +sub which { + my ($bin) = @_; + + foreach my $path (split(/:/, $ENV{PATH})) { + if (-e "$path/$bin") { + return "$path/$bin"; + } + } + + return ""; +} + +sub which_conf { + my ($conf) = @_; + + foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) { + if (-e "$path/$conf") { + return "$path/$conf"; + } + } + + return ""; +} + +sub mailmap_email { + my ($line) = @_; + + my ($name, $address) = parse_email($line); + my $email = format_email($name, $address, 1); + my $real_name = $name; + my $real_address = $address; + + if (exists $mailmap->{names}->{$email} || + exists $mailmap->{addresses}->{$email}) { + if (exists $mailmap->{names}->{$email}) { + $real_name = $mailmap->{names}->{$email}; + } + if (exists $mailmap->{addresses}->{$email}) { + $real_address = $mailmap->{addresses}->{$email}; + } + } else { + if (exists $mailmap->{names}->{$address}) { + $real_name = $mailmap->{names}->{$address}; + } + if (exists $mailmap->{addresses}->{$address}) { + $real_address = $mailmap->{addresses}->{$address}; + } + } + return format_email($real_name, $real_address, 1); +} + +sub mailmap { + my (@addresses) = @_; + + my @mapped_emails = (); + foreach my $line (@addresses) { + push(@mapped_emails, mailmap_email($line)); + } + merge_by_realname(@mapped_emails) if ($email_use_mailmap); + return @mapped_emails; +} + +sub merge_by_realname { + my %address_map; + my (@emails) = @_; + + foreach my $email (@emails) { + my ($name, $address) = parse_email($email); + if (exists $address_map{$name}) { + $address = $address_map{$name}; + $email = format_email($name, $address, 1); + } else { + $address_map{$name} = $address; + } + } +} + +sub git_execute_cmd { + my ($cmd) = @_; + my @lines = (); + + my $output = `$cmd`; + $output =~ s/^\s*//gm; + @lines = split("\n", $output); + + return @lines; +} + +sub hg_execute_cmd { + my ($cmd) = @_; + my @lines = (); + + my $output = `$cmd`; + @lines = split("\n", $output); + + return @lines; +} + +sub extract_formatted_signatures { + my (@signature_lines) = @_; + + my @type = @signature_lines; + + s/\s*(.*):.*/$1/ for (@type); + + # cut -f2- -d":" + s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines); + +## Reformat email addresses (with names) to avoid badly written signatures + + foreach my $signer (@signature_lines) { + $signer = deduplicate_email($signer); + } + + return (\@type, \@signature_lines); +} + +sub vcs_find_signers { + my ($cmd) = @_; + my $commits; + my @lines = (); + my @signatures = (); + + @lines = &{$VCS_cmds{"execute_cmd"}}($cmd); + + my $pattern = $VCS_cmds{"commit_pattern"}; + + $commits = grep(/$pattern/, @lines); # of commits + + @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines); + + return (0, @signatures) if !@signatures; + + save_commits_by_author(@lines) if ($interactive); + save_commits_by_signer(@lines) 
if ($interactive); + + my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures); + + return ($commits, @$signers_ref); +} + +sub vcs_find_author { + my ($cmd) = @_; + my @lines = (); + + @lines = &{$VCS_cmds{"execute_cmd"}}($cmd); + + return @lines if !@lines; + + my @authors = (); + foreach my $line (@lines) { + if ($line =~ m/$VCS_cmds{"author_pattern"}/) { + my $author = $1; + my ($name, $address) = parse_email($author); + $author = format_email($name, $address, 1); + push(@authors, $author); + } + } + + save_commits_by_author(@lines) if ($interactive); + save_commits_by_signer(@lines) if ($interactive); + + return @authors; +} + +sub vcs_save_commits { + my ($cmd) = @_; + my @lines = (); + my @commits = (); + + @lines = &{$VCS_cmds{"execute_cmd"}}($cmd); + + foreach my $line (@lines) { + if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) { + push(@commits, $1); + } + } + + return @commits; +} + +sub vcs_blame { + my ($file) = @_; + my $cmd; + my @commits = (); + + return @commits if (!(-f $file)); + + if (@range && $VCS_cmds{"blame_range_cmd"} eq "") { + my @all_commits = (); + + $cmd = $VCS_cmds{"blame_file_cmd"}; + $cmd =~ s/(\$\w+)/$1/eeg; #interpolate $cmd + @all_commits = vcs_save_commits($cmd); + + foreach my $file_range_diff (@range) { + next if (!($file_range_diff =~ m/(.+):(.+):(.+)/)); + my $diff_file = $1; + my $diff_start = $2; + my $diff_length = $3; + next if ("$file" ne "$diff_file"); + for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) { + push(@commits, $all_commits[$i]); + } + } + } elsif (@range) { + foreach my $file_range_diff (@range) { + next if (!($file_range_diff =~ m/(.+):(.+):(.+)/)); + my $diff_file = $1; + my $diff_start = $2; + my $diff_length = $3; + next if ("$file" ne "$diff_file"); + $cmd = $VCS_cmds{"blame_range_cmd"}; + $cmd =~ s/(\$\w+)/$1/eeg; #interpolate $cmd + push(@commits, vcs_save_commits($cmd)); + } + } else { + $cmd = $VCS_cmds{"blame_file_cmd"}; + $cmd =~ s/(\$\w+)/$1/eeg; #interpolate $cmd + @commits = vcs_save_commits($cmd); + } + + foreach my $commit (@commits) { + $commit =~ s/^\^//g; + } + + return @commits; +} + +my $printed_novcs = 0; +sub vcs_exists { + %VCS_cmds = %VCS_cmds_git; + return 1 if eval $VCS_cmds{"available"}; + %VCS_cmds = %VCS_cmds_hg; + return 2 if eval $VCS_cmds{"available"}; + %VCS_cmds = (); + if (!$printed_novcs) { + warn("$P: No supported VCS found. 
Add --nogit to options?\n"); + warn("Using a git repository produces better results.\n"); + warn("Try latest git repository using:\n"); + warn("git clone https://gitlab.com/qemu-project/qemu.git\n"); + $printed_novcs = 1; + } + return 0; +} + +sub vcs_is_git { + vcs_exists(); + return $vcs_used == 1; +} + +sub vcs_is_hg { + return $vcs_used == 2; +} + +sub interactive_get_maintainers { + my ($list_ref) = @_; + my @list = @$list_ref; + + vcs_exists(); + + my %selected; + my %authored; + my %signed; + my $count = 0; + my $maintained = 0; + foreach my $entry (@list) { + $maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i); + $selected{$count} = 1; + $authored{$count} = 0; + $signed{$count} = 0; + $count++; + } + + #menu loop + my $done = 0; + my $print_options = 0; + my $redraw = 1; + while (!$done) { + $count = 0; + if ($redraw) { + printf STDERR "\n%1s %2s %-65s", + "*", "#", "email/list and role:stats"; + if ($email_git || + ($email_git_fallback && !$maintained) || + $email_git_blame) { + print STDERR "auth sign"; + } + print STDERR "\n"; + foreach my $entry (@list) { + my $email = $entry->[0]; + my $role = $entry->[1]; + my $sel = ""; + $sel = "*" if ($selected{$count}); + my $commit_author = $commit_author_hash{$email}; + my $commit_signer = $commit_signer_hash{$email}; + my $authored = 0; + my $signed = 0; + $authored++ for (@{$commit_author}); + $signed++ for (@{$commit_signer}); + printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email; + printf STDERR "%4d %4d", $authored, $signed + if ($authored > 0 || $signed > 0); + printf STDERR "\n %s\n", $role; + if ($authored{$count}) { + my $commit_author = $commit_author_hash{$email}; + foreach my $ref (@{$commit_author}) { + print STDERR " Author: @{$ref}[1]\n"; + } + } + if ($signed{$count}) { + my $commit_signer = $commit_signer_hash{$email}; + foreach my $ref (@{$commit_signer}) { + print STDERR " @{$ref}[2]: @{$ref}[1]\n"; + } + } + + $count++; + } + } + my $date_ref = \$email_git_since; + $date_ref = \$email_hg_since if (vcs_is_hg()); + if ($print_options) { + $print_options = 0; + if (vcs_exists()) { + print STDERR <<EOT + +Version Control options: +g use git history [$email_git] +gf use git-fallback [$email_git_fallback] +b use git blame [$email_git_blame] +bs use blame signatures [$email_git_blame_signatures] +c# minimum commits [$email_git_min_signatures] +%# min percent [$email_git_min_percent] +d# history to use [$$date_ref] +x# max maintainers [$email_git_max_maintainers] +t all signature types [$email_git_all_signature_types] +m use .mailmap [$email_use_mailmap] +EOT + } + print STDERR <<EOT + +Additional options: +0 toggle all +tm toggle maintainers +tg toggle git entries +tl toggle open list entries +ts toggle subscriber list entries +f emails in file [$file_emails] +k keywords in file [$keywords] +r remove duplicates [$email_remove_duplicates] +p# pattern match depth [$pattern_depth] +EOT + } + print STDERR +"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): "; + + my $input = <STDIN>; + chomp($input); + + $redraw = 1; + my $rerun = 0; + my @wish = split(/[, ]+/, $input); + foreach my $nr (@wish) { + $nr = lc($nr); + my $sel = substr($nr, 0, 1); + my $str = substr($nr, 1); + my $val = 0; + $val = $1 if $str =~ /^(\d+)$/; + + if ($sel eq "y") { + $interactive = 0; + $done = 1; + $output_rolestats = 0; + $output_roles = 0; + last; + } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) { + $selected{$nr - 1} = !$selected{$nr - 1}; + } elsif ($sel eq "*" || $sel eq '^') { + my $toggle = 0; + 
$toggle = 1 if ($sel eq '*'); + for (my $i = 0; $i < $count; $i++) { + $selected{$i} = $toggle; + } + } elsif ($sel eq "0") { + for (my $i = 0; $i < $count; $i++) { + $selected{$i} = !$selected{$i}; + } + } elsif ($sel eq "t") { + if (lc($str) eq "m") { + for (my $i = 0; $i < $count; $i++) { + $selected{$i} = !$selected{$i} + if ($list[$i]->[1] =~ /^(maintainer|supporter)/i); + } + } elsif (lc($str) eq "g") { + for (my $i = 0; $i < $count; $i++) { + $selected{$i} = !$selected{$i} + if ($list[$i]->[1] =~ /^(author|commit|signer)/i); + } + } elsif (lc($str) eq "l") { + for (my $i = 0; $i < $count; $i++) { + $selected{$i} = !$selected{$i} + if ($list[$i]->[1] =~ /^(open list)/i); + } + } elsif (lc($str) eq "s") { + for (my $i = 0; $i < $count; $i++) { + $selected{$i} = !$selected{$i} + if ($list[$i]->[1] =~ /^(subscriber list)/i); + } + } + } elsif ($sel eq "a") { + if ($val > 0 && $val <= $count) { + $authored{$val - 1} = !$authored{$val - 1}; + } elsif ($str eq '*' || $str eq '^') { + my $toggle = 0; + $toggle = 1 if ($str eq '*'); + for (my $i = 0; $i < $count; $i++) { + $authored{$i} = $toggle; + } + } + } elsif ($sel eq "s") { + if ($val > 0 && $val <= $count) { + $signed{$val - 1} = !$signed{$val - 1}; + } elsif ($str eq '*' || $str eq '^') { + my $toggle = 0; + $toggle = 1 if ($str eq '*'); + for (my $i = 0; $i < $count; $i++) { + $signed{$i} = $toggle; + } + } + } elsif ($sel eq "o") { + $print_options = 1; + $redraw = 1; + } elsif ($sel eq "g") { + if ($str eq "f") { + bool_invert(\$email_git_fallback); + } else { + bool_invert(\$email_git); + } + $rerun = 1; + } elsif ($sel eq "b") { + if ($str eq "s") { + bool_invert(\$email_git_blame_signatures); + } else { + bool_invert(\$email_git_blame); + } + $rerun = 1; + } elsif ($sel eq "c") { + if ($val > 0) { + $email_git_min_signatures = $val; + $rerun = 1; + } + } elsif ($sel eq "x") { + if ($val > 0) { + $email_git_max_maintainers = $val; + $rerun = 1; + } + } elsif ($sel eq "%") { + if ($str ne "" && $val >= 0) { + $email_git_min_percent = $val; + $rerun = 1; + } + } elsif ($sel eq "d") { + if (vcs_is_git()) { + $email_git_since = $str; + } elsif (vcs_is_hg()) { + $email_hg_since = $str; + } + $rerun = 1; + } elsif ($sel eq "t") { + bool_invert(\$email_git_all_signature_types); + $rerun = 1; + } elsif ($sel eq "f") { + bool_invert(\$file_emails); + $rerun = 1; + } elsif ($sel eq "r") { + bool_invert(\$email_remove_duplicates); + $rerun = 1; + } elsif ($sel eq "m") { + bool_invert(\$email_use_mailmap); + read_mailmap(); + $rerun = 1; + } elsif ($sel eq "k") { + bool_invert(\$keywords); + $rerun = 1; + } elsif ($sel eq "p") { + if ($str ne "" && $val >= 0) { + $pattern_depth = $val; + $rerun = 1; + } + } elsif ($sel eq "h" || $sel eq "?") { + print STDERR <<EOT + +Interactive mode allows you to select the various maintainers, submitters, +commit signers and mailing lists that could be CC'd on a patch. + +Any *'d entry is selected. + +If you have git or hg installed, you can choose to summarize the commit +history of files in the patch. Also, each line of the current file can +be matched to its commit author and that commits signers with blame. + +Various knobs exist to control the length of time for active commit +tracking, the maximum number of commit authors and signers to add, +and such. + +Enter selections at the prompt until you are satisfied that the selected +maintainers are appropriate. You may enter multiple selections separated +by either commas or spaces. 
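+For example, "1 3 g" toggles the first and third entries and then
+toggles use of git history; the input is split on commas and spaces,
+so "1,3,g" is equivalent.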
+ +EOT + } else { + print STDERR "invalid option: '$nr'\n"; + $redraw = 0; + } + } + if ($rerun) { + print STDERR "git-blame can be very slow, please have patience..." + if ($email_git_blame); + goto &get_maintainers; + } + } + + #drop not selected entries + $count = 0; + my @new_emailto = (); + foreach my $entry (@list) { + if ($selected{$count}) { + push(@new_emailto, $list[$count]); + } + $count++; + } + return @new_emailto; +} + +sub bool_invert { + my ($bool_ref) = @_; + + if ($$bool_ref) { + $$bool_ref = 0; + } else { + $$bool_ref = 1; + } +} + +sub deduplicate_email { + my ($email) = @_; + + my $matched = 0; + my ($name, $address) = parse_email($email); + $email = format_email($name, $address, 1); + $email = mailmap_email($email); + + return $email if (!$email_remove_duplicates); + + ($name, $address) = parse_email($email); + + if ($name ne "" && $deduplicate_name_hash{lc($name)}) { + $name = $deduplicate_name_hash{lc($name)}->[0]; + $address = $deduplicate_name_hash{lc($name)}->[1]; + $matched = 1; + } elsif ($deduplicate_address_hash{lc($address)}) { + $name = $deduplicate_address_hash{lc($address)}->[0]; + $address = $deduplicate_address_hash{lc($address)}->[1]; + $matched = 1; + } + if (!$matched) { + $deduplicate_name_hash{lc($name)} = [ $name, $address ]; + $deduplicate_address_hash{lc($address)} = [ $name, $address ]; + } + $email = format_email($name, $address, 1); + $email = mailmap_email($email); + return $email; +} + +sub save_commits_by_author { + my (@lines) = @_; + + my @authors = (); + my @commits = (); + my @subjects = (); + + foreach my $line (@lines) { + if ($line =~ m/$VCS_cmds{"author_pattern"}/) { + my $author = $1; + $author = deduplicate_email($author); + push(@authors, $author); + } + push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/); + push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/); + } + + for (my $i = 0; $i < @authors; $i++) { + my $exists = 0; + foreach my $ref(@{$commit_author_hash{$authors[$i]}}) { + if (@{$ref}[0] eq $commits[$i] && + @{$ref}[1] eq $subjects[$i]) { + $exists = 1; + last; + } + } + if (!$exists) { + push(@{$commit_author_hash{$authors[$i]}}, + [ ($commits[$i], $subjects[$i]) ]); + } + } +} + +sub save_commits_by_signer { + my (@lines) = @_; + + my $commit = ""; + my $subject = ""; + + foreach my $line (@lines) { + $commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/); + $subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/); + if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) { + my @signatures = ($line); + my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures); + my @types = @$types_ref; + my @signers = @$signers_ref; + + my $type = $types[0]; + my $signer = $signers[0]; + + $signer = deduplicate_email($signer); + + my $exists = 0; + foreach my $ref(@{$commit_signer_hash{$signer}}) { + if (@{$ref}[0] eq $commit && + @{$ref}[1] eq $subject && + @{$ref}[2] eq $type) { + $exists = 1; + last; + } + } + if (!$exists) { + push(@{$commit_signer_hash{$signer}}, + [ ($commit, $subject, $type) ]); + } + } + } +} + +sub vcs_assign { + my ($role, $divisor, @lines) = @_; + + my %hash; + my $count = 0; + + return if (@lines <= 0); + + if ($divisor <= 0) { + warn("Bad divisor in " . (caller(0))[3] . 
": $divisor\n"); + $divisor = 1; + } + + @lines = mailmap(@lines); + + return if (@lines <= 0); + + @lines = sort(@lines); + + # uniq -c + $hash{$_}++ for @lines; + + # sort -rn + foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) { + my $sign_offs = $hash{$line}; + my $percent = $sign_offs * 100 / $divisor; + + $percent = 100 if ($percent > 100); + $count++; + last if ($sign_offs < $email_git_min_signatures || + $count > $email_git_max_maintainers || + $percent < $email_git_min_percent); + push_email_address($line, ''); + if ($output_rolestats) { + my $fmt_percent = sprintf("%.0f", $percent); + add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%"); + } else { + add_role($line, $role); + } + } +} + +sub vcs_file_signoffs { + my ($file) = @_; + + my @signers = (); + my $commits; + + $vcs_used = vcs_exists(); + return if (!$vcs_used); + + my $cmd = $VCS_cmds{"find_signers_cmd"}; + $cmd =~ s/(\$\w+)/$1/eeg; # interpolate $cmd + + ($commits, @signers) = vcs_find_signers($cmd); + + foreach my $signer (@signers) { + $signer = deduplicate_email($signer); + } + + vcs_assign("commit_signer", $commits, @signers); +} + +sub vcs_file_blame { + my ($file) = @_; + + my @signers = (); + my @all_commits = (); + my @commits = (); + my $total_commits; + my $total_lines; + + $vcs_used = vcs_exists(); + return if (!$vcs_used); + + @all_commits = vcs_blame($file); + @commits = uniq(@all_commits); + $total_commits = @commits; + $total_lines = @all_commits; + + if ($email_git_blame_signatures) { + if (vcs_is_hg()) { + my $commit_count; + my @commit_signers = (); + my $commit = join(" -r ", @commits); + my $cmd; + + $cmd = $VCS_cmds{"find_commit_signers_cmd"}; + $cmd =~ s/(\$\w+)/$1/eeg; #substitute variables in $cmd + + ($commit_count, @commit_signers) = vcs_find_signers($cmd); + + push(@signers, @commit_signers); + } else { + foreach my $commit (@commits) { + my $commit_count; + my @commit_signers = (); + my $cmd; + + $cmd = $VCS_cmds{"find_commit_signers_cmd"}; + $cmd =~ s/(\$\w+)/$1/eeg; #substitute variables in $cmd + + ($commit_count, @commit_signers) = vcs_find_signers($cmd); + + push(@signers, @commit_signers); + } + } + } + + if ($from_filename) { + if ($output_rolestats) { + my @blame_signers; + if (vcs_is_hg()) {{ # Double brace for last exit + my $commit_count; + my @commit_signers = (); + @commits = uniq(@commits); + @commits = sort(@commits); + my $commit = join(" -r ", @commits); + my $cmd; + + $cmd = $VCS_cmds{"find_commit_author_cmd"}; + $cmd =~ s/(\$\w+)/$1/eeg; #substitute variables in $cmd + + my @lines = (); + + @lines = &{$VCS_cmds{"execute_cmd"}}($cmd); + + last if !@lines; + + my @authors = (); + foreach my $line (@lines) { + if ($line =~ m/$VCS_cmds{"author_pattern"}/) { + my $author = $1; + $author = deduplicate_email($author); + push(@authors, $author); + } + } + + save_commits_by_author(@lines) if ($interactive); + save_commits_by_signer(@lines) if ($interactive); + + push(@signers, @authors); + }} + else { + foreach my $commit (@commits) { + my $i; + my $cmd = $VCS_cmds{"find_commit_author_cmd"}; + $cmd =~ s/(\$\w+)/$1/eeg; #interpolate $cmd + my @author = vcs_find_author($cmd); + next if !@author; + + my $formatted_author = deduplicate_email($author[0]); + + my $count = grep(/$commit/, @all_commits); + for ($i = 0; $i < $count ; $i++) { + push(@blame_signers, $formatted_author); + } + } + } + if (@blame_signers) { + vcs_assign("authored lines", $total_lines, @blame_signers); + } + } + foreach my $signer (@signers) { + $signer = deduplicate_email($signer); + } + 
vcs_assign("commits", $total_commits, @signers); + } else { + foreach my $signer (@signers) { + $signer = deduplicate_email($signer); + } + vcs_assign("modified commits", $total_commits, @signers); + } +} + +sub uniq { + my (@parms) = @_; + + my %saw; + @parms = grep(!$saw{$_}++, @parms); + return @parms; +} + +sub sort_and_uniq { + my (@parms) = @_; + + my %saw; + @parms = sort @parms; + @parms = grep(!$saw{$_}++, @parms); + return @parms; +} + +sub clean_file_emails { + my (@file_emails) = @_; + my @fmt_emails = (); + + foreach my $email (@file_emails) { + $email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g; + my ($name, $address) = parse_email($email); + if ($name eq '"[,\.]"') { + $name = ""; + } + + my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name); + if (@nw > 2) { + my $first = $nw[@nw - 3]; + my $middle = $nw[@nw - 2]; + my $last = $nw[@nw - 1]; + + if (((length($first) == 1 && $first =~ m/[A-Za-z]/) || + (length($first) == 2 && substr($first, -1) eq ".")) || + (length($middle) == 1 || + (length($middle) == 2 && substr($middle, -1) eq "."))) { + $name = "$first $middle $last"; + } else { + $name = "$middle $last"; + } + } + + if (substr($name, -1) =~ /[,\.]/) { + $name = substr($name, 0, length($name) - 1); + } elsif (substr($name, -2) =~ /[,\.]"/) { + $name = substr($name, 0, length($name) - 2) . '"'; + } + + if (substr($name, 0, 1) =~ /[,\.]/) { + $name = substr($name, 1, length($name) - 1); + } elsif (substr($name, 0, 2) =~ /"[,\.]/) { + $name = '"' . substr($name, 2, length($name) - 2); + } + + my $fmt_email = format_email($name, $address, $email_usename); + push(@fmt_emails, $fmt_email); + } + return @fmt_emails; +} + +sub merge_email { + my @lines; + my %saw; + + for (@_) { + my ($address, $role) = @$_; + if (!$saw{$address}) { + if ($output_roles) { + push(@lines, "$address ($role)"); + } else { + push(@lines, $address); + } + $saw{$address} = 1; + } + } + + return @lines; +} + +sub output { + my (@parms) = @_; + + if ($output_multiline) { + foreach my $line (@parms) { + print("${line}\n"); + } + } else { + print(join($output_separator, @parms)); + print("\n"); + } +} + +my $rfc822re; + +sub make_rfc822re { +# Basic lexical tokens are specials, domain_literal, quoted_string, atom, and +# comment. We must allow for rfc822_lwsp (or comments) after each of these. +# This regexp will only work on addresses which have had comments stripped +# and replaced with rfc822_lwsp. + + my $specials = '()<>@,;:\\\\".\\[\\]'; + my $controls = '\\000-\\037\\177'; + + my $dtext = "[^\\[\\]\\r\\\\]"; + my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*"; + + my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*"; + +# Use zero-width assertion to spot the limit of an atom. A simple +# $rfc822_lwsp* causes the regexp engine to hang occasionally. 
+ my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))"; + my $word = "(?:$atom|$quoted_string)"; + my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*"; + + my $sub_domain = "(?:$atom|$domain_literal)"; + my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*"; + + my $addr_spec = "$localpart\@$rfc822_lwsp*$domain"; + + my $phrase = "$word*"; + my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)"; + my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*"; + my $mailbox = "(?:$addr_spec|$phrase$route_addr)"; + + my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*"; + my $address = "(?:$mailbox|$group)"; + + return "$rfc822_lwsp*$address"; +} + +sub rfc822_strip_comments { + my $s = shift; +# Recursively remove comments, and replace with a single space. The simpler +# regexps in the Email Addressing FAQ are imperfect - they will miss escaped +# chars in atoms, for example. + + while ($s =~ s/^((?:[^"\\]|\\.)* + (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*) + \((?:[^()\\]|\\.)*\)/$1 /osx) {} + return $s; +} + +# valid: returns true if the parameter is an RFC822 valid address +# +sub rfc822_valid { + my $s = rfc822_strip_comments(shift); + + if (!$rfc822re) { + $rfc822re = make_rfc822re(); + } + + return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/; +} + +# validlist: In scalar context, returns true if the parameter is an RFC822 +# valid list of addresses. +# +# In list context, returns an empty list on failure (an invalid +# address was found); otherwise a list whose first element is the +# number of addresses found and whose remaining elements are the +# addresses. This is needed to disambiguate failure (invalid) +# from success with no addresses found, because an empty string is +# a valid list. + +sub rfc822_validlist { + my $s = rfc822_strip_comments(shift); + + if (!$rfc822re) { + $rfc822re = make_rfc822re(); + } + # * null list items are valid according to the RFC + # * the '1' business is to aid in distinguishing failure from no results + + my @r; + if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so && + $s =~ m/^$rfc822_char*$/) { + while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) { + push(@r, $1); + } + return wantarray ? (scalar(@r), @r) : 1; + } + return wantarray ? () : 0; +} diff --git a/scripts/git-submodule.sh b/scripts/git-submodule.sh new file mode 100755 index 000000000..e225d3a96 --- /dev/null +++ b/scripts/git-submodule.sh @@ -0,0 +1,104 @@ +#!/bin/sh +# +# This code is licensed under the GPL version 2 or later. See +# the COPYING file in the top-level directory. + +substat=".git-submodule-status" + +command=$1 +shift +maybe_modules="$@" + +# if --with-git-submodules=ignore, do nothing +test "$command" = "ignore" && exit 0 + +test -z "$GIT" && GIT=git + +cd "$(dirname "$0")/.." + +update_error() { + echo "$0: $*" + echo + echo "Unable to automatically checkout GIT submodules '$modules'." + echo "If you require use of an alternative GIT binary (for example to" + echo "enable use of a transparent proxy), then please specify it by" + echo "running configure by with the '--with-git' argument. e.g." 
+ echo + echo " $ ./configure --with-git='tsocks git'" + echo + echo "Alternatively you may disable automatic GIT submodule checkout" + echo "with:" + echo + echo " $ ./configure --with-git-submodules=validate" + echo + echo "and then manually update submodules prior to running make, with:" + echo + echo " $ scripts/git-submodule.sh update $modules" + echo + exit 1 +} + +validate_error() { + if test "$1" = "validate"; then + echo "GIT submodules checkout is out of date, and submodules" + echo "configured for validate only. Please run" + echo " scripts/git-submodule.sh update $maybe_modules" + echo "from the source directory or call configure with" + echo " --with-git-submodules=update" + echo "To disable GIT submodules validation, use" + echo " --with-git-submodules=ignore" + fi + exit 1 +} + +modules="" +for m in $maybe_modules +do + $GIT submodule status $m 1> /dev/null 2>&1 + if test $? = 0 + then + modules="$modules $m" + else + echo "warn: ignoring non-existent submodule $m" + fi +done + +if test -n "$maybe_modules" && ! test -e ".git" +then + echo "$0: unexpectedly called with submodules but no git checkout exists" + exit 1 +fi + +case "$command" in +status|validate) + if test -z "$maybe_modules" + then + test -s ${substat} && validate_error "$command" || exit 0 + fi + + test -f "$substat" || validate_error "$command" + for module in $modules; do + CURSTATUS=$($GIT submodule status $module) + OLDSTATUS=$(cat $substat | grep $module) + if test "$CURSTATUS" != "$OLDSTATUS"; then + validate_error "$command" + fi + done + exit 0 + ;; +update) + if test -z "$maybe_modules" + then + test -e $substat || touch $substat + exit 0 + fi + + $GIT submodule update --init $modules 1>/dev/null + test $? -ne 0 && update_error "failed to update modules" + + $GIT submodule status $modules > "${substat}" + test $? -ne 0 && update_error "failed to save git submodule status" >&2 + ;; +esac + +exit 0 diff --git a/scripts/git.orderfile b/scripts/git.orderfile new file mode 100644 index 000000000..b32203b71 --- /dev/null +++ b/scripts/git.orderfile @@ -0,0 +1,39 @@ +# +# order file for git, to produce patches which are easier to review +# by diffing the important stuff like interface changes first. +# +# one-off usage: +# git diff -O scripts/git.orderfile ... +# +# add to git config: +# git config diff.orderFile scripts/git.orderfile +# + +# Documentation +docs/* +*.rst +*.rst.inc + +# build system +configure +Makefile* +*.mak +meson.build + +# qapi schema +qapi/*.json +qga/*.json + +# semantic patches +*.cocci + +# headers +*.h +*.h.inc + +# decoding tree specification +*.decode + +# code +*.c +*.c.inc diff --git a/scripts/hxtool b/scripts/hxtool new file mode 100755 index 000000000..80516b943 --- /dev/null +++ b/scripts/hxtool @@ -0,0 +1,24 @@ +#!/bin/sh + +hxtoh() +{ + flag=1 + while read -r str; do + case $str in + HXCOMM*) + ;; + SRST*|ERST*) flag=$(($flag^1)) + ;; + *) + test $flag -eq 1 && printf "%s\n" "$str" + ;; + esac + done +} + +case "$1" in +"-h") hxtoh ;; +*) exit 1 ;; +esac < "$2" + +exit 0 diff --git a/scripts/hxtool-conv.pl b/scripts/hxtool-conv.pl new file mode 100755 index 000000000..eede40b34 --- /dev/null +++ b/scripts/hxtool-conv.pl @@ -0,0 +1,137 @@ +#!/usr/bin/perl -w +# +# Script to convert .hx file STEXI/ETEXI blocks to SRST/ERST +# +# Copyright (C) 2020 Linaro +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# (at your option) any later version. See the COPYING file in the +# top-level directory. 
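# NB: illustrative sketch, not part of the patch -- the hxtool script
# earlier in this series extracts the C side of a .hx file by
# XOR-toggling a flag on SRST/ERST markers and dropping HXCOMM lines.
# The same filter written as standalone Perl, for comparison (the
# build itself uses the sh version):
#
#     my $flag = 1;
#     while (my $line = <>) {
#         next if $line =~ /^HXCOMM/;          # .hx comment lines
#         if ($line =~ /^(?:SRST|ERST)/) {     # enter/leave an rST block
#             $flag ^= 1;
#             next;
#         }
#         print $line if $flag;                # keep only the C fragments
#     }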
+ +# This script was only ever intended as a one-off conversion operation. +# Please excuse the places where it is a bit hacky. +# Some manual intervention after the conversion is expected, as are +# some warnings from makeinfo. +# Warning: this script is not idempotent: don't try to run it on +# a .hx file that already has SRST/ERST sections. + +# Expected usage: +# scripts/hxtool-conv.pl file.hx > file.hx.new + +use utf8; + +my $reading_texi = 0; +my $texiblock = ''; +my @tables = (); + +sub update_tables($) { + my ($texi) = @_; + # Update our list of open table directives: every @table + # line in the texi fragment is added to the list, and every + # @end table line means we remove an entry from the list. + # If this fragment had a completely self contained table with + # both the @table and @end table lines, this will be a no-op. + foreach (split(/\n/, $texi)) { + push @tables, $_ if /^\@table/; + pop @tables if /^\@end table/; + } +} + +sub only_table_directives($) { + # Return true if every line in the fragment is a start or end table directive + my ($texi) = @_; + foreach (split(/\n/, $texi)) { + return 0 unless /^\@table/ or /^\@end table/; + } + return 1; +} + +sub output_rstblock($) { + # Write the output to /tmp/frag.texi, wrapped in whatever current @table + # lines we need. + my ($texi) = @_; + + # As a special case, if this fragment is only table directives and + # nothing else, update our set of open table directives but otherwise + # ignore it. This avoids emitting an empty SRST/ERST block. + if (only_table_directives($texi)) { + update_tables($texi); + return; + } + + open(my $fragfh, '>', '/tmp/frag.texi'); + # First output the currently active set of open table directives + print $fragfh join("\n", @tables); + # Next, update our list of open table directives. + # We need to do this before we emit the closing table directives + # so that we emit the right number if this fragment had an + # unbalanced set of directives. + update_tables($texi); + # Then emit the texi fragment itself. + print $fragfh "\n$texi\n"; + # Finally, add the necessary closing table directives. + print $fragfh "\@end table\n" x scalar @tables; + close $fragfh; + + # Now invoke makeinfo/pandoc on it and slurp the results into a string + open(my $fh, '-|', "makeinfo --force -o - --docbook " + . "-D 'qemu_system_x86 QEMU_SYSTEM_X86_MACRO' " + . "-D 'qemu_system QEMU_SYSTEM_MACRO' /tmp/frag.texi " + . " | pandoc -f docbook -t rst") + or die "can't start makeinfo/pandoc: $!"; + + binmode $fh, ':encoding(utf8)'; + + print "SRST\n"; + + # Slurp the whole thing into a string so we can do multiline + # string matches on it. + my $rst = do { + local $/ = undef; + <$fh>; + }; + $rst =~ s/^- − /- /gm; + $rst =~ s/“/"/gm; + $rst =~ s/”/"/gm; + $rst =~ s/‘/'/gm; + $rst =~ s/’/'/gm; + $rst =~ s/QEMU_SYSTEM_MACRO/|qemu_system|/g; + $rst =~ s/QEMU_SYSTEM_X86_MACRO/|qemu_system_x86|/g; + $rst =~ s/(?=::\n\n +\|qemu)/.. parsed-literal/g; + $rst =~ s/:\n\n::$/::/gm; + + # Fix up the invalid reference format makeinfo/pandoc emit: + # `Some string here <#anchorname>`__ + # should be: + # :ref:`anchorname` + $rst =~ s/\`[^<`]+\<\#([^>]+)\>\`__/:ref:`$1`/gm; + print $rst; + + close $fh or die "error on close: $!"; + print "ERST\n"; +} + +# Read the whole .hx input file. 
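# NB: illustrative sketch, not part of the patch -- output_rstblock()
# above patches up the invalid cross-reference syntax that the
# makeinfo|pandoc pipeline emits. A standalone check of that final
# substitution (the anchor name is invented):
#
#     my $rst = '`QEMU monitor <#pcsys_005fmonitor>`__';
#     $rst =~ s/\`[^<`]+\<\#([^>]+)\>\`__/:ref:`$1`/gm;
#     print "$rst\n";   # prints :ref:`pcsys_005fmonitor`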
+while (<>) { + # Always print the current line + print; + if (/STEXI/) { + $reading_texi = 1; + $texiblock = ''; + next; + } + if (/ETEXI/) { + $reading_texi = 0; + # dump RST version of block + output_rstblock($texiblock); + next; + } + if ($reading_texi) { + # Accumulate the texi into a string + # but drop findex entries as they will confuse makeinfo + next if /^\@findex/; + $texiblock .= $_; + } +} + +die "Unexpectedly still in texi block at EOF" if $reading_texi; diff --git a/scripts/kernel-doc b/scripts/kernel-doc new file mode 100755 index 000000000..240923d50 --- /dev/null +++ b/scripts/kernel-doc @@ -0,0 +1,2442 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 + +use warnings; +use strict; + +## Copyright (c) 1998 Michael Zucchi, All Rights Reserved ## +## Copyright (C) 2000, 1 Tim Waugh <twaugh@redhat.com> ## +## Copyright (C) 2001 Simon Huggins ## +## Copyright (C) 2005-2012 Randy Dunlap ## +## Copyright (C) 2012 Dan Luedtke ## +## ## +## #define enhancements by Armin Kuster <akuster@mvista.com> ## +## Copyright (c) 2000 MontaVista Software, Inc. ## +## ## +## This software falls under the GNU General Public License. ## +## Please read the COPYING file for more information ## + +# 18/01/2001 - Cleanups +# Functions prototyped as foo(void) same as foo() +# Stop eval'ing where we don't need to. +# -- huggie@earth.li + +# 27/06/2001 - Allowed whitespace after initial "/**" and +# allowed comments before function declarations. +# -- Christian Kreibich <ck@whoop.org> + +# Still to do: +# - add perldoc documentation +# - Look more closely at some of the scarier bits :) + +# 26/05/2001 - Support for separate source and object trees. +# Return error code. +# Keith Owens <kaos@ocs.com.au> + +# 23/09/2001 - Added support for typedefs, structs, enums and unions +# Support for Context section; can be terminated using empty line +# Small fixes (like spaces vs. \s in regex) +# -- Tim Jansen <tim@tjansen.de> + +# 25/07/2012 - Added support for HTML5 +# -- Dan Luedtke <mail@danrl.de> + +sub usage { + my $message = <<"EOF"; +Usage: $0 [OPTION ...] FILE ... + +Read C language source or header FILEs, extract embedded documentation comments, +and print formatted documentation to standard output. + +The documentation comments are identified by "/**" opening comment mark. See +Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. + +Output format selection (mutually exclusive): + -man Output troff manual page format. + -rst Output reStructuredText format. This is the default. + -none Do not output documentation, only warnings. + +Output format selection modifier (affects only ReST output): + + -sphinx-version Use the ReST C domain dialect compatible with a + specific Sphinx version. + If not specified, kernel-doc will auto-detect using + the sphinx-build version found on PATH. + +Output selection (mutually exclusive): + -export Only output documentation for symbols that have been + exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() + in any input FILE or -export-file FILE. + -internal Only output documentation for symbols that have NOT been + exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() + in any input FILE or -export-file FILE. + -function NAME Only output documentation for the given function(s) + or DOC: section title(s). All other functions and DOC: + sections are ignored. May be specified multiple times. + -nosymbol NAME Exclude the specified symbols from the output + documentation. May be specified multiple times.
+ +Output selection modifiers: + -no-doc-sections Do not output DOC: sections. + -enable-lineno Enable output of #define LINENO lines. Only works with + reStructuredText format. + -export-file FILE Specify an additional FILE in which to look for + EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(). To be used with + -export or -internal. May be specified multiple times. + +Other parameters: + -v Verbose output, more warnings and other information. + -h Print this help. + -Werror Treat warnings as errors. + +EOF + print $message; + exit 1; +} + +# +# format of comments. +# In the following table, (...)? signifies optional structure. +# (...)* signifies 0 or more structure elements +# /** +# * function_name(:)? (- short description)? +# (* @parameterx: (description of parameter x)?)* +# (* a blank line)? +# * (Description:)? (Description of function)? +# * (section header: (section description)? )* +# (*)?*/ +# +# So .. the trivial example would be: +# +# /** +# * my_function +# */ +# +# If the Description: header tag is omitted, then there must be a blank line +# after the last parameter specification. +# e.g. +# /** +# * my_function - does my stuff +# * @my_arg: its mine damnit +# * +# * Does my stuff explained. +# */ +# +# or, could also use: +# /** +# * my_function - does my stuff +# * @my_arg: its mine damnit +# * Description: Does my stuff explained. +# */ +# etc. +# +# Besides functions you can also write documentation for structs, unions, +# enums and typedefs. Instead of the function name you must write the name +# of the declaration; the struct/union/enum/typedef must always precede +# the name. Nesting of declarations is not supported. +# Use the argument mechanism to document members or constants. +# e.g. +# /** +# * struct my_struct - short description +# * @a: first member +# * @b: second member +# * +# * Longer description +# */ +# struct my_struct { +# int a; +# int b; +# /* private: */ +# int c; +# }; +# +# All descriptions can be multiline, except the short function description. +# +# For really longs structs, you can also describe arguments inside the +# body of the struct. +# eg. +# /** +# * struct my_struct - short description +# * @a: first member +# * @b: second member +# * +# * Longer description +# */ +# struct my_struct { +# int a; +# int b; +# /** +# * @c: This is longer description of C +# * +# * You can use paragraphs to describe arguments +# * using this method. +# */ +# int c; +# }; +# +# This should be use only for struct/enum members. +# +# You can also add additional sections. When documenting kernel functions you +# should document the "Context:" of the function, e.g. whether the functions +# can be called form interrupts. Unlike other sections you can end it with an +# empty line. +# A non-void function should have a "Return:" section describing the return +# value(s). +# Example-sections should contain the string EXAMPLE so that they are marked +# appropriately in DocBook. +# +# Example: +# /** +# * user_function - function that can only be called in user context +# * @a: some argument +# * Context: !in_interrupt() +# * +# * Some description +# * Example: +# * user_function(22); +# */ +# ... +# +# +# All descriptive text is further processed, scanning for the following special +# patterns, which are highlighted appropriately. 
+# +# 'funcname()' - function +# '$ENVVAR' - environmental variable +# '&struct_name' - name of a structure (up to two words including 'struct') +# '&struct_name.member' - name of a structure member +# '@parameter' - name of a parameter +# '%CONST' - name of a constant. +# '``LITERAL``' - literal string without any spaces on it. + +## init lots of data + +my $errors = 0; +my $warnings = 0; +my $anon_struct_union = 0; + +# match expressions used to find embedded type information +my $type_constant = '\b``([^\`]+)``\b'; +my $type_constant2 = '\%([-_\w]+)'; +my $type_func = '(\w+)\(\)'; +my $type_param = '\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)'; +my $type_param_ref = '([\!]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)'; +my $type_fp_param = '\@(\w+)\(\)'; # Special RST handling for func ptr params +my $type_fp_param2 = '\@(\w+->\S+)\(\)'; # Special RST handling for structs with func ptr params +my $type_env = '(\$\w+)'; +my $type_enum = '#(enum\s*([_\w]+))'; +my $type_struct = '#(struct\s*([_\w]+))'; +my $type_typedef = '#(([A-Z][_\w]*))'; +my $type_union = '#(union\s*([_\w]+))'; +my $type_member = '#([_\w]+)(\.|->)([_\w]+)'; +my $type_fallback = '(?!)'; # this never matches +my $type_member_func = $type_member . '\(\)'; + +# Output conversion substitutions. +# One for each output format + +# these are pretty rough +my @highlights_man = ( + [$type_constant, "\$1"], + [$type_constant2, "\$1"], + [$type_func, "\\\\fB\$1\\\\fP"], + [$type_enum, "\\\\fI\$1\\\\fP"], + [$type_struct, "\\\\fI\$1\\\\fP"], + [$type_typedef, "\\\\fI\$1\\\\fP"], + [$type_union, "\\\\fI\$1\\\\fP"], + [$type_param, "\\\\fI\$1\\\\fP"], + [$type_param_ref, "\\\\fI\$1\$2\\\\fP"], + [$type_member, "\\\\fI\$1\$2\$3\\\\fP"], + [$type_fallback, "\\\\fI\$1\\\\fP"] + ); +my $blankline_man = ""; + +# rst-mode +my @highlights_rst = ( + [$type_constant, "``\$1``"], + [$type_constant2, "``\$1``"], + # Note: need to escape () to avoid func matching later + [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"], + [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"], + [$type_fp_param, "**\$1\\\\(\\\\)**"], + [$type_fp_param2, "**\$1\\\\(\\\\)**"], + [$type_func, "\$1()"], + [$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"], + [$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"], + [$type_typedef, "\\:c\\:type\\:`\$1 <\$2>`"], + [$type_union, "\\:c\\:type\\:`\$1 <\$2>`"], + # in rst this can refer to any type + [$type_fallback, "\\:c\\:type\\:`\$1`"], + [$type_param_ref, "**\$1\$2**"] + ); +my $blankline_rst = "\n"; + +# read arguments +if ($#ARGV == -1) { + usage(); +} + +my $kernelversion; +my ($sphinx_major, $sphinx_minor, $sphinx_patch); + +my $dohighlight = ""; + +my $verbose = 0; +my $Werror = 0; +my $output_mode = "rst"; +my $output_preformatted = 0; +my $no_doc_sections = 0; +my $enable_lineno = 0; +my @highlights = @highlights_rst; +my $blankline = $blankline_rst; +my $modulename = "Kernel API"; + +use constant { + OUTPUT_ALL => 0, # output all symbols and doc sections + OUTPUT_INCLUDE => 1, # output only specified symbols + OUTPUT_EXPORTED => 2, # output exported symbols + OUTPUT_INTERNAL => 3, # output non-exported symbols +}; +my $output_selection = OUTPUT_ALL; +my $show_not_found = 0; # No longer used + +my @export_file_list; + +my @build_time; +if (defined($ENV{'KBUILD_BUILD_TIMESTAMP'}) && + (my $seconds = `date -d"${ENV{'KBUILD_BUILD_TIMESTAMP'}}" +%s`) ne '') { + @build_time = gmtime($seconds); +} else { + @build_time = localtime; +} + +my $man_date = ('January', 'February', 'March', 'April', 'May', 'June', + 'July', 'August', 'September', 
'October', + 'November', 'December')[$build_time[4]] . + " " . ($build_time[5]+1900); + +# Essentially these are globals. +# They probably want to be tidied up, made more localised or something. +# CAVEAT EMPTOR! Some of the others I localised may not want to be, which +# could cause "use of undefined value" or other bugs. +my ($function, %function_table, %parametertypes, $declaration_purpose); +my %nosymbol_table = (); +my $declaration_start_line; +my ($type, $declaration_name, $return_type); +my ($newsection, $newcontents, $prototype, $brcount, %source_map); + +if (defined($ENV{'KBUILD_VERBOSE'})) { + $verbose = "$ENV{'KBUILD_VERBOSE'}"; +} + +if (defined($ENV{'KDOC_WERROR'})) { + $Werror = "$ENV{'KDOC_WERROR'}"; +} + +if (defined($ENV{'KCFLAGS'})) { + my $kcflags = "$ENV{'KCFLAGS'}"; + + if ($kcflags =~ /Werror/) { + $Werror = 1; + } +} + +# Generated docbook code is inserted in a template at a point where +# docbook v3.1 requires a non-zero sequence of RefEntry's; see: +# https://www.oasis-open.org/docbook/documentation/reference/html/refentry.html +# We keep track of number of generated entries and generate a dummy +# if needs be to ensure the expanded template can be postprocessed +# into html. +my $section_counter = 0; + +my $lineprefix=""; + +# Parser states +use constant { + STATE_NORMAL => 0, # normal code + STATE_NAME => 1, # looking for function name + STATE_BODY_MAYBE => 2, # body - or maybe more description + STATE_BODY => 3, # the body of the comment + STATE_BODY_WITH_BLANK_LINE => 4, # the body, which has a blank line + STATE_PROTO => 5, # scanning prototype + STATE_DOCBLOCK => 6, # documentation block + STATE_INLINE => 7, # gathering doc outside main block +}; +my $state; +my $in_doc_sect; +my $leading_space; + +# Inline documentation state +use constant { + STATE_INLINE_NA => 0, # not applicable ($state != STATE_INLINE) + STATE_INLINE_NAME => 1, # looking for member name (@foo:) + STATE_INLINE_TEXT => 2, # looking for member documentation + STATE_INLINE_END => 3, # done + STATE_INLINE_ERROR => 4, # error - Comment without header was found. + # Spit a warning as it's not + # proper kernel-doc and ignore the rest. +}; +my $inline_doc_state; + +#declaration types: can be +# 'function', 'struct', 'union', 'enum', 'typedef' +my $decl_type; + +my $doc_start = '^/\*\*\s*$'; # Allow whitespace at end of comment start. +my $doc_end = '\*/'; +my $doc_com = '\s*\*\s*'; +my $doc_com_body = '\s*\* ?'; +my $doc_decl = $doc_com . '(\w+)'; +# @params and a strictly limited set of supported section names +my $doc_sect = $doc_com . + '\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:(.*)'; +my $doc_content = $doc_com_body . '(.*)'; +my $doc_block = $doc_com . 'DOC:\s*(.*)?'; +my $doc_inline_start = '^\s*/\*\*\s*$'; +my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; +my $doc_inline_end = '^\s*\*/\s*$'; +my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; +my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; + +my %parameterdescs; +my %parameterdesc_start_lines; +my @parameterlist; +my %sections; +my @sectionlist; +my %section_start_lines; +my $sectcheck; +my $struct_actual; + +my $contents = ""; +my $new_start_line = 0; + +# the canonical section names. see also $doc_sect above. 
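# NB: illustrative sketch, not part of the patch -- $doc_sect above is
# what decides whether a "* foo:" line in a kernel-doc comment opens a
# named section or documents a parameter. Standalone check (the test
# line is invented; like the parser, the match is case-insensitive):
#
#     my $doc_com  = '\s*\*\s*';
#     my $doc_sect = $doc_com .
#         '\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:(.*)';
#     if (' * @nbytes: number of bytes to copy' =~ m/$doc_sect/i) {
#         print "section='$1' text='$2'\n";   # section='@nbytes'
#     }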
+my $section_default = "Description"; # default section +my $section_intro = "Introduction"; +my $section = $section_default; +my $section_context = "Context"; +my $section_return = "Return"; + +my $undescribed = "-- undescribed --"; + +reset_state(); + +while ($ARGV[0] =~ m/^--?(.*)/) { + my $cmd = $1; + shift @ARGV; + if ($cmd eq "man") { + $output_mode = "man"; + @highlights = @highlights_man; + $blankline = $blankline_man; + } elsif ($cmd eq "rst") { + $output_mode = "rst"; + @highlights = @highlights_rst; + $blankline = $blankline_rst; + } elsif ($cmd eq "none") { + $output_mode = "none"; + } elsif ($cmd eq "module") { # not needed for XML, inherits from calling document + $modulename = shift @ARGV; + } elsif ($cmd eq "function") { # to only output specific functions + $output_selection = OUTPUT_INCLUDE; + $function = shift @ARGV; + $function_table{$function} = 1; + } elsif ($cmd eq "nosymbol") { # Exclude specific symbols + my $symbol = shift @ARGV; + $nosymbol_table{$symbol} = 1; + } elsif ($cmd eq "export") { # only exported symbols + $output_selection = OUTPUT_EXPORTED; + %function_table = (); + } elsif ($cmd eq "internal") { # only non-exported symbols + $output_selection = OUTPUT_INTERNAL; + %function_table = (); + } elsif ($cmd eq "export-file") { + my $file = shift @ARGV; + push(@export_file_list, $file); + } elsif ($cmd eq "v") { + $verbose = 1; + } elsif ($cmd eq "Werror") { + $Werror = 1; + } elsif (($cmd eq "h") || ($cmd eq "help")) { + usage(); + } elsif ($cmd eq 'no-doc-sections') { + $no_doc_sections = 1; + } elsif ($cmd eq 'enable-lineno') { + $enable_lineno = 1; + } elsif ($cmd eq 'show-not-found') { + $show_not_found = 1; # A no-op but don't fail + } elsif ($cmd eq "sphinx-version") { + my $ver_string = shift @ARGV; + if ($ver_string =~ m/^(\d+)(\.\d+)?(\.\d+)?/) { + $sphinx_major = $1; + if (defined($2)) { + $sphinx_minor = substr($2,1); + } else { + $sphinx_minor = 0; + } + if (defined($3)) { + $sphinx_patch = substr($3,1) + } else { + $sphinx_patch = 0; + } + } else { + die "Sphinx version should either major.minor or major.minor.patch format\n"; + } + } else { + # Unknown argument + usage(); + } +} + +# continue execution near EOF; + +# The C domain dialect changed on Sphinx 3. So, we need to check the +# version in order to produce the right tags. +sub findprog($) +{ + foreach(split(/:/, $ENV{PATH})) { + return "$_/$_[0]" if(-x "$_/$_[0]"); + } +} + +sub get_sphinx_version() +{ + my $ver; + + my $cmd = "sphinx-build"; + if (!findprog($cmd)) { + my $cmd = "sphinx-build3"; + if (!findprog($cmd)) { + $sphinx_major = 1; + $sphinx_minor = 2; + $sphinx_patch = 0; + printf STDERR "Warning: Sphinx version not found. Using default (Sphinx version %d.%d.%d)\n", + $sphinx_major, $sphinx_minor, $sphinx_patch; + return; + } + } + + open IN, "$cmd --version 2>&1 |"; + while (<IN>) { + if (m/^\s*sphinx-build\s+([\d]+)\.([\d\.]+)(\+\/[\da-f]+)?$/) { + $sphinx_major = $1; + $sphinx_minor = $2; + $sphinx_patch = $3; + last; + } + # Sphinx 1.2.x uses a different format + if (m/^\s*Sphinx.*\s+([\d]+)\.([\d\.]+)$/) { + $sphinx_major = $1; + $sphinx_minor = $2; + $sphinx_patch = $3; + last; + } + } + close IN; +} + +# get kernel version from env +sub get_kernel_version() { + my $version = 'unknown kernel version'; + + if (defined($ENV{'KERNELVERSION'})) { + $version = $ENV{'KERNELVERSION'}; + } + return $version; +} + +# +sub print_lineno { + my $lineno = shift; + if ($enable_lineno && defined($lineno)) { + print "#define LINENO " . $lineno . 
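# NB: illustrative sketch, not part of the patch -- a standalone check
# of the version pattern used by get_sphinx_version() above (the
# version string is invented). Note that $2 captures everything after
# the first dot, so $sphinx_minor ends up holding "minor.patch" here:
#
#     if ("sphinx-build 4.3.2" =~
#         m/^\s*sphinx-build\s+([\d]+)\.([\d\.]+)(\+\/[\da-f]+)?$/) {
#         print "major=$1 rest=$2\n";   # major=4 rest=3.2
#     }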
"\n"; + } +} +## +# dumps section contents to arrays/hashes intended for that purpose. +# +sub dump_section { + my $file = shift; + my $name = shift; + my $contents = join "\n", @_; + + if ($name =~ m/$type_param/) { + $name = $1; + $parameterdescs{$name} = $contents; + $sectcheck = $sectcheck . $name . " "; + $parameterdesc_start_lines{$name} = $new_start_line; + $new_start_line = 0; + } elsif ($name eq "@\.\.\.") { + $name = "..."; + $parameterdescs{$name} = $contents; + $sectcheck = $sectcheck . $name . " "; + $parameterdesc_start_lines{$name} = $new_start_line; + $new_start_line = 0; + } else { + if (defined($sections{$name}) && ($sections{$name} ne "")) { + # Only warn on user specified duplicate section names. + if ($name ne $section_default) { + print STDERR "${file}:$.: warning: duplicate section name '$name'\n"; + ++$warnings; + } + $sections{$name} .= $contents; + } else { + $sections{$name} = $contents; + push @sectionlist, $name; + $section_start_lines{$name} = $new_start_line; + $new_start_line = 0; + } + } +} + +## +# dump DOC: section after checking that it should go out +# +sub dump_doc_section { + my $file = shift; + my $name = shift; + my $contents = join "\n", @_; + + if ($no_doc_sections) { + return; + } + + return if (defined($nosymbol_table{$name})); + + if (($output_selection == OUTPUT_ALL) || + (($output_selection == OUTPUT_INCLUDE) && + defined($function_table{$name}))) + { + dump_section($file, $name, $contents); + output_blockhead({'sectionlist' => \@sectionlist, + 'sections' => \%sections, + 'module' => $modulename, + 'content-only' => ($output_selection != OUTPUT_ALL), }); + } +} + +## +# output function +# +# parameterdescs, a hash. +# function => "function name" +# parameterlist => @list of parameters +# parameterdescs => %parameter descriptions +# sectionlist => @list of sections +# sections => %section descriptions +# + +sub output_highlight { + my $contents = join "\n",@_; + my $line; + +# DEBUG +# if (!defined $contents) { +# use Carp; +# confess "output_highlight got called with no args?\n"; +# } + +# print STDERR "contents b4:$contents\n"; + eval $dohighlight; + die $@ if $@; +# print STDERR "contents af:$contents\n"; + + foreach $line (split "\n", $contents) { + if (! $output_preformatted) { + $line =~ s/^\s*//; + } + if ($line eq ""){ + if (! $output_preformatted) { + print $lineprefix, $blankline; + } + } else { + if ($output_mode eq "man" && substr($line, 0, 1) eq ".") { + print "\\&$line"; + } else { + print $lineprefix, $line; + } + } + print "\n"; + } +} + +## +# output function in man +sub output_function_man(%) { + my %args = %{$_[0]}; + my ($parameter, $section); + my $count; + + print ".TH \"$args{'function'}\" 9 \"$args{'function'}\" \"$man_date\" \"Kernel Hacker's Manual\" LINUX\n"; + + print ".SH NAME\n"; + print $args{'function'} . " \\- " . $args{'purpose'} . "\n"; + + print ".SH SYNOPSIS\n"; + if ($args{'functiontype'} ne "") { + print ".B \"" . $args{'functiontype'} . "\" " . $args{'function'} . "\n"; + } else { + print ".B \"" . $args{'function'} . "\n"; + } + $count = 0; + my $parenth = "("; + my $post = ","; + foreach my $parameter (@{$args{'parameterlist'}}) { + if ($count == $#{$args{'parameterlist'}}) { + $post = ");"; + } + $type = $args{'parametertypes'}{$parameter}; + if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + # pointer-to-function + print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; + } else { + $type =~ s/([^\*])$/$1 /; + print ".BI \"" . $parenth . $type . "\" " . " \"" . $post . 
"\"\n"; + } + $count++; + $parenth = ""; + } + + print ".SH ARGUMENTS\n"; + foreach $parameter (@{$args{'parameterlist'}}) { + my $parameter_name = $parameter; + $parameter_name =~ s/\[.*//; + + print ".IP \"" . $parameter . "\" 12\n"; + output_highlight($args{'parameterdescs'}{$parameter_name}); + } + foreach $section (@{$args{'sectionlist'}}) { + print ".SH \"", uc $section, "\"\n"; + output_highlight($args{'sections'}{$section}); + } +} + +## +# output enum in man +sub output_enum_man(%) { + my %args = %{$_[0]}; + my ($parameter, $section); + my $count; + + print ".TH \"$args{'module'}\" 9 \"enum $args{'enum'}\" \"$man_date\" \"API Manual\" LINUX\n"; + + print ".SH NAME\n"; + print "enum " . $args{'enum'} . " \\- " . $args{'purpose'} . "\n"; + + print ".SH SYNOPSIS\n"; + print "enum " . $args{'enum'} . " {\n"; + $count = 0; + foreach my $parameter (@{$args{'parameterlist'}}) { + print ".br\n.BI \" $parameter\"\n"; + if ($count == $#{$args{'parameterlist'}}) { + print "\n};\n"; + last; + } + else { + print ", \n.br\n"; + } + $count++; + } + + print ".SH Constants\n"; + foreach $parameter (@{$args{'parameterlist'}}) { + my $parameter_name = $parameter; + $parameter_name =~ s/\[.*//; + + print ".IP \"" . $parameter . "\" 12\n"; + output_highlight($args{'parameterdescs'}{$parameter_name}); + } + foreach $section (@{$args{'sectionlist'}}) { + print ".SH \"$section\"\n"; + output_highlight($args{'sections'}{$section}); + } +} + +## +# output struct in man +sub output_struct_man(%) { + my %args = %{$_[0]}; + my ($parameter, $section); + + print ".TH \"$args{'module'}\" 9 \"" . $args{'type'} . " " . $args{'struct'} . "\" \"$man_date\" \"API Manual\" LINUX\n"; + + print ".SH NAME\n"; + print $args{'type'} . " " . $args{'struct'} . " \\- " . $args{'purpose'} . "\n"; + + my $declaration = $args{'definition'}; + $declaration =~ s/\t/ /g; + $declaration =~ s/\n/"\n.br\n.BI \"/g; + print ".SH SYNOPSIS\n"; + print $args{'type'} . " " . $args{'struct'} . " {\n.br\n"; + print ".BI \"$declaration\n};\n.br\n\n"; + + print ".SH Members\n"; + foreach $parameter (@{$args{'parameterlist'}}) { + ($parameter =~ /^#/) && next; + + my $parameter_name = $parameter; + $parameter_name =~ s/\[.*//; + + ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next; + print ".IP \"" . $parameter . "\" 12\n"; + output_highlight($args{'parameterdescs'}{$parameter_name}); + } + foreach $section (@{$args{'sectionlist'}}) { + print ".SH \"$section\"\n"; + output_highlight($args{'sections'}{$section}); + } +} + +## +# output typedef in man +sub output_typedef_man(%) { + my %args = %{$_[0]}; + my ($parameter, $section); + + print ".TH \"$args{'module'}\" 9 \"$args{'typedef'}\" \"$man_date\" \"API Manual\" LINUX\n"; + + print ".SH NAME\n"; + print "typedef " . $args{'typedef'} . " \\- " . $args{'purpose'} . "\n"; + + foreach $section (@{$args{'sectionlist'}}) { + print ".SH \"$section\"\n"; + output_highlight($args{'sections'}{$section}); + } +} + +sub output_blockhead_man(%) { + my %args = %{$_[0]}; + my ($parameter, $section); + my $count; + + print ".TH \"$args{'module'}\" 9 \"$args{'module'}\" \"$man_date\" \"API Manual\" LINUX\n"; + + foreach $section (@{$args{'sectionlist'}}) { + print ".SH \"$section\"\n"; + output_highlight($args{'sections'}{$section}); + } +} + +## +# output in restructured text +# + +# +# This could use some work; it's used to output the DOC: sections, and +# starts by putting out the name of the doc section itself, but that tends +# to duplicate a header already in the template file. 
+# +sub output_blockhead_rst(%) { + my %args = %{$_[0]}; + my ($parameter, $section); + + foreach $section (@{$args{'sectionlist'}}) { + next if (defined($nosymbol_table{$section})); + + if ($output_selection != OUTPUT_INCLUDE) { + print "**$section**\n\n"; + } + print_lineno($section_start_lines{$section}); + output_highlight_rst($args{'sections'}{$section}); + print "\n"; + } +} + +# +# Apply the RST highlights to a sub-block of text. +# +sub highlight_block($) { + # The dohighlight kludge requires the text be called $contents + my $contents = shift; + eval $dohighlight; + die $@ if $@; + return $contents; +} + +# +# Regexes used only here. +# +my $sphinx_literal = '^[^.].*::$'; +my $sphinx_cblock = '^\.\.\ +code-block::'; + +sub output_highlight_rst { + my $input = join "\n",@_; + my $output = ""; + my $line; + my $in_literal = 0; + my $litprefix; + my $block = ""; + + foreach $line (split "\n",$input) { + # + # If we're in a literal block, see if we should drop out + # of it. Otherwise pass the line straight through unmunged. + # + if ($in_literal) { + if (! ($line =~ /^\s*$/)) { + # + # If this is the first non-blank line in a literal + # block we need to figure out what the proper indent is. + # + if ($litprefix eq "") { + $line =~ /^(\s*)/; + $litprefix = '^' . $1; + $output .= $line . "\n"; + } elsif (! ($line =~ /$litprefix/)) { + $in_literal = 0; + } else { + $output .= $line . "\n"; + } + } else { + $output .= $line . "\n"; + } + } + # + # Not in a literal block (or just dropped out) + # + if (! $in_literal) { + $block .= $line . "\n"; + if (($line =~ /$sphinx_literal/) || ($line =~ /$sphinx_cblock/)) { + $in_literal = 1; + $litprefix = ""; + $output .= highlight_block($block); + $block = "" + } + } + } + + if ($block) { + $output .= highlight_block($block); + } + foreach $line (split "\n", $output) { + print $lineprefix . $line . "\n"; + } +} + +sub output_function_rst(%) { + my %args = %{$_[0]}; + my ($parameter, $section); + my $oldprefix = $lineprefix; + my $start = ""; + my $is_macro = 0; + + if ($sphinx_major < 3) { + if ($args{'typedef'}) { + print ".. c:type:: ". $args{'function'} . "\n\n"; + print_lineno($declaration_start_line); + print " **Typedef**: "; + $lineprefix = ""; + output_highlight_rst($args{'purpose'}); + $start = "\n\n**Syntax**\n\n ``"; + $is_macro = 1; + } else { + print ".. c:function:: "; + } + } else { + if ($args{'typedef'} || $args{'functiontype'} eq "") { + $is_macro = 1; + print ".. c:macro:: ". $args{'function'} . "\n\n"; + } else { + print ".. c:function:: "; + } + + if ($args{'typedef'}) { + print_lineno($declaration_start_line); + print " **Typedef**: "; + $lineprefix = ""; + output_highlight_rst($args{'purpose'}); + $start = "\n\n**Syntax**\n\n ``"; + } else { + print "``" if ($is_macro); + } + } + if ($args{'functiontype'} ne "") { + $start .= $args{'functiontype'} . " " . $args{'function'} . " ("; + } else { + $start .= $args{'function'} . " ("; + } + print $start; + + my $count = 0; + foreach my $parameter (@{$args{'parameterlist'}}) { + if ($count ne 0) { + print ", "; + } + $count++; + $type = $args{'parametertypes'}{$parameter}; + + if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) { + # pointer-to-function + print $1 . $parameter . ") (" . $2 . 
")"; + } else { + print $type; + } + } + if ($is_macro) { + print ")``\n\n"; + } else { + print ")\n\n"; + } + if (!$args{'typedef'}) { + print_lineno($declaration_start_line); + $lineprefix = " "; + output_highlight_rst($args{'purpose'}); + print "\n"; + } + + print "**Parameters**\n\n"; + $lineprefix = " "; + foreach $parameter (@{$args{'parameterlist'}}) { + my $parameter_name = $parameter; + $parameter_name =~ s/\[.*//; + $type = $args{'parametertypes'}{$parameter}; + + if ($type ne "") { + print "``$type``\n"; + } else { + print "``$parameter``\n"; + } + + print_lineno($parameterdesc_start_lines{$parameter_name}); + + if (defined($args{'parameterdescs'}{$parameter_name}) && + $args{'parameterdescs'}{$parameter_name} ne $undescribed) { + output_highlight_rst($args{'parameterdescs'}{$parameter_name}); + } else { + print " *undescribed*\n"; + } + print "\n"; + } + + $lineprefix = $oldprefix; + output_section_rst(@_); +} + +sub output_section_rst(%) { + my %args = %{$_[0]}; + my $section; + my $oldprefix = $lineprefix; + $lineprefix = ""; + + foreach $section (@{$args{'sectionlist'}}) { + print "**$section**\n\n"; + print_lineno($section_start_lines{$section}); + output_highlight_rst($args{'sections'}{$section}); + print "\n"; + } + print "\n"; + $lineprefix = $oldprefix; +} + +sub output_enum_rst(%) { + my %args = %{$_[0]}; + my ($parameter); + my $oldprefix = $lineprefix; + my $count; + + if ($sphinx_major < 3) { + my $name = "enum " . $args{'enum'}; + print "\n\n.. c:type:: " . $name . "\n\n"; + } else { + my $name = $args{'enum'}; + print "\n\n.. c:enum:: " . $name . "\n\n"; + } + print_lineno($declaration_start_line); + $lineprefix = " "; + output_highlight_rst($args{'purpose'}); + print "\n"; + + print "**Constants**\n\n"; + $lineprefix = " "; + foreach $parameter (@{$args{'parameterlist'}}) { + print "``$parameter``\n"; + if ($args{'parameterdescs'}{$parameter} ne $undescribed) { + output_highlight_rst($args{'parameterdescs'}{$parameter}); + } else { + print " *undescribed*\n"; + } + print "\n"; + } + + $lineprefix = $oldprefix; + output_section_rst(@_); +} + +sub output_typedef_rst(%) { + my %args = %{$_[0]}; + my ($parameter); + my $oldprefix = $lineprefix; + my $name; + + if ($sphinx_major < 3) { + $name = "typedef " . $args{'typedef'}; + } else { + $name = $args{'typedef'}; + } + print "\n\n.. c:type:: " . $name . "\n\n"; + print_lineno($declaration_start_line); + $lineprefix = " "; + output_highlight_rst($args{'purpose'}); + print "\n"; + + $lineprefix = $oldprefix; + output_section_rst(@_); +} + +sub output_struct_rst(%) { + my %args = %{$_[0]}; + my ($parameter); + my $oldprefix = $lineprefix; + + if ($sphinx_major < 3) { + my $name = $args{'type'} . " " . $args{'struct'}; + print "\n\n.. c:type:: " . $name . "\n\n"; + } else { + my $name = $args{'struct'}; + if ($args{'type'} eq 'union') { + print "\n\n.. c:union:: " . $name . "\n\n"; + } else { + print "\n\n.. c:struct:: " . $name . "\n\n"; + } + } + print_lineno($declaration_start_line); + $lineprefix = " "; + output_highlight_rst($args{'purpose'}); + print "\n"; + + print "**Definition**\n\n"; + print "::\n\n"; + my $declaration = $args{'definition'}; + $declaration =~ s/\t/ /g; + print " " . $args{'type'} . " " . $args{'struct'} . 
" {\n$declaration };\n\n"; + + print "**Members**\n\n"; + $lineprefix = " "; + foreach $parameter (@{$args{'parameterlist'}}) { + ($parameter =~ /^#/) && next; + + my $parameter_name = $parameter; + $parameter_name =~ s/\[.*//; + + ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next; + $type = $args{'parametertypes'}{$parameter}; + print_lineno($parameterdesc_start_lines{$parameter_name}); + print "``" . $parameter . "``\n"; + output_highlight_rst($args{'parameterdescs'}{$parameter_name}); + print "\n"; + } + print "\n"; + + $lineprefix = $oldprefix; + output_section_rst(@_); +} + +## none mode output functions + +sub output_function_none(%) { +} + +sub output_enum_none(%) { +} + +sub output_typedef_none(%) { +} + +sub output_struct_none(%) { +} + +sub output_blockhead_none(%) { +} + +## +# generic output function for all types (function, struct/union, typedef, enum); +# calls the generated, variable output_ function name based on +# functype and output_mode +sub output_declaration { + no strict 'refs'; + my $name = shift; + my $functype = shift; + my $func = "output_${functype}_$output_mode"; + + return if (defined($nosymbol_table{$name})); + + if (($output_selection == OUTPUT_ALL) || + (($output_selection == OUTPUT_INCLUDE || + $output_selection == OUTPUT_EXPORTED) && + defined($function_table{$name})) || + ($output_selection == OUTPUT_INTERNAL && + !($functype eq "function" && defined($function_table{$name})))) + { + &$func(@_); + $section_counter++; + } +} + +## +# generic output function - calls the right one based on current output mode. +sub output_blockhead { + no strict 'refs'; + my $func = "output_blockhead_" . $output_mode; + &$func(@_); + $section_counter++; +} + +## +# takes a declaration (struct, union, enum, typedef) and +# invokes the right handler. NOT called for functions. +sub dump_declaration($$) { + no strict 'refs'; + my ($prototype, $file) = @_; + my $func = "dump_" . 
$decl_type; + &$func(@_); +} + +sub dump_union($$) { + dump_struct(@_); +} + +sub dump_struct($$) { + my $x = shift; + my $file = shift; + + if ($x =~ /(struct|union)\s+(\w+)\s*\{(.*)\}(\s*(__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*/) { + my $decl_type = $1; + $declaration_name = $2; + my $members = $3; + + # ignore members marked private: + $members =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi; + $members =~ s/\/\*\s*private:.*//gosi; + # strip comments: + $members =~ s/\/\*.*?\*\///gos; + # strip attributes + $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi; + $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; + $members =~ s/\s*__packed\s*/ /gos; + $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; + $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; + $members =~ s/\s*____cacheline_aligned/ /gos; + + # replace DECLARE_BITMAP + $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos; + $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; + # replace DECLARE_HASHTABLE + $members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos; + # replace DECLARE_KFIFO + $members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; + # replace DECLARE_KFIFO_PTR + $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; + + my $declaration = $members; + + # Split nested struct/union elements as newer ones + while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) { + my $newmember; + my $maintype = $1; + my $ids = $4; + my $content = $3; + foreach my $id(split /,/, $ids) { + $newmember .= "$maintype $id; "; + + $id =~ s/[:\[].*//; + $id =~ s/^\s*\**(\S+)\s*/$1/; + foreach my $arg (split /;/, $content) { + next if ($arg =~ m/^\s*$/); + if ($arg =~ m/^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)/) { + # pointer-to-function + my $type = $1; + my $name = $2; + my $extra = $3; + next if (!$name); + if ($id =~ m/^\s*$/) { + # anonymous struct/union + $newmember .= "$type$name$extra; "; + } else { + $newmember .= "$type$id.$name$extra; "; + } + } else { + my $type; + my $names; + $arg =~ s/^\s+//; + $arg =~ s/\s+$//; + # Handle bitmaps + $arg =~ s/:\s*\d+\s*//g; + # Handle arrays + $arg =~ s/\[.*\]//g; + # The type may have multiple words, + # and multiple IDs can be defined, like: + # const struct foo, *bar, foobar + # So, we remove spaces when parsing the + # names, in order to match just names + # and commas for the names + $arg =~ s/\s*,\s*/,/g; + if ($arg =~ m/(.*)\s+([\S+,]+)/) { + $type = $1; + $names = $2; + } else { + $newmember .= "$arg; "; + next; + } + foreach my $name (split /,/, $names) { + $name =~ s/^\s*\**(\S+)\s*/$1/; + next if (($name =~ m/^\s*$/)); + if ($id =~ m/^\s*$/) { + # anonymous struct/union + $newmember .= "$type $name; "; + } else { + $newmember .= "$type $id.$name; "; + } + } + } + } + } + $members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/; + } + + # Ignore other nested elements, like enums + $members =~ s/(\{[^\{\}]*\})//g; + + create_parameterlist($members, ';', $file, $declaration_name); + check_sections($file, $declaration_name, $decl_type, $sectcheck, $struct_actual); + + # Adjust declaration for better display + $declaration =~ s/([\{;])/$1\n/g; + $declaration =~ s/\}\s+;/};/g; + # Better handle inlined enums + do {} while ($declaration =~ 
s/(enum\s+\{[^\}]+),([^\n])/$1,\n$2/); + + my @def_args = split /\n/, $declaration; + my $level = 1; + $declaration = ""; + foreach my $clause (@def_args) { + $clause =~ s/^\s+//; + $clause =~ s/\s+$//; + $clause =~ s/\s+/ /; + next if (!$clause); + $level-- if ($clause =~ m/(\})/ && $level > 1); + if (!($clause =~ m/^\s*#/)) { + $declaration .= "\t" x $level; + } + $declaration .= "\t" . $clause . "\n"; + $level++ if ($clause =~ m/(\{)/ && !($clause =~m/\}/)); + } + output_declaration($declaration_name, + 'struct', + {'struct' => $declaration_name, + 'module' => $modulename, + 'definition' => $declaration, + 'parameterlist' => \@parameterlist, + 'parameterdescs' => \%parameterdescs, + 'parametertypes' => \%parametertypes, + 'sectionlist' => \@sectionlist, + 'sections' => \%sections, + 'purpose' => $declaration_purpose, + 'type' => $decl_type + }); + } + else { + print STDERR "${file}:$.: error: Cannot parse struct or union!\n"; + ++$errors; + } +} + + +sub show_warnings($$) { + my $functype = shift; + my $name = shift; + + return 0 if (defined($nosymbol_table{$name})); + + return 1 if ($output_selection == OUTPUT_ALL); + + if ($output_selection == OUTPUT_EXPORTED) { + if (defined($function_table{$name})) { + return 1; + } else { + return 0; + } + } + if ($output_selection == OUTPUT_INTERNAL) { + if (!($functype eq "function" && defined($function_table{$name}))) { + return 1; + } else { + return 0; + } + } + if ($output_selection == OUTPUT_INCLUDE) { + if (defined($function_table{$name})) { + return 1; + } else { + return 0; + } + } + die("Please add the new output type at show_warnings()"); +} + +sub dump_enum($$) { + my $x = shift; + my $file = shift; + my $members; + + + $x =~ s@/\*.*?\*/@@gos; # strip comments. + # strip #define macros inside enums + $x =~ s@#\s*((define|ifdef)\s+|endif)[^;]*;@@gos; + + if ($x =~ /typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;/) { + $declaration_name = $2; + $members = $1; + } elsif ($x =~ /enum\s+(\w*)\s*\{(.*)\}/) { + $declaration_name = $1; + $members = $2; + } + + if ($declaration_name) { + my %_members; + + $members =~ s/\s+$//; + + foreach my $arg (split ',', $members) { + $arg =~ s/^\s*(\w+).*/$1/; + push @parameterlist, $arg; + if (!$parameterdescs{$arg}) { + $parameterdescs{$arg} = $undescribed; + if (show_warnings("enum", $declaration_name)) { + print STDERR "${file}:$.: warning: Enum value '$arg' not described in enum '$declaration_name'\n"; + } + } + $_members{$arg} = 1; + } + + while (my ($k, $v) = each %parameterdescs) { + if (!exists($_members{$k})) { + if (show_warnings("enum", $declaration_name)) { + print STDERR "${file}:$.: warning: Excess enum value '$k' description in '$declaration_name'\n"; + } + } + } + + output_declaration($declaration_name, + 'enum', + {'enum' => $declaration_name, + 'module' => $modulename, + 'parameterlist' => \@parameterlist, + 'parameterdescs' => \%parameterdescs, + 'sectionlist' => \@sectionlist, + 'sections' => \%sections, + 'purpose' => $declaration_purpose + }); + } else { + print STDERR "${file}:$.: error: Cannot parse enum!\n"; + ++$errors; + } +} + +my $typedef_type = qr { ((?:\s+[\w\*]+){1,8})\s* }x; +my $typedef_ident = qr { \*?\s*(\w\S+)\s* }x; +my $typedef_args = qr { \s*\((.*)\); }x; + +my $typedef1 = qr { typedef$typedef_type\($typedef_ident\)$typedef_args }x; +my $typedef2 = qr { typedef$typedef_type$typedef_ident$typedef_args }x; + +sub dump_typedef($$) { + my $x = shift; + my $file = shift; + + $x =~ s@/\*.*?\*/@@gos; # strip comments. 
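# NB: illustrative sketch, not part of the patch -- a standalone check
# of the $typedef2 pattern defined above (the typedef text is
# invented). $1 keeps a leading space because the type sub-pattern
# matches it:
#
#     my $t_type  = qr { ((?:\s+[\w\*]+){1,8})\s* }x;
#     my $t_ident = qr { \*?\s*(\w\S+)\s* }x;
#     my $t_args  = qr { \s*\((.*)\); }x;
#     my $t2 = qr { typedef$t_type$t_ident$t_args }x;
#     if ("typedef int demo_cb(void *opaque);" =~ $t2) {
#         print "type='$1' name='$2' args='$3'\n";
#     }   # type=' int' name='demo_cb' args='void *opaque'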
+ + # Parse function typedef prototypes + if ($x =~ $typedef1 || $x =~ $typedef2) { + $return_type = $1; + $declaration_name = $2; + my $args = $3; + $return_type =~ s/^\s+//; + + create_parameterlist($args, ',', $file, $declaration_name); + + output_declaration($declaration_name, + 'function', + {'function' => $declaration_name, + 'typedef' => 1, + 'module' => $modulename, + 'functiontype' => $return_type, + 'parameterlist' => \@parameterlist, + 'parameterdescs' => \%parameterdescs, + 'parametertypes' => \%parametertypes, + 'sectionlist' => \@sectionlist, + 'sections' => \%sections, + 'purpose' => $declaration_purpose + }); + return; + } + + while (($x =~ /\(*.\)\s*;$/) || ($x =~ /\[*.\]\s*;$/)) { + $x =~ s/\(*.\)\s*;$/;/; + $x =~ s/\[*.\]\s*;$/;/; + } + + if ($x =~ /typedef.*\s+(\w+)\s*;/) { + $declaration_name = $1; + + output_declaration($declaration_name, + 'typedef', + {'typedef' => $declaration_name, + 'module' => $modulename, + 'sectionlist' => \@sectionlist, + 'sections' => \%sections, + 'purpose' => $declaration_purpose + }); + } + else { + print STDERR "${file}:$.: error: Cannot parse typedef!\n"; + ++$errors; + } +} + +sub save_struct_actual($) { + my $actual = shift; + + # strip all spaces from the actual param so that it looks like one string item + $actual =~ s/\s*//g; + $struct_actual = $struct_actual . $actual . " "; +} + +sub create_parameterlist($$$$) { + my $args = shift; + my $splitter = shift; + my $file = shift; + my $declaration_name = shift; + my $type; + my $param; + + # temporarily replace commas inside function pointer definition + while ($args =~ /(\([^\),]+),/) { + $args =~ s/(\([^\),]+),/$1#/g; + } + + foreach my $arg (split($splitter, $args)) { + # strip comments + $arg =~ s/\/\*.*\*\///; + # strip leading/trailing spaces + $arg =~ s/^\s*//; + $arg =~ s/\s*$//; + $arg =~ s/\s+/ /; + + if ($arg =~ /^#/) { + # Treat preprocessor directive as a typeless variable just to fill + # corresponding data structures "correctly". Catch it later in + # output_* subs. + push_parameter($arg, "", "", $file); + } elsif ($arg =~ m/\(.+\)\s*\(/) { + # pointer-to-function + $arg =~ tr/#/,/; + $arg =~ m/[^\(]+\(\*?\s*([\w\.]*)\s*\)/; + $param = $1; + $type = $arg; + $type =~ s/([^\(]+\(\*?)\s*$param/$1/; + save_struct_actual($param); + push_parameter($param, $type, $arg, $file, $declaration_name); + } elsif ($arg) { + $arg =~ s/\s*:\s*/:/g; + $arg =~ s/\s*\[/\[/g; + + my @args = split('\s*,\s*', $arg); + if ($args[0] =~ m/\*/) { + $args[0] =~ s/(\*+)\s*/ $1/; + } + + my @first_arg; + if ($args[0] =~ /^(.*\s+)(.*?\[.*\].*)$/) { + shift @args; + push(@first_arg, split('\s+', $1)); + push(@first_arg, $2); + } else { + @first_arg = split('\s+', shift @args); + } + + unshift(@args, pop @first_arg); + $type = join " ", @first_arg; + + foreach $param (@args) { + if ($param =~ m/^(\*+)\s*(.*)/) { + save_struct_actual($2); + + push_parameter($2, "$type $1", $arg, $file, $declaration_name); + } + elsif ($param =~ m/(.*?):(\d+)/) { + if ($type ne "") { # skip unnamed bit-fields + save_struct_actual($1); + push_parameter($1, "$type:$2", $arg, $file, $declaration_name) + } + } + else { + save_struct_actual($param); + push_parameter($param, $type, $arg, $file, $declaration_name); + } + } + } + } +} + +sub push_parameter($$$$$) { + my $param = shift; + my $type = shift; + my $org_arg = shift; + my $file = shift; + my $declaration_name = shift; + + if (($anon_struct_union == 1) && ($type eq "") && + ($param eq "}")) { + return; # ignore the ending }; from anon. 
struct/union + } + + $anon_struct_union = 0; + $param =~ s/[\[\)].*//; + + if ($type eq "" && $param =~ /\.\.\.$/) + { + if (!$param =~ /\w\.\.\.$/) { + # handles unnamed variable parameters + $param = "..."; + } + elsif ($param =~ /\w\.\.\.$/) { + # for named variable parameters of the form `x...`, remove the dots + $param =~ s/\.\.\.$//; + } + if (!defined $parameterdescs{$param} || $parameterdescs{$param} eq "") { + $parameterdescs{$param} = "variable arguments"; + } + } + elsif ($type eq "" && ($param eq "" or $param eq "void")) + { + $param="void"; + $parameterdescs{void} = "no arguments"; + } + elsif ($type eq "" && ($param eq "struct" or $param eq "union")) + # handle unnamed (anonymous) union or struct: + { + $type = $param; + $param = "{unnamed_" . $param . "}"; + $parameterdescs{$param} = "anonymous\n"; + $anon_struct_union = 1; + } + + # warn if parameter has no description + # (but ignore ones starting with # as these are not parameters + # but inline preprocessor statements); + # Note: It will also ignore void params and unnamed structs/unions + if (!defined $parameterdescs{$param} && $param !~ /^#/) { + $parameterdescs{$param} = $undescribed; + + if (show_warnings($type, $declaration_name) && $param !~ /\./) { + print STDERR + "${file}:$.: warning: Function parameter or member '$param' not described in '$declaration_name'\n"; + ++$warnings; + } + } + + # strip spaces from $param so that it is one continuous string + # on @parameterlist; + # this fixes a problem where check_sections() cannot find + # a parameter like "addr[6 + 2]" because it actually appears + # as "addr[6", "+", "2]" on the parameter list; + # but it's better to maintain the param string unchanged for output, + # so just weaken the string compare in check_sections() to ignore + # "[blah" in a parameter string; + ###$param =~ s/\s*//g; + push @parameterlist, $param; + $org_arg =~ s/\s\s+/ /g; + $parametertypes{$param} = $org_arg; +} + +sub check_sections($$$$$) { + my ($file, $decl_name, $decl_type, $sectcheck, $prmscheck) = @_; + my @sects = split ' ', $sectcheck; + my @prms = split ' ', $prmscheck; + my $err; + my ($px, $sx); + my $prm_clean; # strip trailing "[array size]" and/or beginning "*" + + foreach $sx (0 .. $#sects) { + $err = 1; + foreach $px (0 .. $#prms) { + $prm_clean = $prms[$px]; + $prm_clean =~ s/\[.*\]//; + $prm_clean =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i; + # ignore array size in a parameter string; + # however, the original param string may contain + # spaces, e.g.: addr[6 + 2] + # and this appears in @prms as "addr[6" since the + # parameter list is split at spaces; + # hence just ignore "[..." for the sections check; + $prm_clean =~ s/\[.*//; + + ##$prm_clean =~ s/^\**//; + if ($prm_clean eq $sects[$sx]) { + $err = 0; + last; + } + } + if ($err) { + if ($decl_type eq "function") { + print STDERR "${file}:$.: warning: " . + "Excess function parameter " . + "'$sects[$sx]' " . + "description in '$decl_name'\n"; + ++$warnings; + } + } + } +} + +## +# Checks the section describing the return value of a function. +sub check_return_section { + my $file = shift; + my $declaration_name = shift; + my $return_type = shift; + + # Ignore an empty return type (It's a macro) + # Ignore functions with a "void" return type. (But don't ignore "void *") + if (($return_type eq "") || ($return_type =~ /void\s*\w*\s*$/)) { + return; + } + + if (!defined($sections{$section_return}) || + $sections{$section_return} eq "") { + print STDERR "${file}:$.: warning: " . 
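# NB: illustrative sketch, not part of the patch -- create_parameterlist()
# earlier temporarily rewrites commas that sit inside function-pointer
# argument lists to '#' before splitting on the real separators, and its
# pointer-to-function branch restores them with tr/#/,/. The masking
# loop in isolation (the argument string is invented):
#
#     my $args = "int fd, void (*cb)(int, char *), void *opaque";
#     while ($args =~ /(\([^\),]+),/) {
#         $args =~ s/(\([^\),]+),/$1#/g;
#     }
#     print "$args\n";   # int fd, void (*cb)(int# char *), void *opaque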
+ "No description found for return value of " . + "'$declaration_name'\n"; + ++$warnings; + } +} + +## +# takes a function prototype and the name of the current file being +# processed and spits out all the details stored in the global +# arrays/hashes. +sub dump_function($$) { + my $prototype = shift; + my $file = shift; + my $noret = 0; + + print_lineno($new_start_line); + + $prototype =~ s/^static +//; + $prototype =~ s/^extern +//; + $prototype =~ s/^asmlinkage +//; + $prototype =~ s/^inline +//; + $prototype =~ s/^__inline__ +//; + $prototype =~ s/^__inline +//; + $prototype =~ s/^__always_inline +//; + $prototype =~ s/^noinline +//; + $prototype =~ s/__init +//; + $prototype =~ s/__init_or_module +//; + $prototype =~ s/__meminit +//; + $prototype =~ s/__must_check +//; + $prototype =~ s/__weak +//; + $prototype =~ s/__sched +//; + $prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//; + my $define = $prototype =~ s/^#\s*define\s+//; #ak added + $prototype =~ s/__attribute__\s*\(\( + (?: + [\w\s]++ # attribute name + (?:\([^)]*+\))? # attribute arguments + \s*+,? # optional comma at the end + )+ + \)\)\s+//x; + + # Strip QEMU specific compiler annotations + $prototype =~ s/QEMU_[A-Z_]+ +//; + + # Yes, this truly is vile. We are looking for: + # 1. Return type (may be nothing if we're looking at a macro) + # 2. Function name + # 3. Function parameters. + # + # All the while we have to watch out for function pointer parameters + # (which IIRC is what the two sections are for), C types (these + # regexps don't even start to express all the possibilities), and + # so on. + # + # If you mess with these regexps, it's a good idea to check that + # the following functions' documentation still comes out right: + # - parport_register_device (function pointer parameters) + # - atomic_set (macro) + # - pci_match_device, __copy_to_user (long return type) + + if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) { + # This is an object-like macro, it has no return type and no parameter + # list. + # Function-like macros are not allowed to have spaces between + # declaration_name and opening parenthesis (notice the \s+). 
+ $return_type = $1; + $declaration_name = $2; + $noret = 1; + } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || + $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || + $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || + $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || + $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { + $return_type = $1; + $declaration_name = $2; + my $args = $3; + + create_parameterlist($args, ',', $file, $declaration_name); + } else { + print STDERR "${file}:$.: warning: cannot understand function prototype: '$prototype'\n"; + return; + } + + my $prms = join " ", @parameterlist; + check_sections($file, $declaration_name, "function", $sectcheck, $prms); + + # This check emits a lot of warnings at the moment, because many + # functions don't have a 'Return' doc section. So until the number + # of warnings goes sufficiently down, the check is only performed in + # verbose mode. + # TODO: always perform the check. + if ($verbose && !$noret) { + check_return_section($file, $declaration_name, $return_type); + } + + # The function parser can be called with a typedef parameter. + # Handle it. 
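+    # (Illustrative aside, not from the original sources: a kernel-doc
+    # comment on e.g. "typedef int fn(void *opaque);" can reach this point
+    # with "typedef int" still in $return_type, so the branch below tags
+    # the output as a typedef'd function.)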
+ if ($return_type =~ /typedef/) { + output_declaration($declaration_name, + 'function', + {'function' => $declaration_name, + 'typedef' => 1, + 'module' => $modulename, + 'functiontype' => $return_type, + 'parameterlist' => \@parameterlist, + 'parameterdescs' => \%parameterdescs, + 'parametertypes' => \%parametertypes, + 'sectionlist' => \@sectionlist, + 'sections' => \%sections, + 'purpose' => $declaration_purpose + }); + } else { + output_declaration($declaration_name, + 'function', + {'function' => $declaration_name, + 'module' => $modulename, + 'functiontype' => $return_type, + 'parameterlist' => \@parameterlist, + 'parameterdescs' => \%parameterdescs, + 'parametertypes' => \%parametertypes, + 'sectionlist' => \@sectionlist, + 'sections' => \%sections, + 'purpose' => $declaration_purpose + }); + } +} + +sub reset_state { + $function = ""; + %parameterdescs = (); + %parametertypes = (); + @parameterlist = (); + %sections = (); + @sectionlist = (); + $sectcheck = ""; + $struct_actual = ""; + $prototype = ""; + + $state = STATE_NORMAL; + $inline_doc_state = STATE_INLINE_NA; +} + +sub tracepoint_munge($) { + my $file = shift; + my $tracepointname = 0; + my $tracepointargs = 0; + + if ($prototype =~ m/TRACE_EVENT\((.*?),/) { + $tracepointname = $1; + } + if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) { + $tracepointname = $1; + } + if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) { + $tracepointname = $2; + } + $tracepointname =~ s/^\s+//; #strip leading whitespace + if ($prototype =~ m/TP_PROTO\((.*?)\)/) { + $tracepointargs = $1; + } + if (($tracepointname eq 0) || ($tracepointargs eq 0)) { + print STDERR "${file}:$.: warning: Unrecognized tracepoint format: \n". + "$prototype\n"; + } else { + $prototype = "static inline void trace_$tracepointname($tracepointargs)"; + } +} + +sub syscall_munge() { + my $void = 0; + + $prototype =~ s@[\r\n]+@ @gos; # strip newlines/CR's +## if ($prototype =~ m/SYSCALL_DEFINE0\s*\(\s*(a-zA-Z0-9_)*\s*\)/) { + if ($prototype =~ m/SYSCALL_DEFINE0/) { + $void = 1; +## $prototype = "long sys_$1(void)"; + } + + $prototype =~ s/SYSCALL_DEFINE.*\(/long sys_/; # fix return type & func name + if ($prototype =~ m/long (sys_.*?),/) { + $prototype =~ s/,/\(/; + } elsif ($void) { + $prototype =~ s/\)/\(void\)/; + } + + # now delete all of the odd-number commas in $prototype + # so that arg types & arg names don't have a comma between them + my $count = 0; + my $len = length($prototype); + if ($void) { + $len = 0; # skip the for-loop + } + for (my $ix = 0; $ix < $len; $ix++) { + if (substr($prototype, $ix, 1) eq ',') { + $count++; + if ($count % 2 == 1) { + substr($prototype, $ix, 1) = ' '; + } + } + } +} + +sub process_proto_function($$) { + my $x = shift; + my $file = shift; + + $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line + + if ($x =~ m#\s*/\*\s+MACDOC\s*#io || ($x =~ /^#/ && $x !~ /^#\s*define/)) { + # do nothing + } + elsif ($x =~ /([^\{]*)/) { + $prototype .= $1; + } + + if (($x =~ /\{/) || ($x =~ /\#\s*define/) || ($x =~ /;/)) { + $prototype =~ s@/\*.*?\*/@@gos; # strip comments. + $prototype =~ s@[\r\n]+@ @gos; # strip newlines/cr's. 
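+        # (Illustrative aside, not from the original sources: at this point
+        # $prototype holds everything collected up to the "{", "#define" or
+        # ";" trigger, e.g. "static int foo(int bar, char *baz)" squashed
+        # onto a single line by the substitutions above and below.)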
+ $prototype =~ s@^\s+@@gos; # strip leading spaces + + # Handle prototypes for function pointers like: + # int (*pcs_config)(struct foo) + $prototype =~ s@^(\S+\s+)\(\s*\*(\S+)\)@$1$2@gos; + + if ($prototype =~ /SYSCALL_DEFINE/) { + syscall_munge(); + } + if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ || + $prototype =~ /DEFINE_SINGLE_EVENT/) + { + tracepoint_munge($file); + } + dump_function($prototype, $file); + reset_state(); + } +} + +sub process_proto_type($$) { + my $x = shift; + my $file = shift; + + $x =~ s@[\r\n]+@ @gos; # strip newlines/cr's. + $x =~ s@^\s+@@gos; # strip leading spaces + $x =~ s@\s+$@@gos; # strip trailing spaces + $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line + + if ($x =~ /^#/) { + # To distinguish preprocessor directive from regular declaration later. + $x .= ";"; + } + + while (1) { + if ( $x =~ /([^\{\};]*)([\{\};])(.*)/ ) { + if( length $prototype ) { + $prototype .= " " + } + $prototype .= $1 . $2; + ($2 eq '{') && $brcount++; + ($2 eq '}') && $brcount--; + if (($2 eq ';') && ($brcount == 0)) { + dump_declaration($prototype, $file); + reset_state(); + last; + } + $x = $3; + } else { + $prototype .= $x; + last; + } + } +} + + +sub map_filename($) { + my $file; + my ($orig_file) = @_; + + if (defined($ENV{'SRCTREE'})) { + $file = "$ENV{'SRCTREE'}" . "/" . $orig_file; + } else { + $file = $orig_file; + } + + if (defined($source_map{$file})) { + $file = $source_map{$file}; + } + + return $file; +} + +sub process_export_file($) { + my ($orig_file) = @_; + my $file = map_filename($orig_file); + + if (!open(IN,"<$file")) { + print STDERR "Error: Cannot open file $file\n"; + ++$errors; + return; + } + + while (<IN>) { + if (/$export_symbol/) { + next if (defined($nosymbol_table{$2})); + $function_table{$2} = 1; + } + } + + close(IN); +} + +# +# Parsers for the various processing states. +# +# STATE_NORMAL: looking for the /** to begin everything. +# +sub process_normal() { + if (/$doc_start/o) { + $state = STATE_NAME; # next line is always the function name + $in_doc_sect = 0; + $declaration_start_line = $. + 1; + } +} + +# +# STATE_NAME: Looking for the "name - description" line +# +sub process_name($$) { + my $file = shift; + my $identifier; + my $descr; + + if (/$doc_block/o) { + $state = STATE_DOCBLOCK; + $contents = ""; + $new_start_line = $.; + + if ( $1 eq "" ) { + $section = $section_intro; + } else { + $section = $1; + } + } + elsif (/$doc_decl/o) { + $identifier = $1; + if (/\s*([\w\s]+?)(\s*-|:)/) { + $identifier = $1; + } + + $state = STATE_BODY; + # if there's no @param blocks need to set up default section + # here + $contents = ""; + $section = $section_default; + $new_start_line = $. 
+ 1; + if (/[-:](.*)/) { + # strip leading/trailing/multiple spaces + $descr= $1; + $descr =~ s/^\s*//; + $descr =~ s/\s*$//; + $descr =~ s/\s+/ /g; + $declaration_purpose = $descr; + $state = STATE_BODY_MAYBE; + } else { + $declaration_purpose = ""; + } + + if (($declaration_purpose eq "") && $verbose) { + print STDERR "${file}:$.: warning: missing initial short description on line:\n"; + print STDERR $_; + ++$warnings; + } + + if ($identifier =~ m/^struct\b/) { + $decl_type = 'struct'; + } elsif ($identifier =~ m/^union\b/) { + $decl_type = 'union'; + } elsif ($identifier =~ m/^enum\b/) { + $decl_type = 'enum'; + } elsif ($identifier =~ m/^typedef\b/) { + $decl_type = 'typedef'; + } else { + $decl_type = 'function'; + } + + if ($verbose) { + print STDERR "${file}:$.: info: Scanning doc for $identifier\n"; + } + } else { + print STDERR "${file}:$.: warning: Cannot understand $_ on line $.", + " - I thought it was a doc line\n"; + ++$warnings; + $state = STATE_NORMAL; + } +} + + +# +# STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. +# +sub process_body($$) { + my $file = shift; + + # Until all named variable macro parameters are + # documented using the bare name (`x`) rather than with + # dots (`x...`), strip the dots: + if ($section =~ /\w\.\.\.$/) { + $section =~ s/\.\.\.$//; + + if ($verbose) { + print STDERR "${file}:$.: warning: Variable macro arguments should be documented without dots\n"; + ++$warnings; + } + } + + if ($state == STATE_BODY_WITH_BLANK_LINE && /^\s*\*\s?\S/) { + dump_section($file, $section, $contents); + $section = $section_default; + $new_start_line = $.; + $contents = ""; + } + + if (/$doc_sect/i) { # case insensitive for supported section names + $newsection = $1; + $newcontents = $2; + + # map the supported section names to the canonical names + if ($newsection =~ m/^description$/i) { + $newsection = $section_default; + } elsif ($newsection =~ m/^context$/i) { + $newsection = $section_context; + } elsif ($newsection =~ m/^returns?$/i) { + $newsection = $section_return; + } elsif ($newsection =~ m/^\@return$/) { + # special: @return is a section, not a param description + $newsection = $section_return; + } + + if (($contents ne "") && ($contents ne "\n")) { + if (!$in_doc_sect && $verbose) { + print STDERR "${file}:$.: warning: contents before sections\n"; + ++$warnings; + } + dump_section($file, $section, $contents); + $section = $section_default; + } + + $in_doc_sect = 1; + $state = STATE_BODY; + $contents = $newcontents; + $new_start_line = $.; + while (substr($contents, 0, 1) eq " ") { + $contents = substr($contents, 1); + } + if ($contents ne "") { + $contents .= "\n"; + } + $section = $newsection; + $leading_space = undef; + } elsif (/$doc_end/) { + if (($contents ne "") && ($contents ne "\n")) { + dump_section($file, $section, $contents); + $section = $section_default; + $contents = ""; + } + # look for doc_com + <text> + doc_end: + if ($_ =~ m'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') { + print STDERR "${file}:$.: warning: suspicious ending line: $_"; + ++$warnings; + } + + $prototype = ""; + $state = STATE_PROTO; + $brcount = 0; + $new_start_line = $. 
+ 1; + } elsif (/$doc_content/) { + if ($1 eq "") { + if ($section eq $section_context) { + dump_section($file, $section, $contents); + $section = $section_default; + $contents = ""; + $new_start_line = $.; + $state = STATE_BODY; + } else { + if ($section ne $section_default) { + $state = STATE_BODY_WITH_BLANK_LINE; + } else { + $state = STATE_BODY; + } + $contents .= "\n"; + } + } elsif ($state == STATE_BODY_MAYBE) { + # Continued declaration purpose + chomp($declaration_purpose); + $declaration_purpose .= " " . $1; + $declaration_purpose =~ s/\s+/ /g; + } else { + my $cont = $1; + if ($section =~ m/^@/ || $section eq $section_context) { + if (!defined $leading_space) { + if ($cont =~ m/^(\s+)/) { + $leading_space = $1; + } else { + $leading_space = ""; + } + } + $cont =~ s/^$leading_space//; + } + $contents .= $cont . "\n"; + } + } else { + # i dont know - bad line? ignore. + print STDERR "${file}:$.: warning: bad line: $_"; + ++$warnings; + } +} + + +# +# STATE_PROTO: reading a function/whatever prototype. +# +sub process_proto($$) { + my $file = shift; + + if (/$doc_inline_oneline/) { + $section = $1; + $contents = $2; + if ($contents ne "") { + $contents .= "\n"; + dump_section($file, $section, $contents); + $section = $section_default; + $contents = ""; + } + } elsif (/$doc_inline_start/) { + $state = STATE_INLINE; + $inline_doc_state = STATE_INLINE_NAME; + } elsif ($decl_type eq 'function') { + process_proto_function($_, $file); + } else { + process_proto_type($_, $file); + } +} + +# +# STATE_DOCBLOCK: within a DOC: block. +# +sub process_docblock($$) { + my $file = shift; + + if (/$doc_end/) { + dump_doc_section($file, $section, $contents); + $section = $section_default; + $contents = ""; + $function = ""; + %parameterdescs = (); + %parametertypes = (); + @parameterlist = (); + %sections = (); + @sectionlist = (); + $prototype = ""; + $state = STATE_NORMAL; + } elsif (/$doc_content/) { + if ( $1 eq "" ) { + $contents .= $blankline; + } else { + $contents .= $1 . "\n"; + } + } +} + +# +# STATE_INLINE: docbook comments within a prototype. +# +sub process_inline($$) { + my $file = shift; + + # First line (state 1) needs to be a @parameter + if ($inline_doc_state == STATE_INLINE_NAME && /$doc_inline_sect/o) { + $section = $1; + $contents = $2; + $new_start_line = $.; + if ($contents ne "") { + while (substr($contents, 0, 1) eq " ") { + $contents = substr($contents, 1); + } + $contents .= "\n"; + } + $inline_doc_state = STATE_INLINE_TEXT; + # Documentation block end */ + } elsif (/$doc_inline_end/) { + if (($contents ne "") && ($contents ne "\n")) { + dump_section($file, $section, $contents); + $section = $section_default; + $contents = ""; + } + $state = STATE_PROTO; + $inline_doc_state = STATE_INLINE_NA; + # Regular text + } elsif (/$doc_content/) { + if ($inline_doc_state == STATE_INLINE_TEXT) { + $contents .= $1 . "\n"; + # nuke leading blank lines + if ($contents =~ /^\s*$/) { + $contents = ""; + } + } elsif ($inline_doc_state == STATE_INLINE_NAME) { + $inline_doc_state = STATE_INLINE_ERROR; + print STDERR "${file}:$.: warning: "; + print STDERR "Incorrect use of kernel-doc format: $_"; + ++$warnings; + } + } +} + + +sub process_file($) { + my $file; + my $initial_section_counter = $section_counter; + my ($orig_file) = @_; + + $file = map_filename($orig_file); + + if (!open(IN_FILE,"<$file")) { + print STDERR "Error: Cannot open file $file\n"; + ++$errors; + return; + } + + $. 
= 1; + + $section_counter = 0; + while (<IN_FILE>) { + while (s/\\\s*$//) { + $_ .= <IN_FILE>; + } + # Replace tabs by spaces + while ($_ =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e) {}; + # Hand this line to the appropriate state handler + if ($state == STATE_NORMAL) { + process_normal(); + } elsif ($state == STATE_NAME) { + process_name($file, $_); + } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE || + $state == STATE_BODY_WITH_BLANK_LINE) { + process_body($file, $_); + } elsif ($state == STATE_INLINE) { # scanning for inline parameters + process_inline($file, $_); + } elsif ($state == STATE_PROTO) { + process_proto($file, $_); + } elsif ($state == STATE_DOCBLOCK) { + process_docblock($file, $_); + } + } + + # Make sure we got something interesting. + if ($initial_section_counter == $section_counter && $ + output_mode ne "none") { + if ($output_selection == OUTPUT_INCLUDE) { + print STDERR "${file}:1: warning: '$_' not found\n" + for keys %function_table; + } + else { + print STDERR "${file}:1: warning: no structured comments found\n"; + } + } + close IN_FILE; +} + + +if ($output_mode eq "rst") { + get_sphinx_version() if (!$sphinx_major); +} + +$kernelversion = get_kernel_version(); + +# generate a sequence of code that will splice in highlighting information +# using the s// operator. +for (my $k = 0; $k < @highlights; $k++) { + my $pattern = $highlights[$k][0]; + my $result = $highlights[$k][1]; +# print STDERR "scanning pattern:$pattern, highlight:($result)\n"; + $dohighlight .= "\$contents =~ s:$pattern:$result:gs;\n"; +} + +# Read the file that maps relative names to absolute names for +# separate source and object directories and for shadow trees. +if (open(SOURCE_MAP, "<.tmp_filelist.txt")) { + my ($relname, $absname); + while(<SOURCE_MAP>) { + chop(); + ($relname, $absname) = (split())[0..1]; + $relname =~ s:^/+::; + $source_map{$relname} = $absname; + } + close(SOURCE_MAP); +} + +if ($output_selection == OUTPUT_EXPORTED || + $output_selection == OUTPUT_INTERNAL) { + + push(@export_file_list, @ARGV); + + foreach (@export_file_list) { + chomp; + process_export_file($_); + } +} + +foreach (@ARGV) { + chomp; + process_file($_); +} +if ($verbose && $errors) { + print STDERR "$errors errors\n"; +} +if ($verbose && $warnings) { + print STDERR "$warnings warnings\n"; +} + +if ($Werror && $warnings) { + print STDERR "$warnings warnings as Errors\n"; + exit($warnings); +} else { + exit($output_mode eq "none" ? 0 : $errors) +} diff --git a/scripts/kvm/kvm_flightrecorder b/scripts/kvm/kvm_flightrecorder new file mode 100755 index 000000000..78ca3af9c --- /dev/null +++ b/scripts/kvm/kvm_flightrecorder @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +# +# KVM Flight Recorder - ring buffer tracing script +# +# Copyright (C) 2012 IBM Corp +# +# Author: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> +# +# This script provides a command-line interface to kvm ftrace and is designed +# to be used as a flight recorder that is always running. To start in-memory +# recording: +# +# sudo kvm_flightrecorder start 8192 # 8 MB per-cpu ring buffers +# +# The per-cpu ring buffer size can be given in KB as an optional argument to +# the 'start' subcommand. 
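+# For example, "sudo kvm_flightrecorder start 4096" requests 4 MB per-cpu
+# buffers; the value is passed through to the kernel's buffer_size_kb
+# tracing control (see the 'start' handling in main() below).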
+#
+# To stop the flight recorder:
+#
+# sudo kvm_flightrecorder stop
+#
+# To dump the contents of the flight recorder (this can be done when the
+# recorder is stopped or while it is running):
+#
+# sudo kvm_flightrecorder dump >/path/to/dump.txt
+#
+# To observe the trace while it is running, use the 'tail' subcommand:
+#
+# sudo kvm_flightrecorder tail
+#
+# Note that the flight recorder may impact overall system performance by
+# consuming CPU cycles. No disk I/O is performed since the ring buffer holds a
+# fixed-size in-memory trace.
+
+import sys
+import os
+
+tracing_dir = '/sys/kernel/debug/tracing'
+
+def trace_path(*args):
+    return os.path.join(tracing_dir, *args)
+
+def write_file(path, data):
+    # Text mode ('w'): callers pass str values such as '1'/'0'; binary
+    # mode would raise TypeError under Python 3.
+    open(path, 'w').write(data)
+
+def enable_event(subsystem, event, enable):
+    write_file(trace_path('events', subsystem, event, 'enable'), '1' if enable else '0')
+
+def enable_subsystem(subsystem, enable):
+    write_file(trace_path('events', subsystem, 'enable'), '1' if enable else '0')
+
+def start_tracing():
+    enable_subsystem('kvm', True)
+    write_file(trace_path('tracing_on'), '1')
+
+def stop_tracing():
+    write_file(trace_path('tracing_on'), '0')
+    enable_subsystem('kvm', False)
+    write_file(trace_path('events', 'enable'), '0')
+    write_file(trace_path('current_tracer'), 'nop')
+
+def dump_trace():
+    tracefile = open(trace_path('trace'), 'r')
+    try:
+        lines = True
+        while lines:
+            lines = tracefile.readlines(64 * 1024)
+            sys.stdout.writelines(lines)
+    except KeyboardInterrupt:
+        pass
+
+def tail_trace():
+    try:
+        for line in open(trace_path('trace_pipe'), 'r'):
+            sys.stdout.write(line)
+    except KeyboardInterrupt:
+        pass
+
+def usage():
+    print('Usage: %s start [buffer_size_kb] | stop | dump | tail' % sys.argv[0])
+    print('Control the KVM flight recorder tracing.')
+    sys.exit(0)
+
+def main():
+    if len(sys.argv) < 2:
+        usage()
+
+    cmd = sys.argv[1]
+    if cmd == '--version':
+        print('kvm_flightrecorder version 1.0')
+        sys.exit(0)
+
+    if not os.path.isdir(tracing_dir):
+        print('Unable to find tracing debugfs directory, try:')
+        print('mount -t debugfs none /sys/kernel/debug')
+        sys.exit(1)
+    if not os.access(tracing_dir, os.W_OK):
+        print('Unable to write to tracing debugfs directory, please run as root')
+        sys.exit(1)
+
+    if cmd == 'start':
+        stop_tracing() # clean up first
+
+        if len(sys.argv) == 3:
+            try:
+                buffer_size_kb = int(sys.argv[2])
+            except ValueError:
+                print('Invalid per-cpu trace buffer size in KB')
+                sys.exit(1)
+            write_file(trace_path('buffer_size_kb'), str(buffer_size_kb))
+            print('Per-CPU ring buffer size set to %d KB' % buffer_size_kb)
+
+        start_tracing()
+        print('KVM flight recorder enabled')
+    elif cmd == 'stop':
+        stop_tracing()
+        print('KVM flight recorder disabled')
+    elif cmd == 'dump':
+        dump_trace()
+    elif cmd == 'tail':
+        tail_trace()
+    else:
+        usage()
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap
new file mode 100755
index 000000000..6fe66d5f5
--- /dev/null
+++ b/scripts/kvm/vmxcap
@@ -0,0 +1,280 @@
+#!/usr/bin/env python3
+#
+# tool for querying VMX capabilities
+#
+# Copyright 2009-2010 Red Hat, Inc.
+#
+# Authors:
+#  Avi Kivity <avi@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2. See
+# the COPYING file in the top-level directory.
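+#
+# (Usage note added for this posting: the MSRs below are read through
+# /dev/cpu/0/msr, which normally requires root privileges and the "msr"
+# kernel module, e.g. "sudo modprobe msr && sudo ./vmxcap".)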
+ +MSR_IA32_VMX_BASIC = 0x480 +MSR_IA32_VMX_PINBASED_CTLS = 0x481 +MSR_IA32_VMX_PROCBASED_CTLS = 0x482 +MSR_IA32_VMX_EXIT_CTLS = 0x483 +MSR_IA32_VMX_ENTRY_CTLS = 0x484 +MSR_IA32_VMX_MISC_CTLS = 0x485 +MSR_IA32_VMX_PROCBASED_CTLS2 = 0x48B +MSR_IA32_VMX_EPT_VPID_CAP = 0x48C +MSR_IA32_VMX_TRUE_PINBASED_CTLS = 0x48D +MSR_IA32_VMX_TRUE_PROCBASED_CTLS = 0x48E +MSR_IA32_VMX_TRUE_EXIT_CTLS = 0x48F +MSR_IA32_VMX_TRUE_ENTRY_CTLS = 0x490 +MSR_IA32_VMX_VMFUNC = 0x491 + +class msr(object): + def __init__(self): + try: + self.f = open('/dev/cpu/0/msr', 'rb', 0) + except: + self.f = open('/dev/msr0', 'rb', 0) + def read(self, index, default = None): + import struct + self.f.seek(index) + try: + return struct.unpack('Q', self.f.read(8))[0] + except: + return default + +class Control(object): + def __init__(self, name, bits, cap_msr, true_cap_msr = None): + self.name = name + self.bits = bits + self.cap_msr = cap_msr + self.true_cap_msr = true_cap_msr + def read2(self, nr): + m = msr() + val = m.read(nr, 0) + return (val & 0xffffffff, val >> 32) + def show(self): + print(self.name) + mb1, cb1 = self.read2(self.cap_msr) + tmb1, tcb1 = 0, 0 + if self.true_cap_msr: + tmb1, tcb1 = self.read2(self.true_cap_msr) + for bit in sorted(self.bits.keys()): + zero = not (mb1 & (1 << bit)) + one = cb1 & (1 << bit) + true_zero = not (tmb1 & (1 << bit)) + true_one = tcb1 & (1 << bit) + s= '?' + if (self.true_cap_msr and true_zero and true_one + and one and not zero): + s = 'default' + elif zero and not one: + s = 'no' + elif one and not zero: + s = 'forced' + elif one and zero: + s = 'yes' + print(' %-40s %s' % (self.bits[bit], s)) + +class Misc(object): + def __init__(self, name, bits, msr): + self.name = name + self.bits = bits + self.msr = msr + def show(self): + print(self.name) + value = msr().read(self.msr, 0) + print(' Hex: 0x%x' % (value)) + def first_bit(key): + if type(key) is tuple: + return key[0] + else: + return key + for bits in sorted(self.bits.keys(), key = first_bit): + if type(bits) is tuple: + lo, hi = bits + fmt = int + else: + lo = hi = bits + def fmt(x): + return { True: 'yes', False: 'no' }[x] + v = (value >> lo) & ((1 << (hi - lo + 1)) - 1) + print(' %-40s %s' % (self.bits[bits], fmt(v))) + +controls = [ + Misc( + name = 'Basic VMX Information', + bits = { + (0, 30): 'Revision', + (32,44): 'VMCS size', + 48: 'VMCS restricted to 32 bit addresses', + 49: 'Dual-monitor support', + (50, 53): 'VMCS memory type', + 54: 'INS/OUTS instruction information', + 55: 'IA32_VMX_TRUE_*_CTLS support', + }, + msr = MSR_IA32_VMX_BASIC, + ), + Control( + name = 'pin-based controls', + bits = { + 0: 'External interrupt exiting', + 3: 'NMI exiting', + 5: 'Virtual NMIs', + 6: 'Activate VMX-preemption timer', + 7: 'Process posted interrupts', + }, + cap_msr = MSR_IA32_VMX_PINBASED_CTLS, + true_cap_msr = MSR_IA32_VMX_TRUE_PINBASED_CTLS, + ), + + Control( + name = 'primary processor-based controls', + bits = { + 2: 'Interrupt window exiting', + 3: 'Use TSC offsetting', + 7: 'HLT exiting', + 9: 'INVLPG exiting', + 10: 'MWAIT exiting', + 11: 'RDPMC exiting', + 12: 'RDTSC exiting', + 15: 'CR3-load exiting', + 16: 'CR3-store exiting', + 19: 'CR8-load exiting', + 20: 'CR8-store exiting', + 21: 'Use TPR shadow', + 22: 'NMI-window exiting', + 23: 'MOV-DR exiting', + 24: 'Unconditional I/O exiting', + 25: 'Use I/O bitmaps', + 27: 'Monitor trap flag', + 28: 'Use MSR bitmaps', + 29: 'MONITOR exiting', + 30: 'PAUSE exiting', + 31: 'Activate secondary control', + }, + cap_msr = MSR_IA32_VMX_PROCBASED_CTLS, + true_cap_msr = 
MSR_IA32_VMX_TRUE_PROCBASED_CTLS, + ), + + Control( + name = 'secondary processor-based controls', + bits = { + 0: 'Virtualize APIC accesses', + 1: 'Enable EPT', + 2: 'Descriptor-table exiting', + 3: 'Enable RDTSCP', + 4: 'Virtualize x2APIC mode', + 5: 'Enable VPID', + 6: 'WBINVD exiting', + 7: 'Unrestricted guest', + 8: 'APIC register emulation', + 9: 'Virtual interrupt delivery', + 10: 'PAUSE-loop exiting', + 11: 'RDRAND exiting', + 12: 'Enable INVPCID', + 13: 'Enable VM functions', + 14: 'VMCS shadowing', + 15: 'Enable ENCLS exiting', + 16: 'RDSEED exiting', + 17: 'Enable PML', + 18: 'EPT-violation #VE', + 19: 'Conceal non-root operation from PT', + 20: 'Enable XSAVES/XRSTORS', + 22: 'Mode-based execute control (XS/XU)', + 23: 'Sub-page write permissions', + 24: 'GPA translation for PT', + 25: 'TSC scaling', + 26: 'User wait and pause', + 28: 'ENCLV exiting', + }, + cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2, + ), + + Control( + name = 'VM-Exit controls', + bits = { + 2: 'Save debug controls', + 9: 'Host address-space size', + 12: 'Load IA32_PERF_GLOBAL_CTRL', + 15: 'Acknowledge interrupt on exit', + 18: 'Save IA32_PAT', + 19: 'Load IA32_PAT', + 20: 'Save IA32_EFER', + 21: 'Load IA32_EFER', + 22: 'Save VMX-preemption timer value', + 23: 'Clear IA32_BNDCFGS', + 24: 'Conceal VM exits from PT', + 25: 'Clear IA32_RTIT_CTL', + }, + cap_msr = MSR_IA32_VMX_EXIT_CTLS, + true_cap_msr = MSR_IA32_VMX_TRUE_EXIT_CTLS, + ), + + Control( + name = 'VM-Entry controls', + bits = { + 2: 'Load debug controls', + 9: 'IA-32e mode guest', + 10: 'Entry to SMM', + 11: 'Deactivate dual-monitor treatment', + 13: 'Load IA32_PERF_GLOBAL_CTRL', + 14: 'Load IA32_PAT', + 15: 'Load IA32_EFER', + 16: 'Load IA32_BNDCFGS', + 17: 'Conceal VM entries from PT', + 18: 'Load IA32_RTIT_CTL', + }, + cap_msr = MSR_IA32_VMX_ENTRY_CTLS, + true_cap_msr = MSR_IA32_VMX_TRUE_ENTRY_CTLS, + ), + + Misc( + name = 'Miscellaneous data', + bits = { + (0,4): 'VMX-preemption timer scale (log2)', + 5: 'Store EFER.LMA into IA-32e mode guest control', + 6: 'HLT activity state', + 7: 'Shutdown activity state', + 8: 'Wait-for-SIPI activity state', + 14: 'PT in VMX operation', + 15: 'IA32_SMBASE support', + (16,24): 'Number of CR3-target values', + (25,27): 'MSR-load/store count recommendation', + 28: 'IA32_SMM_MONITOR_CTL[2] can be set to 1', + 29: 'VMWRITE to VM-exit information fields', + 30: 'Inject event with insn length=0', + (32,63): 'MSEG revision identifier', + }, + msr = MSR_IA32_VMX_MISC_CTLS, + ), + + Misc( + name = 'VPID and EPT capabilities', + bits = { + 0: 'Execute-only EPT translations', + 6: 'Page-walk length 4', + 8: 'Paging-structure memory type UC', + 14: 'Paging-structure memory type WB', + 16: '2MB EPT pages', + 17: '1GB EPT pages', + 20: 'INVEPT supported', + 21: 'EPT accessed and dirty flags', + 22: 'Advanced VM-exit information for EPT violations', + 25: 'Single-context INVEPT', + 26: 'All-context INVEPT', + 32: 'INVVPID supported', + 40: 'Individual-address INVVPID', + 41: 'Single-context INVVPID', + 42: 'All-context INVVPID', + 43: 'Single-context-retaining-globals INVVPID', + }, + msr = MSR_IA32_VMX_EPT_VPID_CAP, + ), + Misc( + name = 'VM Functions', + bits = { + 0: 'EPTP Switching', + }, + msr = MSR_IA32_VMX_VMFUNC, + ), + ] + +if __name__ == '__main__': + for c in controls: + c.show() diff --git a/scripts/make-release b/scripts/make-release new file mode 100755 index 000000000..05b14ecc9 --- /dev/null +++ b/scripts/make-release @@ -0,0 +1,38 @@ +#!/bin/bash -e +# +# QEMU Release Script +# +# Copyright IBM, Corp. 
2012 +# +# Authors: +# Anthony Liguori <aliguori@us.ibm.com> +# +# This work is licensed under the terms of the GNU GPLv2 or later. +# See the COPYING file in the top-level directory. + +src="$1" +version="$2" +destination=qemu-${version} + +git clone "${src}" ${destination} +pushd ${destination} +git checkout "v${version}" +git submodule update --init +(cd roms/seabios && git describe --tags --long --dirty > .version) +(cd roms/skiboot && ./make_version.sh > .version) +# Fetch edk2 submodule's submodules, since it won't have access to them via +# the tarball later. +# +# A more uniform way to handle this sort of situation would be nice, but we +# don't necessarily have much control over how a submodule handles its +# submodule dependencies, so we continue to handle these on a case-by-case +# basis for now. +(cd roms/edk2 && \ + git submodule update --init -- \ + ArmPkg/Library/ArmSoftFloatLib/berkeley-softfloat-3 \ + BaseTools/Source/C/BrotliCompress/brotli \ + CryptoPkg/Library/OpensslLib/openssl \ + MdeModulePkg/Library/BrotliCustomDecompressLib/brotli) +popd +tar --exclude=.git -cjf ${destination}.tar.bz2 ${destination} +rm -rf ${destination} diff --git a/scripts/meson-buildoptions.py b/scripts/meson-buildoptions.py new file mode 100755 index 000000000..96969d89e --- /dev/null +++ b/scripts/meson-buildoptions.py @@ -0,0 +1,156 @@ +#! /usr/bin/env python3 + +# Generate configure command line options handling code, based on Meson's +# user build options introspection data +# +# Copyright (C) 2021 Red Hat, Inc. +# +# Author: Paolo Bonzini <pbonzini@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import json +import textwrap +import shlex +import sys + +SKIP_OPTIONS = { + "audio_drv_list", + "default_devices", + "docdir", + "fuzzing_engine", + "qemu_firmwarepath", + "qemu_suffix", + "sphinx_build", + "trace_file", +} + +LINE_WIDTH = 76 + + +# Convert the default value of an option to the string used in +# the help message +def value_to_help(value): + if isinstance(value, list): + return ",".join(value) + if isinstance(value, bool): + return "enabled" if value else "disabled" + return str(value) + + +def wrap(left, text, indent): + spaces = " " * indent + if len(left) >= indent: + yield left + left = spaces + else: + left = (left + spaces)[0:indent] + yield from textwrap.wrap( + text, width=LINE_WIDTH, initial_indent=left, subsequent_indent=spaces + ) + + +def sh_print(line=""): + print(' printf "%s\\n"', shlex.quote(line)) + + +def help_line(left, opt, indent, long): + right = f'{opt["description"]}' + if long: + value = value_to_help(opt["value"]) + if value != "auto": + right += f" [{value}]" + if "choices" in opt and long: + choices = "/".join(sorted(opt["choices"])) + right += f" (choices: {choices})" + for x in wrap(" " + left, right, indent): + sh_print(x) + + +# Return whether the option (a dictionary) can be used with +# arguments. 
Booleans can never be used with arguments;
+# combos allow an argument only if they accept other values
+# than "auto", "enabled", and "disabled".
+def allow_arg(opt):
+    if opt["type"] == "boolean":
+        return False
+    if opt["type"] != "combo":
+        return True
+    return not (set(opt["choices"]) <= {"auto", "disabled", "enabled"})
+
+
+def load_options(json):
+    json = [
+        x
+        for x in json
+        if x["section"] == "user"
+        and ":" not in x["name"]
+        and x["name"] not in SKIP_OPTIONS
+    ]
+    return sorted(json, key=lambda x: x["name"])
+
+
+def print_help(options):
+    print("meson_options_help() {")
+    for opt in options:
+        key = opt["name"].replace("_", "-")
+        # The first section includes options that take an argument,
+        # and booleans (i.e., only one of enable/disable makes sense)
+        if opt["type"] == "boolean":
+            left = f"--disable-{key}" if opt["value"] else f"--enable-{key}"
+            help_line(left, opt, 27, False)
+        elif allow_arg(opt):
+            if opt["type"] == "combo" and "enabled" in opt["choices"]:
+                left = f"--enable-{key}[=CHOICE]"
+            else:
+                left = f"--enable-{key}=CHOICE"
+            help_line(left, opt, 27, True)
+
+    sh_print()
+    sh_print("Optional features, enabled with --enable-FEATURE and")
+    sh_print("disabled with --disable-FEATURE, default is enabled if available")
+    sh_print("(unless built with --without-default-features):")
+    sh_print()
+    for opt in options:
+        key = opt["name"].replace("_", "-")
+        if opt["type"] != "boolean" and not allow_arg(opt):
+            help_line(key, opt, 18, False)
+    print("}")
+
+
+def print_parse(options):
+    print("_meson_option_parse() {")
+    print("  case $1 in")
+    for opt in options:
+        key = opt["name"].replace("_", "-")
+        name = opt["name"]
+        if opt["type"] == "boolean":
+            print(f'    --enable-{key}) printf "%s" -D{name}=true ;;')
+            print(f'    --disable-{key}) printf "%s" -D{name}=false ;;')
+        else:
+            if opt["type"] == "combo" and "enabled" in opt["choices"]:
+                print(f'    --enable-{key}) printf "%s" -D{name}=enabled ;;')
+            if opt["type"] == "combo" and "disabled" in opt["choices"]:
+                print(f'    --disable-{key}) printf "%s" -D{name}=disabled ;;')
+            if allow_arg(opt):
+                print(f'    --enable-{key}=*) quote_sh "-D{name}=$2" ;;')
+    print("    *) return 1 ;;")
+    print("  esac")
+    print("}")
+
+
+options = load_options(json.load(sys.stdin))
+print("# This file is generated by meson-buildoptions.py, do not edit!")
+print_help(options)
+print_parse(options)
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
new file mode 100644
index 000000000..7a17ff421
--- /dev/null
+++ b/scripts/meson-buildoptions.sh
@@ -0,0 +1,275 @@
+# This file is generated by meson-buildoptions.py, do not edit!
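+#
+# (Annotation added for this posting, not part of the generated file:
+# this script is sourced by configure, and is regenerated by piping
+# "meson introspect --buildoptions" JSON into scripts/meson-buildoptions.py,
+# which prints the two shell functions below.)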
+meson_options_help() { + printf "%s\n" ' --enable-capstone[=CHOICE]' + printf "%s\n" ' Whether and how to find the capstone library' + printf "%s\n" ' (choices: auto/disabled/enabled/internal/system)' + printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)' + printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation' + printf "%s\n" ' --enable-fdt[=CHOICE] Whether and how to find the libfdt library' + printf "%s\n" ' (choices: auto/disabled/enabled/internal/system)' + printf "%s\n" ' --enable-fuzzing build fuzzing targets' + printf "%s\n" ' --disable-install-blobs install provided firmware blobs' + printf "%s\n" ' --enable-malloc=CHOICE choose memory allocator to use [system] (choices:' + printf "%s\n" ' jemalloc/system/tcmalloc)' + printf "%s\n" ' --enable-slirp[=CHOICE] Whether and how to find the slirp library' + printf "%s\n" ' (choices: auto/disabled/enabled/internal/system)' + printf "%s\n" ' --enable-tcg-interpreter TCG with bytecode interpreter (slow)' + printf "%s\n" ' --enable-trace-backends=CHOICE' + printf "%s\n" ' Set available tracing backends [log] (choices:' + printf "%s\n" ' dtrace/ftrace/log/nop/simple/syslog/ust)' + printf "%s\n" '' + printf "%s\n" 'Optional features, enabled with --enable-FEATURE and' + printf "%s\n" 'disabled with --disable-FEATURE, default is enabled if available' + printf "%s\n" '(unless built with --without-default-features):' + printf "%s\n" '' + printf "%s\n" ' alsa ALSA sound support' + printf "%s\n" ' attr attr/xattr support' + printf "%s\n" ' auth-pam PAM access control' + printf "%s\n" ' bpf eBPF support' + printf "%s\n" ' brlapi brlapi character device driver' + printf "%s\n" ' bzip2 bzip2 support for DMG images' + printf "%s\n" ' cap-ng cap_ng support' + printf "%s\n" ' cocoa Cocoa user interface (macOS only)' + printf "%s\n" ' coreaudio CoreAudio sound support' + printf "%s\n" ' curl CURL block device driver' + printf "%s\n" ' curses curses UI' + printf "%s\n" ' docs Documentations build support' + printf "%s\n" ' dsound DirectSound sound support' + printf "%s\n" ' fuse FUSE block device export' + printf "%s\n" ' fuse-lseek SEEK_HOLE/SEEK_DATA support for FUSE exports' + printf "%s\n" ' gcrypt libgcrypt cryptography support' + printf "%s\n" ' gettext Localization of the GTK+ user interface' + printf "%s\n" ' glusterfs Glusterfs block device driver' + printf "%s\n" ' gnutls GNUTLS cryptography support' + printf "%s\n" ' gtk GTK+ user interface' + printf "%s\n" ' guest-agent-msi Build MSI package for the QEMU Guest Agent' + printf "%s\n" ' hax HAX acceleration support' + printf "%s\n" ' hvf HVF acceleration support' + printf "%s\n" ' iconv Font glyph conversion support' + printf "%s\n" ' jack JACK sound support' + printf "%s\n" ' kvm KVM acceleration support' + printf "%s\n" ' l2tpv3 l2tpv3 network backend support' + printf "%s\n" ' libdaxctl libdaxctl support' + printf "%s\n" ' libiscsi libiscsi userspace initiator' + printf "%s\n" ' libnfs libnfs block device driver' + printf "%s\n" ' libpmem libpmem support' + printf "%s\n" ' libudev Use libudev to enumerate host devices' + printf "%s\n" ' libusb libusb support for USB passthrough' + printf "%s\n" ' libxml2 libxml2 support for Parallels image format' + printf "%s\n" ' linux-aio Linux AIO support' + printf "%s\n" ' linux-io-uring Linux io_uring support' + printf "%s\n" ' lzfse lzfse support for DMG images' + printf "%s\n" ' lzo lzo compression support' + printf "%s\n" ' malloc-trim enable libc malloc_trim() for memory optimization' + printf "%s\n" ' mpath Multipath 
persistent reservation passthrough' + printf "%s\n" ' multiprocess Out of process device emulation support' + printf "%s\n" ' netmap netmap network backend support' + printf "%s\n" ' nettle nettle cryptography support' + printf "%s\n" ' nvmm NVMM acceleration support' + printf "%s\n" ' oss OSS sound support' + printf "%s\n" ' pa PulseAudio sound support' + printf "%s\n" ' rbd Ceph block device driver' + printf "%s\n" ' sdl SDL user interface' + printf "%s\n" ' sdl-image SDL Image support for icons' + printf "%s\n" ' seccomp seccomp support' + printf "%s\n" ' selinux SELinux support in qemu-nbd' + printf "%s\n" ' smartcard CA smartcard emulation support' + printf "%s\n" ' snappy snappy compression support' + printf "%s\n" ' sparse sparse checker' + printf "%s\n" ' spice Spice server support' + printf "%s\n" ' spice-protocol Spice protocol support' + printf "%s\n" ' tcg TCG support' + printf "%s\n" ' u2f U2F emulation support' + printf "%s\n" ' usb-redir libusbredir support' + printf "%s\n" ' vde vde network backend support' + printf "%s\n" ' vhost-user-blk-server' + printf "%s\n" ' build vhost-user-blk server' + printf "%s\n" ' virglrenderer virgl rendering support' + printf "%s\n" ' virtfs virtio-9p support' + printf "%s\n" ' virtiofsd build virtiofs daemon (virtiofsd)' + printf "%s\n" ' vnc VNC server' + printf "%s\n" ' vnc-jpeg JPEG lossy compression for VNC server' + printf "%s\n" ' vnc-png PNG compression for VNC server' + printf "%s\n" ' vnc-sasl SASL authentication for VNC server' + printf "%s\n" ' vte vte support for the gtk UI' + printf "%s\n" ' whpx WHPX acceleration support' + printf "%s\n" ' xen Xen backend support' + printf "%s\n" ' xen-pci-passthrough' + printf "%s\n" ' Xen PCI passthrough support' + printf "%s\n" ' xkbcommon xkbcommon support' + printf "%s\n" ' zstd zstd compression support' +} +_meson_option_parse() { + case $1 in + --enable-alsa) printf "%s" -Dalsa=enabled ;; + --disable-alsa) printf "%s" -Dalsa=disabled ;; + --enable-attr) printf "%s" -Dattr=enabled ;; + --disable-attr) printf "%s" -Dattr=disabled ;; + --enable-auth-pam) printf "%s" -Dauth_pam=enabled ;; + --disable-auth-pam) printf "%s" -Dauth_pam=disabled ;; + --enable-bpf) printf "%s" -Dbpf=enabled ;; + --disable-bpf) printf "%s" -Dbpf=disabled ;; + --enable-brlapi) printf "%s" -Dbrlapi=enabled ;; + --disable-brlapi) printf "%s" -Dbrlapi=disabled ;; + --enable-bzip2) printf "%s" -Dbzip2=enabled ;; + --disable-bzip2) printf "%s" -Dbzip2=disabled ;; + --enable-cap-ng) printf "%s" -Dcap_ng=enabled ;; + --disable-cap-ng) printf "%s" -Dcap_ng=disabled ;; + --enable-capstone) printf "%s" -Dcapstone=enabled ;; + --disable-capstone) printf "%s" -Dcapstone=disabled ;; + --enable-capstone=*) quote_sh "-Dcapstone=$2" ;; + --enable-cfi) printf "%s" -Dcfi=true ;; + --disable-cfi) printf "%s" -Dcfi=false ;; + --enable-cfi-debug) printf "%s" -Dcfi_debug=true ;; + --disable-cfi-debug) printf "%s" -Dcfi_debug=false ;; + --enable-cocoa) printf "%s" -Dcocoa=enabled ;; + --disable-cocoa) printf "%s" -Dcocoa=disabled ;; + --enable-coreaudio) printf "%s" -Dcoreaudio=enabled ;; + --disable-coreaudio) printf "%s" -Dcoreaudio=disabled ;; + --enable-curl) printf "%s" -Dcurl=enabled ;; + --disable-curl) printf "%s" -Dcurl=disabled ;; + --enable-curses) printf "%s" -Dcurses=enabled ;; + --disable-curses) printf "%s" -Dcurses=disabled ;; + --enable-docs) printf "%s" -Ddocs=enabled ;; + --disable-docs) printf "%s" -Ddocs=disabled ;; + --enable-dsound) printf "%s" -Ddsound=enabled ;; + --disable-dsound) printf "%s" -Ddsound=disabled 
;; + --enable-fdt) printf "%s" -Dfdt=enabled ;; + --disable-fdt) printf "%s" -Dfdt=disabled ;; + --enable-fdt=*) quote_sh "-Dfdt=$2" ;; + --enable-fuse) printf "%s" -Dfuse=enabled ;; + --disable-fuse) printf "%s" -Dfuse=disabled ;; + --enable-fuse-lseek) printf "%s" -Dfuse_lseek=enabled ;; + --disable-fuse-lseek) printf "%s" -Dfuse_lseek=disabled ;; + --enable-fuzzing) printf "%s" -Dfuzzing=true ;; + --disable-fuzzing) printf "%s" -Dfuzzing=false ;; + --enable-gcrypt) printf "%s" -Dgcrypt=enabled ;; + --disable-gcrypt) printf "%s" -Dgcrypt=disabled ;; + --enable-gettext) printf "%s" -Dgettext=enabled ;; + --disable-gettext) printf "%s" -Dgettext=disabled ;; + --enable-glusterfs) printf "%s" -Dglusterfs=enabled ;; + --disable-glusterfs) printf "%s" -Dglusterfs=disabled ;; + --enable-gnutls) printf "%s" -Dgnutls=enabled ;; + --disable-gnutls) printf "%s" -Dgnutls=disabled ;; + --enable-gtk) printf "%s" -Dgtk=enabled ;; + --disable-gtk) printf "%s" -Dgtk=disabled ;; + --enable-guest-agent-msi) printf "%s" -Dguest_agent_msi=enabled ;; + --disable-guest-agent-msi) printf "%s" -Dguest_agent_msi=disabled ;; + --enable-hax) printf "%s" -Dhax=enabled ;; + --disable-hax) printf "%s" -Dhax=disabled ;; + --enable-hvf) printf "%s" -Dhvf=enabled ;; + --disable-hvf) printf "%s" -Dhvf=disabled ;; + --enable-iconv) printf "%s" -Diconv=enabled ;; + --disable-iconv) printf "%s" -Diconv=disabled ;; + --enable-install-blobs) printf "%s" -Dinstall_blobs=true ;; + --disable-install-blobs) printf "%s" -Dinstall_blobs=false ;; + --enable-jack) printf "%s" -Djack=enabled ;; + --disable-jack) printf "%s" -Djack=disabled ;; + --enable-kvm) printf "%s" -Dkvm=enabled ;; + --disable-kvm) printf "%s" -Dkvm=disabled ;; + --enable-l2tpv3) printf "%s" -Dl2tpv3=enabled ;; + --disable-l2tpv3) printf "%s" -Dl2tpv3=disabled ;; + --enable-libdaxctl) printf "%s" -Dlibdaxctl=enabled ;; + --disable-libdaxctl) printf "%s" -Dlibdaxctl=disabled ;; + --enable-libiscsi) printf "%s" -Dlibiscsi=enabled ;; + --disable-libiscsi) printf "%s" -Dlibiscsi=disabled ;; + --enable-libnfs) printf "%s" -Dlibnfs=enabled ;; + --disable-libnfs) printf "%s" -Dlibnfs=disabled ;; + --enable-libpmem) printf "%s" -Dlibpmem=enabled ;; + --disable-libpmem) printf "%s" -Dlibpmem=disabled ;; + --enable-libudev) printf "%s" -Dlibudev=enabled ;; + --disable-libudev) printf "%s" -Dlibudev=disabled ;; + --enable-libusb) printf "%s" -Dlibusb=enabled ;; + --disable-libusb) printf "%s" -Dlibusb=disabled ;; + --enable-libxml2) printf "%s" -Dlibxml2=enabled ;; + --disable-libxml2) printf "%s" -Dlibxml2=disabled ;; + --enable-linux-aio) printf "%s" -Dlinux_aio=enabled ;; + --disable-linux-aio) printf "%s" -Dlinux_aio=disabled ;; + --enable-linux-io-uring) printf "%s" -Dlinux_io_uring=enabled ;; + --disable-linux-io-uring) printf "%s" -Dlinux_io_uring=disabled ;; + --enable-lzfse) printf "%s" -Dlzfse=enabled ;; + --disable-lzfse) printf "%s" -Dlzfse=disabled ;; + --enable-lzo) printf "%s" -Dlzo=enabled ;; + --disable-lzo) printf "%s" -Dlzo=disabled ;; + --enable-malloc=*) quote_sh "-Dmalloc=$2" ;; + --enable-malloc-trim) printf "%s" -Dmalloc_trim=enabled ;; + --disable-malloc-trim) printf "%s" -Dmalloc_trim=disabled ;; + --enable-mpath) printf "%s" -Dmpath=enabled ;; + --disable-mpath) printf "%s" -Dmpath=disabled ;; + --enable-multiprocess) printf "%s" -Dmultiprocess=enabled ;; + --disable-multiprocess) printf "%s" -Dmultiprocess=disabled ;; + --enable-netmap) printf "%s" -Dnetmap=enabled ;; + --disable-netmap) printf "%s" -Dnetmap=disabled ;; + --enable-nettle) printf 
"%s" -Dnettle=enabled ;; + --disable-nettle) printf "%s" -Dnettle=disabled ;; + --enable-nvmm) printf "%s" -Dnvmm=enabled ;; + --disable-nvmm) printf "%s" -Dnvmm=disabled ;; + --enable-oss) printf "%s" -Doss=enabled ;; + --disable-oss) printf "%s" -Doss=disabled ;; + --enable-pa) printf "%s" -Dpa=enabled ;; + --disable-pa) printf "%s" -Dpa=disabled ;; + --enable-rbd) printf "%s" -Drbd=enabled ;; + --disable-rbd) printf "%s" -Drbd=disabled ;; + --enable-sdl) printf "%s" -Dsdl=enabled ;; + --disable-sdl) printf "%s" -Dsdl=disabled ;; + --enable-sdl-image) printf "%s" -Dsdl_image=enabled ;; + --disable-sdl-image) printf "%s" -Dsdl_image=disabled ;; + --enable-seccomp) printf "%s" -Dseccomp=enabled ;; + --disable-seccomp) printf "%s" -Dseccomp=disabled ;; + --enable-selinux) printf "%s" -Dselinux=enabled ;; + --disable-selinux) printf "%s" -Dselinux=disabled ;; + --enable-slirp) printf "%s" -Dslirp=enabled ;; + --disable-slirp) printf "%s" -Dslirp=disabled ;; + --enable-slirp=*) quote_sh "-Dslirp=$2" ;; + --enable-smartcard) printf "%s" -Dsmartcard=enabled ;; + --disable-smartcard) printf "%s" -Dsmartcard=disabled ;; + --enable-snappy) printf "%s" -Dsnappy=enabled ;; + --disable-snappy) printf "%s" -Dsnappy=disabled ;; + --enable-sparse) printf "%s" -Dsparse=enabled ;; + --disable-sparse) printf "%s" -Dsparse=disabled ;; + --enable-spice) printf "%s" -Dspice=enabled ;; + --disable-spice) printf "%s" -Dspice=disabled ;; + --enable-spice-protocol) printf "%s" -Dspice_protocol=enabled ;; + --disable-spice-protocol) printf "%s" -Dspice_protocol=disabled ;; + --enable-tcg) printf "%s" -Dtcg=enabled ;; + --disable-tcg) printf "%s" -Dtcg=disabled ;; + --enable-tcg-interpreter) printf "%s" -Dtcg_interpreter=true ;; + --disable-tcg-interpreter) printf "%s" -Dtcg_interpreter=false ;; + --enable-trace-backends=*) quote_sh "-Dtrace_backends=$2" ;; + --enable-u2f) printf "%s" -Du2f=enabled ;; + --disable-u2f) printf "%s" -Du2f=disabled ;; + --enable-usb-redir) printf "%s" -Dusb_redir=enabled ;; + --disable-usb-redir) printf "%s" -Dusb_redir=disabled ;; + --enable-vde) printf "%s" -Dvde=enabled ;; + --disable-vde) printf "%s" -Dvde=disabled ;; + --enable-vhost-user-blk-server) printf "%s" -Dvhost_user_blk_server=enabled ;; + --disable-vhost-user-blk-server) printf "%s" -Dvhost_user_blk_server=disabled ;; + --enable-virglrenderer) printf "%s" -Dvirglrenderer=enabled ;; + --disable-virglrenderer) printf "%s" -Dvirglrenderer=disabled ;; + --enable-virtfs) printf "%s" -Dvirtfs=enabled ;; + --disable-virtfs) printf "%s" -Dvirtfs=disabled ;; + --enable-virtiofsd) printf "%s" -Dvirtiofsd=enabled ;; + --disable-virtiofsd) printf "%s" -Dvirtiofsd=disabled ;; + --enable-vnc) printf "%s" -Dvnc=enabled ;; + --disable-vnc) printf "%s" -Dvnc=disabled ;; + --enable-vnc-jpeg) printf "%s" -Dvnc_jpeg=enabled ;; + --disable-vnc-jpeg) printf "%s" -Dvnc_jpeg=disabled ;; + --enable-vnc-png) printf "%s" -Dvnc_png=enabled ;; + --disable-vnc-png) printf "%s" -Dvnc_png=disabled ;; + --enable-vnc-sasl) printf "%s" -Dvnc_sasl=enabled ;; + --disable-vnc-sasl) printf "%s" -Dvnc_sasl=disabled ;; + --enable-vte) printf "%s" -Dvte=enabled ;; + --disable-vte) printf "%s" -Dvte=disabled ;; + --enable-whpx) printf "%s" -Dwhpx=enabled ;; + --disable-whpx) printf "%s" -Dwhpx=disabled ;; + --enable-xen) printf "%s" -Dxen=enabled ;; + --disable-xen) printf "%s" -Dxen=disabled ;; + --enable-xen-pci-passthrough) printf "%s" -Dxen_pci_passthrough=enabled ;; + --disable-xen-pci-passthrough) printf "%s" -Dxen_pci_passthrough=disabled ;; + 
--enable-xkbcommon) printf "%s" -Dxkbcommon=enabled ;; + --disable-xkbcommon) printf "%s" -Dxkbcommon=disabled ;; + --enable-zstd) printf "%s" -Dzstd=enabled ;; + --disable-zstd) printf "%s" -Dzstd=disabled ;; + *) return 1 ;; + esac +} diff --git a/scripts/meson.build b/scripts/meson.build new file mode 100644 index 000000000..1c89e10a7 --- /dev/null +++ b/scripts/meson.build @@ -0,0 +1,3 @@ +if stap.found() + install_data('qemu-trace-stap', install_dir: get_option('bindir')) +endif diff --git a/scripts/minikconf.py b/scripts/minikconf.py new file mode 100644 index 000000000..bcd91015d --- /dev/null +++ b/scripts/minikconf.py @@ -0,0 +1,711 @@ +#!/usr/bin/env python3 +# +# Mini-Kconfig parser +# +# Copyright (c) 2015 Red Hat Inc. +# +# Authors: +# Paolo Bonzini <pbonzini@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 +# or, at your option, any later version. See the COPYING file in +# the top-level directory. + +import os +import sys +import re +import random + +__all__ = [ 'KconfigDataError', 'KconfigParserError', + 'KconfigData', 'KconfigParser' , + 'defconfig', 'allyesconfig', 'allnoconfig', 'randconfig' ] + +def debug_print(*args): + #print('# ' + (' '.join(str(x) for x in args))) + pass + +# ------------------------------------------- +# KconfigData implements the Kconfig semantics. For now it can only +# detect undefined symbols, i.e. symbols that were referenced in +# assignments or dependencies but were not declared with "config FOO". +# +# Semantic actions are represented by methods called do_*. The do_var +# method return the semantic value of a variable (which right now is +# just its name). +# ------------------------------------------- + +class KconfigDataError(Exception): + def __init__(self, msg): + self.msg = msg + + def __str__(self): + return self.msg + +allyesconfig = lambda x: True +allnoconfig = lambda x: False +defconfig = lambda x: x +randconfig = lambda x: random.randint(0, 1) == 1 + +class KconfigData: + class Expr: + def __and__(self, rhs): + return KconfigData.AND(self, rhs) + def __or__(self, rhs): + return KconfigData.OR(self, rhs) + def __invert__(self): + return KconfigData.NOT(self) + + # Abstract methods + def add_edges_to(self, var): + pass + def evaluate(self): + assert False + + class AND(Expr): + def __init__(self, lhs, rhs): + self.lhs = lhs + self.rhs = rhs + def __str__(self): + return "(%s && %s)" % (self.lhs, self.rhs) + + def add_edges_to(self, var): + self.lhs.add_edges_to(var) + self.rhs.add_edges_to(var) + def evaluate(self): + return self.lhs.evaluate() and self.rhs.evaluate() + + class OR(Expr): + def __init__(self, lhs, rhs): + self.lhs = lhs + self.rhs = rhs + def __str__(self): + return "(%s || %s)" % (self.lhs, self.rhs) + + def add_edges_to(self, var): + self.lhs.add_edges_to(var) + self.rhs.add_edges_to(var) + def evaluate(self): + return self.lhs.evaluate() or self.rhs.evaluate() + + class NOT(Expr): + def __init__(self, lhs): + self.lhs = lhs + def __str__(self): + return "!%s" % (self.lhs) + + def add_edges_to(self, var): + self.lhs.add_edges_to(var) + def evaluate(self): + return not self.lhs.evaluate() + + class Var(Expr): + def __init__(self, name): + self.name = name + self.value = None + self.outgoing = set() + self.clauses_for_var = list() + def __str__(self): + return self.name + + def has_value(self): + return not (self.value is None) + def set_value(self, val, clause): + self.clauses_for_var.append(clause) + if self.has_value() and self.value != val: + print("The following clauses were 
found for " + self.name) + for i in self.clauses_for_var: + print(" " + str(i), file=sys.stderr) + raise KconfigDataError('contradiction between clauses when setting %s' % self) + debug_print("=> %s is now %s" % (self.name, val)) + self.value = val + + # depth first search of the dependency graph + def dfs(self, visited, f): + if self in visited: + return + visited.add(self) + for v in self.outgoing: + v.dfs(visited, f) + f(self) + + def add_edges_to(self, var): + self.outgoing.add(var) + def evaluate(self): + if not self.has_value(): + raise KconfigDataError('cycle found including %s' % self) + return self.value + + class Clause: + def __init__(self, dest): + self.dest = dest + def priority(self): + return 0 + def process(self): + pass + + class AssignmentClause(Clause): + def __init__(self, dest, value): + KconfigData.Clause.__init__(self, dest) + self.value = value + def __str__(self): + return "CONFIG_%s=%s" % (self.dest, 'y' if self.value else 'n') + + def process(self): + self.dest.set_value(self.value, self) + + class DefaultClause(Clause): + def __init__(self, dest, value, cond=None): + KconfigData.Clause.__init__(self, dest) + self.value = value + self.cond = cond + if not (self.cond is None): + self.cond.add_edges_to(self.dest) + def __str__(self): + value = 'y' if self.value else 'n' + if self.cond is None: + return "config %s default %s" % (self.dest, value) + else: + return "config %s default %s if %s" % (self.dest, value, self.cond) + + def priority(self): + # Defaults are processed just before leaving the variable + return -1 + def process(self): + if not self.dest.has_value() and \ + (self.cond is None or self.cond.evaluate()): + self.dest.set_value(self.value, self) + + class DependsOnClause(Clause): + def __init__(self, dest, expr): + KconfigData.Clause.__init__(self, dest) + self.expr = expr + self.expr.add_edges_to(self.dest) + def __str__(self): + return "config %s depends on %s" % (self.dest, self.expr) + + def process(self): + if not self.expr.evaluate(): + self.dest.set_value(False, self) + + class SelectClause(Clause): + def __init__(self, dest, cond): + KconfigData.Clause.__init__(self, dest) + self.cond = cond + self.cond.add_edges_to(self.dest) + def __str__(self): + return "select %s if %s" % (self.dest, self.cond) + + def process(self): + if self.cond.evaluate(): + self.dest.set_value(True, self) + + def __init__(self, value_mangler=defconfig): + self.value_mangler = value_mangler + self.previously_included = [] + self.incl_info = None + self.defined_vars = set() + self.referenced_vars = dict() + self.clauses = list() + + # semantic analysis ------------- + + def check_undefined(self): + undef = False + for i in self.referenced_vars: + if not (i in self.defined_vars): + print("undefined symbol %s" % (i), file=sys.stderr) + undef = True + return undef + + def compute_config(self): + if self.check_undefined(): + raise KconfigDataError("there were undefined symbols") + return None + + debug_print("Input:") + for clause in self.clauses: + debug_print(clause) + + debug_print("\nDependency graph:") + for i in self.referenced_vars: + debug_print(i, "->", [str(x) for x in self.referenced_vars[i].outgoing]) + + # The reverse of the depth-first order is the topological sort + dfo = dict() + visited = set() + debug_print("\n") + def visit_fn(var): + debug_print(var, "has DFS number", len(dfo)) + dfo[var] = len(dfo) + + for name, v in self.referenced_vars.items(): + self.do_default(v, False) + v.dfs(visited, visit_fn) + + # Put higher DFS numbers and higher priorities 
first. This + # places the clauses in topological order and places defaults + # after assignments and dependencies. + self.clauses.sort(key=lambda x: (-dfo[x.dest], -x.priority())) + + debug_print("\nSorted clauses:") + for clause in self.clauses: + debug_print(clause) + clause.process() + + debug_print("") + values = dict() + for name, v in self.referenced_vars.items(): + debug_print("Evaluating", name) + values[name] = v.evaluate() + + return values + + # semantic actions ------------- + + def do_declaration(self, var): + if (var in self.defined_vars): + raise KconfigDataError('variable "' + var + '" defined twice') + + self.defined_vars.add(var.name) + + # var is a string with the variable's name. + def do_var(self, var): + if (var in self.referenced_vars): + return self.referenced_vars[var] + + var_obj = self.referenced_vars[var] = KconfigData.Var(var) + return var_obj + + def do_assignment(self, var, val): + self.clauses.append(KconfigData.AssignmentClause(var, val)) + + def do_default(self, var, val, cond=None): + val = self.value_mangler(val) + self.clauses.append(KconfigData.DefaultClause(var, val, cond)) + + def do_depends_on(self, var, expr): + self.clauses.append(KconfigData.DependsOnClause(var, expr)) + + def do_select(self, var, symbol, cond=None): + cond = (cond & var) if cond is not None else var + self.clauses.append(KconfigData.SelectClause(symbol, cond)) + + def do_imply(self, var, symbol, cond=None): + # "config X imply Y [if COND]" is the same as + # "config Y default y if X [&& COND]" + cond = (cond & var) if cond is not None else var + self.do_default(symbol, True, cond) + +# ------------------------------------------- +# KconfigParser implements a recursive descent parser for (simplified) +# Kconfig syntax. +# ------------------------------------------- + +# tokens table +TOKENS = {} +TOK_NONE = -1 +TOK_LPAREN = 0; TOKENS[TOK_LPAREN] = '"("'; +TOK_RPAREN = 1; TOKENS[TOK_RPAREN] = '")"'; +TOK_EQUAL = 2; TOKENS[TOK_EQUAL] = '"="'; +TOK_AND = 3; TOKENS[TOK_AND] = '"&&"'; +TOK_OR = 4; TOKENS[TOK_OR] = '"||"'; +TOK_NOT = 5; TOKENS[TOK_NOT] = '"!"'; +TOK_DEPENDS = 6; TOKENS[TOK_DEPENDS] = '"depends"'; +TOK_ON = 7; TOKENS[TOK_ON] = '"on"'; +TOK_SELECT = 8; TOKENS[TOK_SELECT] = '"select"'; +TOK_IMPLY = 9; TOKENS[TOK_IMPLY] = '"imply"'; +TOK_CONFIG = 10; TOKENS[TOK_CONFIG] = '"config"'; +TOK_DEFAULT = 11; TOKENS[TOK_DEFAULT] = '"default"'; +TOK_Y = 12; TOKENS[TOK_Y] = '"y"'; +TOK_N = 13; TOKENS[TOK_N] = '"n"'; +TOK_SOURCE = 14; TOKENS[TOK_SOURCE] = '"source"'; +TOK_BOOL = 15; TOKENS[TOK_BOOL] = '"bool"'; +TOK_IF = 16; TOKENS[TOK_IF] = '"if"'; +TOK_ID = 17; TOKENS[TOK_ID] = 'identifier'; +TOK_EOF = 18; TOKENS[TOK_EOF] = 'end of file'; + +class KconfigParserError(Exception): + def __init__(self, parser, msg, tok=None): + self.loc = parser.location() + tok = tok or parser.tok + if tok != TOK_NONE: + location = TOKENS.get(tok, None) or ('"%s"' % tok) + msg = '%s before %s' % (msg, location) + self.msg = msg + + def __str__(self): + return "%s: %s" % (self.loc, self.msg) + +class KconfigParser: + + @classmethod + def parse(self, fp, mode=None): + data = KconfigData(mode or KconfigParser.defconfig) + parser = KconfigParser(data) + parser.parse_file(fp) + return data + + def __init__(self, data): + self.data = data + + def parse_file(self, fp): + self.abs_fname = os.path.abspath(fp.name) + self.fname = fp.name + self.data.previously_included.append(self.abs_fname) + self.src = fp.read() + if self.src == '' or self.src[-1] != '\n': + self.src += '\n' + self.cursor = 0 + self.line = 1 
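+        # (Comment added for this posting: line and line_pos track the
+        # scanner position that location() uses when formatting
+        # KconfigParserError messages.)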
+        self.line_pos = 0
+        self.get_token()
+        self.parse_config()
+
+    def do_assignment(self, var, val):
+        if not var.startswith("CONFIG_"):
+            raise KconfigDataError('assigned variable should start with CONFIG_')
+        var = self.data.do_var(var[7:])
+        self.data.do_assignment(var, val)
+
+    # file management -----
+
+    def error_path(self):
+        inf = self.data.incl_info
+        res = ""
+        while inf:
+            res = ("In file included from %s:%d:\n" % (inf['file'],
+                                                       inf['line'])) + res
+            inf = inf['parent']
+        return res
+
+    def location(self):
+        col = 1
+        for ch in self.src[self.line_pos:self.pos]:
+            if ch == '\t':
+                col += 8 - ((col - 1) % 8)
+            else:
+                col += 1
+        return '%s%s:%d:%d' % (self.error_path(), self.fname, self.line, col)
+
+    def do_include(self, include):
+        incl_abs_fname = os.path.join(os.path.dirname(self.abs_fname),
+                                      include)
+        # catch inclusion cycle
+        inf = self.data.incl_info
+        while inf:
+            if incl_abs_fname == os.path.abspath(inf['file']):
+                raise KconfigParserError(self, "Inclusion loop for %s"
+                                         % include)
+            inf = inf['parent']
+
+        # skip multiple include of the same file
+        if incl_abs_fname in self.data.previously_included:
+            return
+        try:
+            fp = open(incl_abs_fname, 'rt', encoding='utf-8')
+        except IOError as e:
+            raise KconfigParserError(self,
+                                     '%s: %s' % (e.strerror, include))
+
+        inf = self.data.incl_info
+        self.data.incl_info = { 'file': self.fname, 'line': self.line,
+                                'parent': inf }
+        KconfigParser(self.data).parse_file(fp)
+        self.data.incl_info = inf
+
+    # recursive descent parser -----
+
+    # y_or_n: Y | N
+    def parse_y_or_n(self):
+        if self.tok == TOK_Y:
+            self.get_token()
+            return True
+        if self.tok == TOK_N:
+            self.get_token()
+            return False
+        raise KconfigParserError(self, 'Expected "y" or "n"')
+
+    # var: ID
+    def parse_var(self):
+        if self.tok == TOK_ID:
+            val = self.val
+            self.get_token()
+            return self.data.do_var(val)
+        else:
+            raise KconfigParserError(self, 'Expected identifier')
+
+    # assignment_var: ID (starting with "CONFIG_")
+    def parse_assignment_var(self):
+        if self.tok == TOK_ID:
+            val = self.val
+            if not val.startswith("CONFIG_"):
+                raise KconfigParserError(self,
+                    'Expected identifier starting with "CONFIG_"', TOK_NONE)
+            self.get_token()
+            return self.data.do_var(val[7:])
+        else:
+            raise KconfigParserError(self, 'Expected identifier')
+
+    # assignment: var EQUAL y_or_n
+    def parse_assignment(self):
+        var = self.parse_assignment_var()
+        if self.tok != TOK_EQUAL:
+            raise KconfigParserError(self, 'Expected "="')
+        self.get_token()
+        self.data.do_assignment(var, self.parse_y_or_n())
+
+    # primary: NOT primary
+    #        | LPAREN expr RPAREN
+    #        | var
+    def parse_primary(self):
+        if self.tok == TOK_NOT:
+            self.get_token()
+            val = ~self.parse_primary()
+        elif self.tok == TOK_LPAREN:
+            self.get_token()
+            val = self.parse_expr()
+            if self.tok != TOK_RPAREN:
+                raise KconfigParserError(self, 'Expected ")"')
+            self.get_token()
+        elif self.tok == TOK_ID:
+            val = self.parse_var()
+        else:
+            raise KconfigParserError(self, 'Expected "!"
or "(" or identifier') + return val + + # disj: primary (OR primary)* + def parse_disj(self): + lhs = self.parse_primary() + while self.tok == TOK_OR: + self.get_token() + lhs = lhs | self.parse_primary() + return lhs + + # expr: disj (AND disj)* + def parse_expr(self): + lhs = self.parse_disj() + while self.tok == TOK_AND: + self.get_token() + lhs = lhs & self.parse_disj() + return lhs + + # condition: IF expr + # | empty + def parse_condition(self): + if self.tok == TOK_IF: + self.get_token() + return self.parse_expr() + else: + return None + + # property: DEFAULT y_or_n condition + # | DEPENDS ON expr + # | SELECT var condition + # | BOOL + def parse_property(self, var): + if self.tok == TOK_DEFAULT: + self.get_token() + val = self.parse_y_or_n() + cond = self.parse_condition() + self.data.do_default(var, val, cond) + elif self.tok == TOK_DEPENDS: + self.get_token() + if self.tok != TOK_ON: + raise KconfigParserError(self, 'Expected "on"') + self.get_token() + self.data.do_depends_on(var, self.parse_expr()) + elif self.tok == TOK_SELECT: + self.get_token() + symbol = self.parse_var() + cond = self.parse_condition() + self.data.do_select(var, symbol, cond) + elif self.tok == TOK_IMPLY: + self.get_token() + symbol = self.parse_var() + cond = self.parse_condition() + self.data.do_imply(var, symbol, cond) + elif self.tok == TOK_BOOL: + self.get_token() + else: + raise KconfigParserError(self, 'Error in recursive descent?') + + # properties: properties property + # | /* empty */ + def parse_properties(self, var): + had_default = False + while self.tok == TOK_DEFAULT or self.tok == TOK_DEPENDS or \ + self.tok == TOK_SELECT or self.tok == TOK_BOOL or \ + self.tok == TOK_IMPLY: + self.parse_property(var) + + # for nicer error message + if self.tok != TOK_SOURCE and self.tok != TOK_CONFIG and \ + self.tok != TOK_ID and self.tok != TOK_EOF: + raise KconfigParserError(self, 'expected "source", "config", identifier, ' + + '"default", "depends on", "imply" or "select"') + + # declaration: config var properties + def parse_declaration(self): + if self.tok == TOK_CONFIG: + self.get_token() + var = self.parse_var() + self.data.do_declaration(var) + self.parse_properties(var) + else: + raise KconfigParserError(self, 'Error in recursive descent?') + + # clause: SOURCE + # | declaration + # | assignment + def parse_clause(self): + if self.tok == TOK_SOURCE: + val = self.val + self.get_token() + self.do_include(val) + elif self.tok == TOK_CONFIG: + self.parse_declaration() + elif self.tok == TOK_ID: + self.parse_assignment() + else: + raise KconfigParserError(self, 'expected "source", "config" or identifier') + + # config: clause+ EOF + def parse_config(self): + while self.tok != TOK_EOF: + self.parse_clause() + return self.data + + # scanner ----- + + def get_token(self): + while True: + self.tok = self.src[self.cursor] + self.pos = self.cursor + self.cursor += 1 + + self.val = None + self.tok = self.scan_token() + if self.tok is not None: + return + + def check_keyword(self, rest): + if not self.src.startswith(rest, self.cursor): + return False + length = len(rest) + if self.src[self.cursor + length].isalnum() or self.src[self.cursor + length] == '_': + return False + self.cursor += length + return True + + def scan_token(self): + if self.tok == '#': + self.cursor = self.src.find('\n', self.cursor) + return None + elif self.tok == '=': + return TOK_EQUAL + elif self.tok == '(': + return TOK_LPAREN + elif self.tok == ')': + return TOK_RPAREN + elif self.tok == '&' and self.src[self.pos+1] == '&': + 
self.cursor += 1 + return TOK_AND + elif self.tok == '|' and self.src[self.pos+1] == '|': + self.cursor += 1 + return TOK_OR + elif self.tok == '!': + return TOK_NOT + elif self.tok == 'd' and self.check_keyword("epends"): + return TOK_DEPENDS + elif self.tok == 'o' and self.check_keyword("n"): + return TOK_ON + elif self.tok == 's' and self.check_keyword("elect"): + return TOK_SELECT + elif self.tok == 'i' and self.check_keyword("mply"): + return TOK_IMPLY + elif self.tok == 'c' and self.check_keyword("onfig"): + return TOK_CONFIG + elif self.tok == 'd' and self.check_keyword("efault"): + return TOK_DEFAULT + elif self.tok == 'b' and self.check_keyword("ool"): + return TOK_BOOL + elif self.tok == 'i' and self.check_keyword("f"): + return TOK_IF + elif self.tok == 'y' and self.check_keyword(""): + return TOK_Y + elif self.tok == 'n' and self.check_keyword(""): + return TOK_N + elif (self.tok == 's' and self.check_keyword("ource")) or \ + self.tok == 'i' and self.check_keyword("nclude"): + # source FILENAME + # include FILENAME + while self.src[self.cursor].isspace(): + self.cursor += 1 + start = self.cursor + self.cursor = self.src.find('\n', self.cursor) + self.val = self.src[start:self.cursor] + return TOK_SOURCE + elif self.tok.isalnum(): + # identifier + while self.src[self.cursor].isalnum() or self.src[self.cursor] == '_': + self.cursor += 1 + self.val = self.src[self.pos:self.cursor] + return TOK_ID + elif self.tok == '\n': + if self.cursor == len(self.src): + return TOK_EOF + self.line += 1 + self.line_pos = self.cursor + elif not self.tok.isspace(): + raise KconfigParserError(self, 'invalid input') + + return None + +if __name__ == '__main__': + argv = sys.argv + mode = defconfig + if len(sys.argv) > 1: + if argv[1] == '--defconfig': + del argv[1] + elif argv[1] == '--randconfig': + random.seed() + mode = randconfig + del argv[1] + elif argv[1] == '--allyesconfig': + mode = allyesconfig + del argv[1] + elif argv[1] == '--allnoconfig': + mode = allnoconfig + del argv[1] + + if len(argv) == 1: + print ("%s: at least one argument is required" % argv[0], file=sys.stderr) + sys.exit(1) + + if argv[1].startswith('-'): + print ("%s: invalid option %s" % (argv[0], argv[1]), file=sys.stderr) + sys.exit(1) + + data = KconfigData(mode) + parser = KconfigParser(data) + external_vars = set() + for arg in argv[3:]: + m = re.match(r'^(CONFIG_[A-Z0-9_]+)=([yn]?)$', arg) + if m is not None: + name, value = m.groups() + parser.do_assignment(name, value == 'y') + external_vars.add(name[7:]) + else: + fp = open(arg, 'rt', encoding='utf-8') + parser.parse_file(fp) + fp.close() + + config = data.compute_config() + for key in sorted(config.keys()): + if key not in external_vars and config[key]: + print ('CONFIG_%s=y' % key) + + deps = open(argv[2], 'wt', encoding='utf-8') + for fname in data.previously_included: + print ('%s: %s' % (argv[1], fname), file=deps) + deps.close() diff --git a/scripts/modinfo-collect.py b/scripts/modinfo-collect.py new file mode 100755 index 000000000..4acb188c3 --- /dev/null +++ b/scripts/modinfo-collect.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import os +import sys +import json +import shlex +import subprocess + +def find_command(src, target, compile_commands): + for command in compile_commands: + if command['file'] != src: + continue + if target != '' and command['command'].find(target) == -1: + continue + return command['command'] + return 'false' + +def process_command(src, command): + skip = False + arg = False + out = [] + for item in 
shlex.split(command):
+        if arg:
+            out.append(item)
+            arg = False
+            continue
+        if skip:
+            skip = False
+            continue
+        if item == '-MF' or item == '-MQ' or item == '-o':
+            skip = True
+            continue
+        if item == '-c':
+            skip = True
+            continue
+        out.append(item)
+    out.append('-DQEMU_MODINFO')
+    out.append('-E')
+    out.append(src)
+    return out
+
+def main(args):
+    target = ''
+    if args[0] == '--target':
+        args.pop(0)
+        target = args.pop(0)
+        print("MODINFO_DEBUG target %s" % target)
+        arch = target[:-8] # cut '-softmmu'
+        print("MODINFO_START arch \"%s\" MODINFO_END" % arch)
+    with open('compile_commands.json') as f:
+        compile_commands = json.load(f)
+    for src in args:
+        print("MODINFO_DEBUG src %s" % src)
+        command = find_command(src, target, compile_commands)
+        cmdline = process_command(src, command)
+        print("MODINFO_DEBUG cmd", cmdline)
+        result = subprocess.run(cmdline, stdout=subprocess.PIPE,
+                                universal_newlines=True)
+        if result.returncode != 0:
+            sys.exit(result.returncode)
+        for line in result.stdout.split('\n'):
+            if line.find('MODINFO') != -1:
+                print(line)
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/scripts/modinfo-generate.py b/scripts/modinfo-generate.py
new file mode 100755
index 000000000..f559eed00
--- /dev/null
+++ b/scripts/modinfo-generate.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import os
+import sys
+
+def print_array(name, values):
+    if len(values) == 0:
+        return
+    members = ", ".join(values)
+    print("    .%s = ((const char*[]){ %s, NULL })," % (name, members))
+
+def parse_line(line):
+    kind = ""
+    data = ""
+    get_kind = False
+    get_data = False
+    for item in line.split():
+        if item == "MODINFO_START":
+            get_kind = True
+            continue
+        if item.startswith("MODINFO_END"):
+            get_data = False
+            continue
+        if get_kind:
+            kind = item
+            get_kind = False
+            get_data = True
+            continue
+        if get_data:
+            data += " " + item
+            continue
+    return (kind, data)
+
+def generate(name, lines):
+    arch = ""
+    objs = []
+    deps = []
+    opts = []
+    for line in lines:
+        if line.find("MODINFO_START") != -1:
+            (kind, data) = parse_line(line)
+            if kind == 'obj':
+                objs.append(data)
+            elif kind == 'dep':
+                deps.append(data)
+            elif kind == 'opts':
+                opts.append(data)
+            elif kind == 'arch':
+                arch = data
+            else:
+                print("unknown:", kind)
+                exit(1)
+
+    print("    .name = \"%s\"," % name)
+    if arch != "":
+        print("    .arch = %s," % arch)
+    print_array("objs", objs)
+    print_array("deps", deps)
+    print_array("opts", opts)
+    print("},{")
+    return deps
+
+def print_pre():
+    print("/* generated by scripts/modinfo-generate.py */")
+    print("#include \"qemu/osdep.h\"")
+    print("#include \"qemu/module.h\"")
+    print("const QemuModinfo qemu_modinfo[] = {{")
+
+def print_post():
+    print("    /* end of list */")
+    print("}};")
+
+def main(args):
+    deps = {}
+    print_pre()
+    for modinfo in args:
+        with open(modinfo) as f:
+            lines = f.readlines()
+        print("    /* %s */" % modinfo)
+        (basename, ext) = os.path.splitext(modinfo)
+        deps[basename] = generate(basename, lines)
+    print_post()
+
+    flattened_deps = {flat.strip('" ') for dep in deps.values() for flat in dep}
+    error = False
+    for dep in flattened_deps:
+        if dep not in deps.keys():
+            print("Dependency {} cannot be satisfied".format(dep),
+                  file=sys.stderr)
+            error = True
+
+    if error:
+        exit(1)
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/scripts/modules/module_block.py b/scripts/modules/module_block.py
new file mode 100644
index 000000000..1109df827
--- /dev/null
+++ b/scripts/modules/module_block.py
@@ -0,0 +1,99 @@
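A minimal sketch of the input the generator below expects and of what its
get_string_struct() helper extracts from a matching line (driver fields
hypothetical, simplified):

    line = '    .format_name = "qcow2",'
    name = line.split()[2].replace('"', '')[:-1]  # name == 'qcow2'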
+#!/usr/bin/env python3 +# +# Module information generator +# +# Copyright Red Hat, Inc. 2015 - 2016 +# +# Authors: +# Marc Mari <markmb@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2. +# See the COPYING file in the top-level directory. + +import sys +import os + +def get_string_struct(line): + data = line.split() + + # data[0] -> struct element name + # data[1] -> = + # data[2] -> value + + return data[2].replace('"', '')[:-1] + +def add_module(fheader, library, format_name, protocol_name): + lines = [] + lines.append('.library_name = "' + library + '",') + if format_name != "": + lines.append('.format_name = "' + format_name + '",') + if protocol_name != "": + lines.append('.protocol_name = "' + protocol_name + '",') + + text = '\n '.join(lines) + fheader.write('\n {\n ' + text + '\n },') + +def process_file(fheader, filename): + # This parser assumes the coding style rules are being followed + with open(filename, "r") as cfile: + found_start = False + library, _ = os.path.splitext(os.path.basename(filename)) + for line in cfile: + if found_start: + line = line.replace('\n', '') + if line.find(".format_name") != -1: + format_name = get_string_struct(line) + elif line.find(".protocol_name") != -1: + protocol_name = get_string_struct(line) + elif line == "};": + add_module(fheader, library, format_name, protocol_name) + found_start = False + elif line.find("static BlockDriver") != -1: + found_start = True + format_name = "" + protocol_name = "" + +def print_top(fheader): + fheader.write('''/* AUTOMATICALLY GENERATED, DO NOT MODIFY */ +/* + * QEMU Block Module Infrastructure + * + * Authors: + * Marc Mari <markmb@redhat.com> + */ + +''') + + fheader.write('''#ifndef QEMU_MODULE_BLOCK_H +#define QEMU_MODULE_BLOCK_H + +static const struct { + const char *format_name; + const char *protocol_name; + const char *library_name; +} block_driver_modules[] = {''') + +def print_bottom(fheader): + fheader.write(''' +}; + +#endif +''') + +if __name__ == '__main__': + # First argument: output file + # All other arguments: modules source files (.c) + output_file = sys.argv[1] + with open(output_file, 'w') as fheader: + print_top(fheader) + + for filename in sys.argv[2:]: + if os.path.isfile(filename): + process_file(fheader, filename) + else: + print("File " + filename + " does not exist.", file=sys.stderr) + sys.exit(1) + + print_bottom(fheader) + + sys.exit(0) diff --git a/scripts/mtest2make.py b/scripts/mtest2make.py new file mode 100644 index 000000000..02c0453e6 --- /dev/null +++ b/scripts/mtest2make.py @@ -0,0 +1,129 @@ +#! /usr/bin/env python3 + +# Create Makefile targets to run tests, from Meson's test introspection data. 
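+# Assumed invocation (illustrative): the build system pipes the output of
+# "meson introspect --targets --benchmarks --tests" into this script on
+# stdin and redirects stdout to a Makefile fragment.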
+# +# Author: Paolo Bonzini <pbonzini@redhat.com> + +from collections import defaultdict +import itertools +import json +import os +import shlex +import sys + +class Suite(object): + def __init__(self): + self.tests = list() + self.slow_tests = list() + self.executables = set() + +print(''' +SPEED = quick + +# $1 = environment, $2 = test command, $3 = test name, $4 = dir +.test-human-tap = $1 $(if $4,(cd $4 && $2),$2) -m $(SPEED) < /dev/null | ./scripts/tap-driver.pl --test-name="$3" $(if $(V),,--show-failures-only) +.test-human-exitcode = $1 $(PYTHON) scripts/test-driver.py $(if $4,-C$4) $(if $(V),--verbose) -- $2 < /dev/null +.test-tap-tap = $1 $(if $4,(cd $4 && $2),$2) < /dev/null | sed "s/^[a-z][a-z]* [0-9]*/& $3/" || true +.test-tap-exitcode = printf "%s\\n" 1..1 "`$1 $(if $4,(cd $4 && $2),$2) < /dev/null > /dev/null || echo "not "`ok 1 $3" +.test.human-print = echo $(if $(V),'$1 $2','Running test $3') && +.test.env = MALLOC_PERTURB_=$${MALLOC_PERTURB_:-$$(( $${RANDOM:-0} % 255 + 1))} + +# $1 = test name, $2 = test target (human or tap) +.test.run = $(call .test.$2-print,$(.test.env.$1),$(.test.cmd.$1),$(.test.name.$1)) $(call .test-$2-$(.test.driver.$1),$(.test.env.$1),$(.test.cmd.$1),$(.test.name.$1),$(.test.dir.$1)) + +.test.output-format = human +''') + +introspect = json.load(sys.stdin) +i = 0 + +def process_tests(test, targets, suites): + global i + env = ' '.join(('%s=%s' % (shlex.quote(k), shlex.quote(v)) + for k, v in test['env'].items())) + executable = test['cmd'][0] + try: + executable = os.path.relpath(executable) + except: + pass + if test['workdir'] is not None: + try: + test['cmd'][0] = os.path.relpath(executable, test['workdir']) + except: + test['cmd'][0] = executable + else: + test['cmd'][0] = executable + cmd = ' '.join((shlex.quote(x) for x in test['cmd'])) + driver = test['protocol'] if 'protocol' in test else 'exitcode' + + i += 1 + if test['workdir'] is not None: + print('.test.dir.%d := %s' % (i, shlex.quote(test['workdir']))) + + deps = (targets.get(x, []) for x in test['depends']) + deps = itertools.chain.from_iterable(deps) + + print('.test.name.%d := %s' % (i, test['name'])) + print('.test.driver.%d := %s' % (i, driver)) + print('.test.env.%d := $(.test.env) %s' % (i, env)) + print('.test.cmd.%d := %s' % (i, cmd)) + print('.test.deps.%d := %s' % (i, ' '.join(deps))) + print('.PHONY: run-test-%d' % (i,)) + print('run-test-%d: $(.test.deps.%d)' % (i,i)) + print('\t@$(call .test.run,%d,$(.test.output-format))' % (i,)) + + test_suites = test['suite'] or ['default'] + is_slow = any(s.endswith('-slow') for s in test_suites) + for s in test_suites: + # The suite name in the introspection info is "PROJECT:SUITE" + s = s.split(':')[1] + if s.endswith('-slow'): + s = s[:-5] + if is_slow: + suites[s].slow_tests.append(i) + else: + suites[s].tests.append(i) + suites[s].executables.add(executable) + +def emit_prolog(suites, prefix): + all_tap = ' '.join(('%s-report-%s.tap' % (prefix, k) for k in suites.keys())) + print('.PHONY: %s %s-report.tap %s' % (prefix, prefix, all_tap)) + print('%s: run-tests' % (prefix,)) + print('%s-report.tap %s: %s-report%%.tap: all' % (prefix, all_tap, prefix)) + print('''\t$(MAKE) .test.output-format=tap --quiet -Otarget V=1 %s$* | ./scripts/tap-merge.pl | tee "$@" \\ + | ./scripts/tap-driver.pl $(if $(V),, --show-failures-only)''' % (prefix, )) + +def emit_suite(name, suite, prefix): + executables = ' '.join(suite.executables) + slow_test_numbers = ' '.join((str(x) for x in suite.slow_tests)) + test_numbers = ' '.join((str(x) for x in 
suite.tests)) + target = '%s-%s' % (prefix, name) + print('.test.quick.%s := %s' % (target, test_numbers)) + print('.test.slow.%s := $(.test.quick.%s) %s' % (target, target, slow_test_numbers)) + print('%s-build: %s' % (prefix, executables)) + print('.PHONY: %s' % (target, )) + print('.PHONY: %s-report-%s.tap' % (prefix, name)) + print('%s: run-tests' % (target, )) + print('ifneq ($(filter %s %s, $(MAKECMDGOALS)),)' % (target, prefix)) + print('.tests += $(.test.$(SPEED).%s)' % (target, )) + print('endif') + print('all-%s-targets += %s' % (prefix, target)) + +targets = {t['id']: [os.path.relpath(f) for f in t['filename']] + for t in introspect['targets']} + +testsuites = defaultdict(Suite) +for test in introspect['tests']: + process_tests(test, targets, testsuites) +emit_prolog(testsuites, 'check') +for name, suite in testsuites.items(): + emit_suite(name, suite, 'check') + +benchsuites = defaultdict(Suite) +for test in introspect['benchmarks']: + process_tests(test, targets, benchsuites) +emit_prolog(benchsuites, 'bench') +for name, suite in benchsuites.items(): + emit_suite(name, suite, 'bench') + +print('run-tests: $(patsubst %, run-test-%, $(.tests))') diff --git a/scripts/nsis.py b/scripts/nsis.py new file mode 100644 index 000000000..5135a0583 --- /dev/null +++ b/scripts/nsis.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2020 Red Hat, Inc. +# +# SPDX-License-Identifier: GPL-2.0-or-later + +import argparse +import glob +import os +import shutil +import subprocess +import tempfile + + +def signcode(path): + cmd = os.environ.get("SIGNCODE") + if not cmd: + return + subprocess.run([cmd, path]) + + +def main(): + parser = argparse.ArgumentParser(description="QEMU NSIS build helper.") + parser.add_argument("outfile") + parser.add_argument("prefix") + parser.add_argument("srcdir") + parser.add_argument("cpu") + parser.add_argument("nsisargs", nargs="*") + args = parser.parse_args() + + destdir = tempfile.mkdtemp() + try: + subprocess.run(["make", "install", "DESTDIR=" + destdir + os.path.sep]) + with open( + os.path.join(destdir + args.prefix, "system-emulations.nsh"), "w" + ) as nsh: + for exe in glob.glob( + os.path.join(destdir + args.prefix, "qemu-system-*.exe") + ): + exe = os.path.basename(exe) + arch = exe[12:-4] + nsh.write( + """ + Section "{0}" Section_{0} + SetOutPath "$INSTDIR" + File "${{BINDIR}}\\{1}" + SectionEnd + """.format( + arch, exe + ) + ) + + for exe in glob.glob(os.path.join(destdir + args.prefix, "*.exe")): + signcode(exe) + + makensis = [ + "makensis", + "-V2", + "-NOCD", + "-DSRCDIR=" + args.srcdir, + "-DBINDIR=" + destdir + args.prefix, + ] + dlldir = "w32" + if args.cpu == "x86_64": + dlldir = "w64" + makensis += ["-DW64"] + if os.path.exists(os.path.join(args.srcdir, "dll")): + makensis += ["-DDLLDIR={0}/dll/{1}".format(args.srcdir, dlldir)] + + makensis += ["-DOUTFILE=" + args.outfile] + args.nsisargs + subprocess.run(makensis) + signcode(args.outfile) + finally: + shutil.rmtree(destdir) + + +if __name__ == "__main__": + main() diff --git a/scripts/oss-fuzz/build.sh b/scripts/oss-fuzz/build.sh new file mode 100755 index 000000000..98b56e052 --- /dev/null +++ b/scripts/oss-fuzz/build.sh @@ -0,0 +1,115 @@ +#!/bin/sh -e +# +# OSS-Fuzz build script. See: +# https://google.github.io/oss-fuzz/getting-started/new-project-guide/#buildsh +# +# The file is consumed by: +# https://github.com/google/oss-fuzz/blob/master/projects/qemu/Dockerfiles +# +# This code is licensed under the GPL version 2 or later. 
See +# the COPYING file in the top-level directory. +# + +# build project +# e.g. +# ./autogen.sh +# ./configure +# make -j$(nproc) all + +# build fuzzers +# e.g. +# $CXX $CXXFLAGS -std=c++11 -Iinclude \ +# /path/to/name_of_fuzzer.cc -o $OUT/name_of_fuzzer \ +# -fsanitize=fuzzer /path/to/library.a + +fatal () { + echo "Error : ${*}, exiting." + exit 1 +} + +OSS_FUZZ_BUILD_DIR="./build-oss-fuzz/" + +# There seems to be a bug in clang-11 (used for builds on oss-fuzz) : +# accel/tcg/cputlb.o: In function `load_memop': +# accel/tcg/cputlb.c:1505: undefined reference to `qemu_build_not_reached' +# +# When building with optimization, the compiler is expected to prove that the +# statement cannot be reached, and remove it. For some reason clang-11 doesn't +# remove it, resulting in an unresolved reference to qemu_build_not_reached +# Undefine the __OPTIMIZE__ macro which compiler.h relies on to choose whether +# to " #define qemu_build_not_reached() g_assert_not_reached() " +EXTRA_CFLAGS="$CFLAGS -U __OPTIMIZE__" + +if ! { [ -e "./COPYING" ] && + [ -e "./MAINTAINERS" ] && + [ -e "./Makefile" ] && + [ -e "./docs" ] && + [ -e "./VERSION" ] && + [ -e "./linux-user" ] && + [ -e "./softmmu" ];} ; then + fatal "Please run the script from the top of the QEMU tree" +fi + +mkdir -p $OSS_FUZZ_BUILD_DIR || fatal "mkdir $OSS_FUZZ_BUILD_DIR failed" +cd $OSS_FUZZ_BUILD_DIR || fatal "cd $OSS_FUZZ_BUILD_DIR failed" + + +if [ -z ${OUT+x} ]; then + DEST_DIR=$(realpath "./DEST_DIR") +else + DEST_DIR=$OUT +fi + +mkdir -p "$DEST_DIR/lib/" # Copy the shared libraries here + +# Build once to get the list of dynamic lib paths, and copy them over +../configure --disable-werror --cc="$CC" --cxx="$CXX" --enable-fuzzing \ + --prefix="$DEST_DIR" --bindir="$DEST_DIR" --datadir="$DEST_DIR/data/" \ + --extra-cflags="$EXTRA_CFLAGS" --target-list="i386-softmmu" + +if ! make "-j$(nproc)" qemu-fuzz-i386; then + fatal "Build failed. Please specify a compiler with fuzzing support"\ + "using the \$CC and \$CXX environment variables"\ + "\nFor example: CC=clang CXX=clang++ $0" +fi + +if [ "$GITLAB_CI" != "true" ]; then + for i in $(ldd ./qemu-fuzz-i386 | cut -f3 -d' '); do + cp "$i" "$DEST_DIR/lib/" + done + rm qemu-fuzz-i386 + + # Build a second time to build the final binary with correct rpath + ../configure --disable-werror --cc="$CC" --cxx="$CXX" --enable-fuzzing \ + --prefix="$DEST_DIR" --bindir="$DEST_DIR" --datadir="$DEST_DIR/data/" \ + --extra-cflags="$EXTRA_CFLAGS" --extra-ldflags="-Wl,-rpath,\$ORIGIN/lib" \ + --target-list="i386-softmmu" + make "-j$(nproc)" qemu-fuzz-i386 V=1 +fi + +# Copy over the datadir +cp -r ../pc-bios/ "$DEST_DIR/pc-bios" + +targets=$(./qemu-fuzz-i386 | awk '$1 ~ /\*/ {print $2}') +base_copy="$DEST_DIR/qemu-fuzz-i386-target-$(echo "$targets" | head -n 1)" + +cp "./qemu-fuzz-i386" "$base_copy" + +# Run the fuzzer with no arguments, to print the help-string and get the list +# of available fuzz-targets. Copy over the qemu-fuzz-i386, naming it according +# to each available fuzz target (See 05509c8e6d fuzz: select fuzz target using +# executable name) +for target in $(echo "$targets" | tail -n +2); +do + # Ignore the generic-fuzz target, as it requires some environment variables + # to be configured. We have some generic-fuzz-{pc-q35, floppy, ...} targets + # that are thin wrappers around this target that set the required + # environment variables according to predefined configs. 
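+    # For example (names illustrative), a "generic-fuzz-floppy" target is
+    # linked as $DEST_DIR/qemu-fuzz-i386-target-generic-fuzz-floppy, while
+    # the bare "generic-fuzz" target is skipped.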
+    if [ "$target" != "generic-fuzz" ]; then
+        ln $base_copy \
+           "$DEST_DIR/qemu-fuzz-i386-target-$target"
+    fi
+done
+
+echo "Done. The fuzzers are located in $DEST_DIR"
+exit 0
diff --git a/scripts/oss-fuzz/instrumentation-filter-template b/scripts/oss-fuzz/instrumentation-filter-template
new file mode 100644
index 000000000..76d2b6139
--- /dev/null
+++ b/scripts/oss-fuzz/instrumentation-filter-template
@@ -0,0 +1,15 @@
+# Code that we actually want the fuzzer to target
+# See: https://clang.llvm.org/docs/SanitizerCoverage.html#disabling-instrumentation-without-source-modification
+#
+src:*/hw/*
+src:*/include/hw/*
+src:*/slirp/*
+src:*/net/*
+
+# We don't care about coverage over fuzzer-specific code, however we should
+# instrument the fuzzer entry-point so libFuzzer always sees at least some
+# coverage - otherwise it will exit after the first input
+src:*/tests/qtest/fuzz/fuzz.c
+
+# Enable instrumentation for all functions in those files
+fun:*
diff --git a/scripts/oss-fuzz/minimize_qtest_trace.py b/scripts/oss-fuzz/minimize_qtest_trace.py
new file mode 100755
index 000000000..20825768c
--- /dev/null
+++ b/scripts/oss-fuzz/minimize_qtest_trace.py
@@ -0,0 +1,323 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+This takes a crashing qtest trace and tries to remove superfluous operations
+"""
+
+import sys
+import os
+import subprocess
+import time
+import struct
+
+QEMU_ARGS = None
+QEMU_PATH = None
+TIMEOUT = 5
+CRASH_TOKEN = None
+
+# Minimization levels
+M1 = False # try removing IO commands iteratively
+M2 = False # try setting bits in operand of write/out to zero
+
+write_suffix_lookup = {"b": (1, "B"),
+                       "w": (2, "H"),
+                       "l": (4, "L"),
+                       "q": (8, "Q")}
+
+def usage():
+    sys.exit("""\
+Usage:
+
+QEMU_PATH="/path/to/qemu" QEMU_ARGS="args" {} [Options] input_trace output_trace
+
+By default, the script uses the second-to-last line of the output to identify
+whether the crash occurred. Optionally, manually set a string that identifies
+the crash by setting CRASH_TOKEN=
+
+Options:
+
+-M1: enable a loop around the remove minimizer, which may help decrease some
+     timing-dependent instructions. Off by default.
+-M2: try setting bits in operand of write/out to zero. Off by default.
+
+""".format((sys.argv[0])))
+
+deduplication_note = """\n\
+Note: While trimming the input, the mutated trace sometimes triggers a
+different type of crash that still indicates the same bug. In that situation,
+the minimizer cannot recognize it as the same crash and is stopped from
+removing the operation. In the future, we may use a more sophisticated
+crash-case deduplication method.
+\n""" + +def check_if_trace_crashes(trace, path): + with open(path, "w") as tracefile: + tracefile.write("".join(trace)) + + rc = subprocess.Popen("timeout -s 9 {timeout}s {qemu_path} {qemu_args} 2>&1\ + < {trace_path}".format(timeout=TIMEOUT, + qemu_path=QEMU_PATH, + qemu_args=QEMU_ARGS, + trace_path=path), + shell=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + encoding="utf-8") + global CRASH_TOKEN + if CRASH_TOKEN is None: + try: + outs, _ = rc.communicate(timeout=5) + CRASH_TOKEN = " ".join(outs.splitlines()[-2].split()[0:3]) + except subprocess.TimeoutExpired: + print("subprocess.TimeoutExpired") + return False + print("Identifying Crashes by this string: {}".format(CRASH_TOKEN)) + global deduplication_note + print(deduplication_note) + return True + + for line in iter(rc.stdout.readline, ""): + if "CLOSED" in line: + return False + if CRASH_TOKEN in line: + return True + + print("\nWarning:") + print(" There is no 'CLOSED'or CRASH_TOKEN in the stdout of subprocess.") + print(" Usually this indicates a different type of crash.\n") + return False + + +# If previous write commands write the same length of data at the same +# interval, we view it as a hint. +def split_write_hint(newtrace, i): + HINT_LEN = 3 # > 2 + if i <=(HINT_LEN-1): + return None + + #find previous continuous write traces + k = 0 + l = i-1 + writes = [] + while (k != HINT_LEN and l >= 0): + if newtrace[l].startswith("write "): + writes.append(newtrace[l]) + k += 1 + l -= 1 + elif newtrace[l] == "": + l -= 1 + else: + return None + if k != HINT_LEN: + return None + + length = int(writes[0].split()[2], 16) + for j in range(1, HINT_LEN): + if length != int(writes[j].split()[2], 16): + return None + + step = int(writes[0].split()[1], 16) - int(writes[1].split()[1], 16) + for j in range(1, HINT_LEN-1): + if step != int(writes[j].split()[1], 16) - \ + int(writes[j+1].split()[1], 16): + return None + + return (int(writes[0].split()[1], 16)+step, length) + + +def remove_lines(newtrace, outpath): + remove_step = 1 + i = 0 + while i < len(newtrace): + # 1.) Try to remove lines completely and reproduce the crash. + # If it works, we're done. + if (i+remove_step) >= len(newtrace): + remove_step = 1 + prior = newtrace[i:i+remove_step] + for j in range(i, i+remove_step): + newtrace[j] = "" + print("Removing {lines} ...\n".format(lines=prior)) + if check_if_trace_crashes(newtrace, outpath): + i += remove_step + # Double the number of lines to remove for next round + remove_step *= 2 + continue + # Failed to remove multiple IOs, fast recovery + if remove_step > 1: + for j in range(i, i+remove_step): + newtrace[j] = prior[j-i] + remove_step = 1 + continue + newtrace[i] = prior[0] # remove_step = 1 + + # 2.) Try to replace write{bwlq} commands with a write addr, len + # command. Since this can require swapping endianness, try both LE and + # BE options. We do this, so we can "trim" the writes in (3) + + if (newtrace[i].startswith("write") and not + newtrace[i].startswith("write ")): + suffix = newtrace[i].split()[0][-1] + assert(suffix in write_suffix_lookup) + addr = int(newtrace[i].split()[1], 16) + value = int(newtrace[i].split()[2], 16) + for endianness in ['<', '>']: + data = struct.pack("{end}{size}".format(end=endianness, + size=write_suffix_lookup[suffix][1]), + value) + newtrace[i] = "write {addr} {size} 0x{data}\n".format( + addr=hex(addr), + size=hex(write_suffix_lookup[suffix][0]), + data=data.hex()) + if(check_if_trace_crashes(newtrace, outpath)): + break + else: + newtrace[i] = prior[0] + + # 3.) 
+        # 3.) If it is a qtest write command: write addr len data, try to split
+        # it into two separate write commands. If splitting the data operand
+        # from length/2^n bytes to the left does not work, try to move the pivot
+        # to the right side, then add one to n, until length/2^n == 0. The idea
+        # is to prune unnecessary bytes from long writes, while accommodating
+        # arbitrary MemoryRegion access sizes and alignments.
+
+        # This algorithm will fail under some rare situations.
+        # e.g., xxxxxxxxxuxxxxxx (u is the unnecessary byte)
+
+        if newtrace[i].startswith("write "):
+            addr = int(newtrace[i].split()[1], 16)
+            length = int(newtrace[i].split()[2], 16)
+            data = newtrace[i].split()[3][2:]
+            if length > 1:
+
+                # Can we get a hint from previous writes?
+                hint = split_write_hint(newtrace, i)
+                if hint is not None:
+                    hint_addr = hint[0]
+                    hint_len = hint[1]
+                    if hint_addr >= addr and hint_addr+hint_len <= addr+length:
+                        newtrace[i] = "write {addr} {size} 0x{data}\n".format(
+                                      addr=hex(hint_addr),
+                                      size=hex(hint_len),
+                                      data=data[(hint_addr-addr)*2:\
+                                                (hint_addr-addr)*2+hint_len*2])
+                        if check_if_trace_crashes(newtrace, outpath):
+                            # next round
+                            i += 1
+                            continue
+                        newtrace[i] = prior[0]
+
+                # Try splitting it using a binary approach
+                leftlength = int(length/2)
+                rightlength = length - leftlength
+                newtrace.insert(i+1, "")
+                power = 1
+                while leftlength > 0:
+                    newtrace[i] = "write {addr} {size} 0x{data}\n".format(
+                                  addr=hex(addr),
+                                  size=hex(leftlength),
+                                  data=data[:leftlength*2])
+                    newtrace[i+1] = "write {addr} {size} 0x{data}\n".format(
+                                    addr=hex(addr+leftlength),
+                                    size=hex(rightlength),
+                                    data=data[leftlength*2:])
+                    if check_if_trace_crashes(newtrace, outpath):
+                        break
+                    # move the pivot to right side
+                    if leftlength < rightlength:
+                        rightlength, leftlength = leftlength, rightlength
+                        continue
+                    power += 1
+                    leftlength = int(length/pow(2, power))
+                    rightlength = length - leftlength
+                if check_if_trace_crashes(newtrace, outpath):
+                    i -= 1
+                else:
+                    newtrace[i] = prior[0]
+                    del newtrace[i+1]
+        i += 1
+
+
+def clear_bits(newtrace, outpath):
+    # try setting bits in operands of out/write to zero
+    i = 0
+    while i < len(newtrace):
+        if (not newtrace[i].startswith("write ") and not
+                newtrace[i].startswith("out")):
+            i += 1
+            continue
+        # write ADDR SIZE DATA
+        # outx ADDR VALUE
+        print("\nzero setting bits: {}".format(newtrace[i]))
+
+        prefix = " ".join(newtrace[i].split()[:-1])
+        data = newtrace[i].split()[-1]
+        data_bin = bin(int(data, 16))
+        data_bin_list = list(data_bin)
+
+        for j in range(2, len(data_bin_list)):
+            prior = newtrace[i]
+            if (data_bin_list[j] == '1'):
+                data_bin_list[j] = '0'
+                data_try = hex(int("".join(data_bin_list), 2))
+                # It seems qtest only accepts padded hex-values.
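+                # e.g. clearing a bit may give hex() == '0x5' (an odd number
+                # of digits); pad it to '0x05' before writing it back.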
+                if len(data_try) % 2 == 1:
+                    data_try = data_try[:2] + "0" + data_try[2:]
+
+                newtrace[i] = "{prefix} {data_try}\n".format(
+                              prefix=prefix,
+                              data_try=data_try)
+
+                if not check_if_trace_crashes(newtrace, outpath):
+                    data_bin_list[j] = '1'
+                    newtrace[i] = prior
+        i += 1
+
+
+def minimize_trace(inpath, outpath):
+    global TIMEOUT
+    with open(inpath) as f:
+        trace = f.readlines()
+    start = time.time()
+    if not check_if_trace_crashes(trace, outpath):
+        sys.exit("The input qtest trace didn't cause a crash...")
+    end = time.time()
+    print("Crashed in {} seconds".format(end-start))
+    TIMEOUT = (end-start)*5
+    print("Setting the timeout to {} seconds".format(TIMEOUT))
+
+    newtrace = trace[:]
+    global M1, M2
+
+    # remove lines
+    old_len = len(newtrace) + 1
+    while old_len > len(newtrace):
+        old_len = len(newtrace)
+        print("trace length = ", old_len)
+        remove_lines(newtrace, outpath)
+        if not M1 and not M2:
+            break
+        newtrace = list(filter(lambda s: s != "", newtrace))
+    assert(check_if_trace_crashes(newtrace, outpath))
+
+    # set bits to zero
+    if M2:
+        clear_bits(newtrace, outpath)
+    assert(check_if_trace_crashes(newtrace, outpath))
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 3:
+        usage()
+    if "-M1" in sys.argv:
+        M1 = True
+    if "-M2" in sys.argv:
+        M2 = True
+    QEMU_PATH = os.getenv("QEMU_PATH")
+    QEMU_ARGS = os.getenv("QEMU_ARGS")
+    if QEMU_PATH is None or QEMU_ARGS is None:
+        usage()
+    # if "accel" not in QEMU_ARGS:
+    #     QEMU_ARGS += " -accel qtest"
+    CRASH_TOKEN = os.getenv("CRASH_TOKEN")
+    QEMU_ARGS += " -qtest stdio -monitor none -serial none "
+    minimize_trace(sys.argv[-2], sys.argv[-1])
diff --git a/scripts/oss-fuzz/output_reproducer.py b/scripts/oss-fuzz/output_reproducer.py
new file mode 100755
index 000000000..3608b0600
--- /dev/null
+++ b/scripts/oss-fuzz/output_reproducer.py
@@ -0,0 +1,160 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Convert plain qtest traces to C or Bash reproducers
+
+Use this to help build bug-reports or create in-tree reproducers for bugs.
+Note: This will not format C code for you. Pipe the output through
+clang-format -style="{BasedOnStyle: llvm, IndentWidth: 4, ColumnLimit: 90}"
+or similar
+"""
+
+import sys
+import os
+import argparse
+import textwrap
+from datetime import date
+
+__author__ = "Alexander Bulekov <alxndr@bu.edu>"
+__copyright__ = "Copyright (C) 2021, Red Hat, Inc."
+__license__ = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Alexander Bulekov"
+__email__ = "alxndr@bu.edu"
+
+
+def c_header(owner):
+    return """/*
+ * Autogenerated Fuzzer Test Case
+ *
+ * Copyright (c) {date} {owner}
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "libqos/libqtest.h"
+
+    """.format(date=date.today().year, owner=owner)
+
+def c_comment(s):
+    """ Return a multi-line C comment.
Assume the text is already wrapped """ + return "/*\n * " + "\n * ".join(s.splitlines()) + "\n*/" + +def print_c_function(s): + print("/* ") + for l in s.splitlines(): + print(" * {}".format(l)) + +def bash_reproducer(path, args, trace): + result = '\\\n'.join(textwrap.wrap("cat << EOF | {} {}".format(path, args), + 72, break_on_hyphens=False, + drop_whitespace=False)) + for l in trace.splitlines(): + result += "\n" + '\\\n'.join(textwrap.wrap(l,72,drop_whitespace=False)) + result += "\nEOF" + return result + +def c_reproducer(name, args, trace): + result = [] + result.append("""static void {}(void)\n{{""".format(name)) + + # libqtest will add its own qtest args, so get rid of them + args = args.replace("-accel qtest","") + args = args.replace(",accel=qtest","") + args = args.replace("-machine accel=qtest","") + args = args.replace("-qtest stdio","") + result.append("""QTestState *s = qtest_init("{}");""".format(args)) + for l in trace.splitlines(): + param = l.split() + cmd = param[0] + if cmd == "write": + buf = param[3][2:] #Get the 0x... buffer and trim the "0x" + assert len(buf)%2 == 0 + bufbytes = [buf[i:i+2] for i in range(0, len(buf), 2)] + bufstring = '\\x'+'\\x'.join(bufbytes) + addr = param[1] + size = param[2] + result.append("""qtest_bufwrite(s, {}, "{}", {});""".format( + addr, bufstring, size)) + elif cmd.startswith("in") or cmd.startswith("read"): + result.append("qtest_{}(s, {});".format( + cmd, param[1])) + elif cmd.startswith("out") or cmd.startswith("write"): + result.append("qtest_{}(s, {}, {});".format( + cmd, param[1], param[2])) + elif cmd == "clock_step": + if len(param) ==1: + result.append("qtest_clock_step_next(s);") + else: + result.append("qtest_clock_step(s, {});".format(param[1])) + result.append("qtest_quit(s);\n}") + return "\n".join(result) + +def c_main(name, arch): + return """int main(int argc, char **argv) +{{ + const char *arch = qtest_get_arch(); + + g_test_init(&argc, &argv, NULL); + + if (strcmp(arch, "{arch}") == 0) {{ + qtest_add_func("fuzz/{name}",{name}); + }} + + return g_test_run(); +}}""".format(name=name, arch=arch) + +def main(): + parser = argparse.ArgumentParser() + group = parser.add_mutually_exclusive_group() + group.add_argument("-bash", help="Only output a copy-pastable bash command", + action="store_true") + group.add_argument("-c", help="Only output a c function", + action="store_true") + parser.add_argument('-owner', help="If generating complete C source code, \ + this specifies the Copyright owner", + nargs='?', default="<name of author>") + parser.add_argument("-no_comment", help="Don't include a bash reproducer \ + as a comment in the C reproducers", + action="store_true") + parser.add_argument('-name', help="The name of the c function", + nargs='?', default="test_fuzz") + parser.add_argument('input_trace', help="input QTest command sequence \ + (stdin by default)", + nargs='?', type=argparse.FileType('r'), + default=sys.stdin) + args = parser.parse_args() + + qemu_path = os.getenv("QEMU_PATH") + qemu_args = os.getenv("QEMU_ARGS") + if not qemu_args or not qemu_path: + print("Please set QEMU_PATH and QEMU_ARGS environment variables") + sys.exit(1) + + bash_args = qemu_args + if " -qtest stdio" not in qemu_args: + bash_args += " -qtest stdio" + + arch = qemu_path.split("-")[-1] + trace = args.input_trace.read().strip() + + if args.bash : + print(bash_reproducer(qemu_path, bash_args, trace)) + else: + output = "" + if not args.c: + output += c_header(args.owner) + "\n" + if not args.no_comment: + output += 
c_comment(bash_reproducer(qemu_path, bash_args, trace)) + output += c_reproducer(args.name, qemu_args, trace) + if not args.c: + output += c_main(args.name, arch) + print(output) + + +if __name__ == '__main__': + main() diff --git a/scripts/oss-fuzz/reorder_fuzzer_qtest_trace.py b/scripts/oss-fuzz/reorder_fuzzer_qtest_trace.py new file mode 100755 index 000000000..b154a2550 --- /dev/null +++ b/scripts/oss-fuzz/reorder_fuzzer_qtest_trace.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +Use this to convert qtest log info from a generic fuzzer input into a qtest +trace that you can feed into a standard qemu-system process. Example usage: + +QEMU_FUZZ_ARGS="-machine q35,accel=qtest" QEMU_FUZZ_OBJECTS="*" \ + ./i386-softmmu/qemu-fuzz-i386 --fuzz-target=generic-pci-fuzz +# .. Finds some crash +QTEST_LOG=1 FUZZ_SERIALIZE_QTEST=1 \ +QEMU_FUZZ_ARGS="-machine q35,accel=qtest" QEMU_FUZZ_OBJECTS="*" \ + ./i386-softmmu/qemu-fuzz-i386 --fuzz-target=generic-pci-fuzz + /path/to/crash 2> qtest_log_output +scripts/oss-fuzz/reorder_fuzzer_qtest_trace.py qtest_log_output > qtest_trace +./i386-softmmu/qemu-fuzz-i386 -machine q35,accel=qtest \ + -qtest stdio < qtest_trace + +### Details ### + +Some fuzzer make use of hooks that allow us to populate some memory range, just +before a DMA read from that range. This means that the fuzzer can produce +activity that looks like: + [start] read from mmio addr + [end] read from mmio addr + [start] write to pio addr + [start] fill a DMA buffer just in time + [end] fill a DMA buffer just in time + [start] fill a DMA buffer just in time + [end] fill a DMA buffer just in time + [end] write to pio addr + [start] read from mmio addr + [end] read from mmio addr + +We annotate these "nested" DMA writes, so with QTEST_LOG=1 the QTest trace +might look something like: +[R +0.028431] readw 0x10000 +[R +0.028434] outl 0xc000 0xbeef # Triggers a DMA read from 0xbeef and 0xbf00 +[DMA][R +0.034639] write 0xbeef 0x2 0xAAAA +[DMA][R +0.034639] write 0xbf00 0x2 0xBBBB +[R +0.028431] readw 0xfc000 + +This script would reorder the above trace so it becomes: +readw 0x10000 +write 0xbeef 0x2 0xAAAA +write 0xbf00 0x2 0xBBBB +outl 0xc000 0xbeef +readw 0xfc000 + +I.e. by the time, 0xc000 tries to read from DMA, those DMA buffers have already +been set up, removing the need for the DMA hooks. We can simply provide this +reordered trace via -qtest stdio to reproduce the input + +Note: this won't work for traces where the device tries to read from the same +DMA region twice in between MMIO/PIO commands. E.g: + [R +0.028434] outl 0xc000 0xbeef + [DMA][R +0.034639] write 0xbeef 0x2 0xAAAA + [DMA][R +0.034639] write 0xbeef 0x2 0xBBBB + +The fuzzer will annotate suspected double-fetches with [DOUBLE-FETCH]. This +script looks for these tags and warns the users that the resulting trace might +not reproduce the bug. +""" + +import sys + +__author__ = "Alexander Bulekov <alxndr@bu.edu>" +__copyright__ = "Copyright (C) 2020, Red Hat, Inc." 
+__license__ = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Alexander Bulekov"
+__email__ = "alxndr@bu.edu"
+
+
+def usage():
+    sys.exit("Usage: {} /path/to/qtest_log_output".format((sys.argv[0])))
+
+
+def main(filename):
+    with open(filename, "r") as f:
+        trace = f.readlines()
+
+    # Leave only lines that look like logged qtest commands
+    trace[:] = [x.strip() for x in trace
+                if ("[R +" in x or "[S +" in x) and "CLOSED" not in x]
+
+    for i in range(len(trace)):
+        if i+1 < len(trace):
+            if "[DMA]" in trace[i+1]:
+                if "[DOUBLE-FETCH]" in trace[i+1]:
+                    sys.stderr.write("Warning: Likely double fetch on line "
+                                     "{}.\n There will likely be problems "
+                                     "reproducing behavior with the "
+                                     "resulting qtest trace\n\n".format(i+1))
+                trace[i], trace[i+1] = trace[i+1], trace[i]
+    for line in trace:
+        print(line.split("]")[-1].strip())
+
+
+if __name__ == '__main__':
+    if len(sys.argv) == 1:
+        usage()
+    main(sys.argv[1])
diff --git a/scripts/performance/dissect.py b/scripts/performance/dissect.py
new file mode 100755
index 000000000..bf24f5092
--- /dev/null
+++ b/scripts/performance/dissect.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+
+# Print the percentage of instructions spent in each phase of QEMU
+# execution.
+#
+# Syntax:
+# dissect.py [-h] -- <qemu executable> [<qemu executable options>] \
+#            <target executable> [<target executable options>]
+#
+# [-h] - Print the script arguments help message.
+#
+# Example of usage:
+# dissect.py -- qemu-arm coulomb_double-arm
+#
+# This file is a part of the project "TCG Continuous Benchmarking".
+#
+# Copyright (C) 2020 Ahmed Karaman <ahmedkhaledkaraman@gmail.com>
+# Copyright (C) 2020 Aleksandar Markovic <aleksandar.qemu.devel@gmail.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import argparse
+import os
+import subprocess
+import sys
+import tempfile
+
+
+def get_JIT_line(callgrind_data):
+    """
+    Search for the first instance of the JIT call in
+    the callgrind_annotate output when run using --tree=caller
+    This is equivalent to the self number of instructions of JIT.
+
+    Parameters:
+    callgrind_data (list): callgrind_annotate output
+
+    Returns:
+    (int): Line number
+    """
+    line = -1
+    for i in range(len(callgrind_data)):
+        if callgrind_data[i].strip('\n') and \
+                callgrind_data[i].split()[-1] == "[???]":
+            line = i
+            break
+    if line == -1:
+        sys.exit("Couldn't locate the JIT call ... Exiting.")
+    return line
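+
+
+# callgrind_annotate has no source information for JIT-generated code, so it
+# reports such lines with a trailing "[???]" -- the marker get_JIT_line()
+# matches above. An illustrative line: "1,234,567  ???:0x... [???]".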
+def main():
+    # Parse the command line arguments
+    parser = argparse.ArgumentParser(
+        usage='dissect.py [-h] -- '
+              '<qemu executable> [<qemu executable options>] '
+              '<target executable> [<target executable options>]')
+
+    parser.add_argument('command', type=str, nargs='+', help=argparse.SUPPRESS)
+
+    args = parser.parse_args()
+
+    # Extract the needed variables from the args
+    command = args.command
+
+    # Ensure that valgrind is installed
+    check_valgrind = subprocess.run(
+        ["which", "valgrind"], stdout=subprocess.DEVNULL)
+    if check_valgrind.returncode:
+        sys.exit("Please install valgrind before running the script.")
+
+    # Save all intermediate files in a temporary directory
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        # callgrind output file path
+        data_path = os.path.join(tmpdirname, "callgrind.data")
+        # callgrind_annotate output file path
+        annotate_out_path = os.path.join(tmpdirname, "callgrind_annotate.out")
+
+        # Run callgrind
+        callgrind = subprocess.run((["valgrind",
+                                     "--tool=callgrind",
+                                     "--callgrind-out-file=" + data_path]
+                                    + command),
+                                   stdout=subprocess.DEVNULL,
+                                   stderr=subprocess.PIPE)
+        if callgrind.returncode:
+            sys.exit(callgrind.stderr.decode("utf-8"))
+
+        # Save callgrind_annotate output
+        with open(annotate_out_path, "w") as output:
+            callgrind_annotate = subprocess.run(
+                ["callgrind_annotate", data_path, "--tree=caller"],
+                stdout=output,
+                stderr=subprocess.PIPE)
+            if callgrind_annotate.returncode:
+                sys.exit(callgrind_annotate.stderr.decode("utf-8"))
+
+        # Read the callgrind_annotate output to callgrind_data[]
+        callgrind_data = []
+        with open(annotate_out_path, 'r') as data:
+            callgrind_data = data.readlines()
+
+        # Line number with the total number of instructions
+        total_instructions_line_number = 20
+        # Get the total number of instructions
+        total_instructions_line_data = \
+            callgrind_data[total_instructions_line_number]
+        total_instructions = total_instructions_line_data.split()[0]
+        total_instructions = int(total_instructions.replace(',', ''))
+
+        # Line number with the JIT self number of instructions
+        JIT_self_instructions_line_number = get_JIT_line(callgrind_data)
+        # Get the JIT self number of instructions
+        JIT_self_instructions_line_data = \
+            callgrind_data[JIT_self_instructions_line_number]
+        JIT_self_instructions = JIT_self_instructions_line_data.split()[0]
+        JIT_self_instructions = int(JIT_self_instructions.replace(',', ''))
+
+        # Line number with the JIT self + inclusive number of instructions
+        # It's the line above the first JIT call when running with --tree=caller
+        JIT_total_instructions_line_number = JIT_self_instructions_line_number-1
+        # Get the JIT self + inclusive number of instructions
+        JIT_total_instructions_line_data = \
+            callgrind_data[JIT_total_instructions_line_number]
+        JIT_total_instructions = JIT_total_instructions_line_data.split()[0]
+        JIT_total_instructions = int(JIT_total_instructions.replace(',', ''))
+
+        # Calculate number of instructions in helpers and code generation
+        helpers_instructions = JIT_total_instructions-JIT_self_instructions
+        code_generation_instructions = total_instructions-JIT_total_instructions
+
+        # Print results (Insert commas in large numbers)
+        # Print total number of instructions
+        print('{:<20}{:>20}\n'.
+              format("Total Instructions:",
+                     format(total_instructions, ',')))
+        # Print code generation instructions and percentage
+        print('{:<20}{:>20}\t{:>6.3f}%'.
+              format("Code Generation:",
+                     format(code_generation_instructions, ","),
+                     (code_generation_instructions / total_instructions) * 100))
+        # Print JIT instructions and percentage
+        print('{:<20}{:>20}\t{:>6.3f}%'.
+              format("JIT Execution:",
+                     format(JIT_self_instructions, ","),
+                     (JIT_self_instructions / total_instructions) * 100))
+        # Print helpers instructions and percentage
+        print('{:<20}{:>20}\t{:>6.3f}%'.
+              format("Helpers:",
+                     format(helpers_instructions, ","),
+                     (helpers_instructions/total_instructions)*100))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/performance/topN_callgrind.py b/scripts/performance/topN_callgrind.py
new file mode 100755
index 000000000..67c59197a
--- /dev/null
+++ b/scripts/performance/topN_callgrind.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+
+# Print the top N most executed functions in QEMU using callgrind.
+# Syntax:
+# topN_callgrind.py [-h] [-n] <number of displayed top functions> -- \
+#                   <qemu executable> [<qemu executable options>] \
+#                   <target executable> [<target executable options>]
+#
+# [-h] - Print the script arguments help message.
+# [-n] - Specify the number of top functions to print.
+#      - If this flag is not specified, the tool defaults to 25.
+#
+# Example of usage:
+# topN_callgrind.py -n 20 -- qemu-arm coulomb_double-arm
+#
+# This file is a part of the project "TCG Continuous Benchmarking".
+#
+# Copyright (C) 2020 Ahmed Karaman <ahmedkhaledkaraman@gmail.com>
+# Copyright (C) 2020 Aleksandar Markovic <aleksandar.qemu.devel@gmail.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
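+#
+# Example output (illustrative values and paths):
+#  No. Percentage Function Name                  Source File
+# ---- ---------- ------------------------------ ------------------------------
+#    1    15.151% helper_float_sub_d             <src>/target/mips/fpu_helper.c
+#    2     7.737% float64_sub                    <src>/fpu/softfloat.c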
+
+import argparse
+import os
+import subprocess
+import sys
+
+
+# Parse the command line arguments
+parser = argparse.ArgumentParser(
+    usage='topN_callgrind.py [-h] [-n] <number of displayed top functions> -- '
+          '<qemu executable> [<qemu executable options>] '
+          '<target executable> [<target executable options>]')
+
+parser.add_argument('-n', dest='top', type=int, default=25,
+                    help='Specify the number of top functions to print.')
+
+parser.add_argument('command', type=str, nargs='+', help=argparse.SUPPRESS)
+
+args = parser.parse_args()
+
+# Extract the needed variables from the args
+command = args.command
+top = args.top
+
+# Ensure that valgrind is installed
+check_valgrind_presence = subprocess.run(["which", "valgrind"],
+                                         stdout=subprocess.DEVNULL)
+if check_valgrind_presence.returncode:
+    sys.exit("Please install valgrind before running the script!")
+
+# Run callgrind
+callgrind = subprocess.run((
+    ["valgrind", "--tool=callgrind", "--callgrind-out-file=/tmp/callgrind.data"]
+    + command),
+    stdout=subprocess.DEVNULL,
+    stderr=subprocess.PIPE)
+if callgrind.returncode:
+    sys.exit(callgrind.stderr.decode("utf-8"))
+
+# Save callgrind_annotate output to /tmp/callgrind_annotate.out
+with open("/tmp/callgrind_annotate.out", "w") as output:
+    callgrind_annotate = subprocess.run(["callgrind_annotate",
+                                         "/tmp/callgrind.data"],
+                                        stdout=output,
+                                        stderr=subprocess.PIPE)
+    if callgrind_annotate.returncode:
+        os.unlink('/tmp/callgrind.data')
+        output.close()
+        os.unlink('/tmp/callgrind_annotate.out')
+        sys.exit(callgrind_annotate.stderr.decode("utf-8"))
+
+# Read the callgrind_annotate output to callgrind_data[]
+callgrind_data = []
+with open('/tmp/callgrind_annotate.out', 'r') as data:
+    callgrind_data = data.readlines()
+
+# Line number with the total number of instructions
+total_instructions_line_number = 20
+
+# Get the total number of instructions
+total_instructions_line_data = callgrind_data[total_instructions_line_number]
+total_number_of_instructions = total_instructions_line_data.split(' ')[0]
+total_number_of_instructions = int(
+    total_number_of_instructions.replace(',', ''))
+
+# Line number with the top function
+first_func_line = 25
+
+# Number of functions recorded by callgrind, last two lines are always empty
+number_of_functions = len(callgrind_data) - first_func_line - 2
+
+# Limit the number of top functions to "top"
+number_of_top_functions = (top if number_of_functions > top
+                           else number_of_functions)
+
+# Store the data of the top functions in top_functions[]
+top_functions = callgrind_data[first_func_line:
+                               first_func_line + number_of_top_functions]
+
+# Print table header
+print('{:>4} {:>10} {:<30} {}\n{} {} {} {}'.format('No.',
+                                                   'Percentage',
+                                                   'Function Name',
+                                                   'Source File',
+                                                   '-' * 4,
+                                                   '-' * 10,
+                                                   '-' * 30,
+                                                   '-' * 30,
+                                                   ))
+
+# Print top N functions
+for (index, function) in enumerate(top_functions, start=1):
+    function_data = function.split()
+    # Calculate function percentage
+    function_instructions = float(function_data[0].replace(',', ''))
+    function_percentage = (function_instructions /
+                           total_number_of_instructions)*100
+    # Get function name and source files path
+    function_source_file, function_name = function_data[1].split(':')
+    # Print extracted data
+    print('{:>4} {:>9.3f}% {:<30} {}'.format(index,
+                                             round(function_percentage, 3),
+                                             function_name,
+                                             function_source_file))
+
+# Remove intermediate files
+os.unlink('/tmp/callgrind.data')
+os.unlink('/tmp/callgrind_annotate.out')
diff --git a/scripts/performance/topN_perf.py b/scripts/performance/topN_perf.py
b/scripts/performance/topN_perf.py new file mode 100755 index 000000000..07be195fc --- /dev/null +++ b/scripts/performance/topN_perf.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 + +# Print the top N most executed functions in QEMU using perf. +# Syntax: +# topN_perf.py [-h] [-n] <number of displayed top functions> -- \ +# <qemu executable> [<qemu executable options>] \ +# <target executable> [<target executable options>] +# +# [-h] - Print the script arguments help message. +# [-n] - Specify the number of top functions to print. +# - If this flag is not specified, the tool defaults to 25. +# +# Example of usage: +# topN_perf.py -n 20 -- qemu-arm coulomb_double-arm +# +# This file is a part of the project "TCG Continuous Benchmarking". +# +# Copyright (C) 2020 Ahmed Karaman <ahmedkhaledkaraman@gmail.com> +# Copyright (C) 2020 Aleksandar Markovic <aleksandar.qemu.devel@gmail.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import argparse +import os +import subprocess +import sys + + +# Parse the command line arguments +parser = argparse.ArgumentParser( + usage='topN_perf.py [-h] [-n] <number of displayed top functions> -- ' + '<qemu executable> [<qemu executable options>] ' + '<target executable> [<target executable options>]') + +parser.add_argument('-n', dest='top', type=int, default=25, + help='Specify the number of top functions to print.') + +parser.add_argument('command', type=str, nargs='+', help=argparse.SUPPRESS) + +args = parser.parse_args() + +# Extract the needed variables from the args +command = args.command +top = args.top + +# Ensure that perf is installed +check_perf_presence = subprocess.run(["which", "perf"], + stdout=subprocess.DEVNULL) +if check_perf_presence.returncode: + sys.exit("Please install perf before running the script!") + +# Ensure the user has privileges to run perf +check_perf_executability = subprocess.run(["perf", "stat", "ls", "/"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) +if check_perf_executability.returncode: + sys.exit( +""" +Error: +You may not have permission to collect stats. + +Consider tweaking /proc/sys/kernel/perf_event_paranoid, +which controls use of the performance events system by +unprivileged users (without CAP_SYS_ADMIN). + + -1: Allow use of (almost) all events by all users + Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK + 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN + Disallow raw tracepoint access by users without CAP_SYS_ADMIN + 1: Disallow CPU event access by users without CAP_SYS_ADMIN + 2: Disallow kernel profiling by users without CAP_SYS_ADMIN + +To make this setting permanent, edit /etc/sysctl.conf too, e.g.: + kernel.perf_event_paranoid = -1 + +* Alternatively, you can run this script under sudo privileges.
+""" +) + +# Run perf record +perf_record = subprocess.run((["perf", "record", "--output=/tmp/perf.data"] + + command), + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE) +if perf_record.returncode: + os.unlink('/tmp/perf.data') + sys.exit(perf_record.stderr.decode("utf-8")) + +# Save perf report output to /tmp/perf_report.out +with open("/tmp/perf_report.out", "w") as output: + perf_report = subprocess.run( + ["perf", "report", "--input=/tmp/perf.data", "--stdio"], + stdout=output, + stderr=subprocess.PIPE) + if perf_report.returncode: + os.unlink('/tmp/perf.data') + output.close() + os.unlink('/tmp/perf_report.out') + sys.exit(perf_report.stderr.decode("utf-8")) + +# Read the reported data to functions[] +functions = [] +with open("/tmp/perf_report.out", "r") as data: + # Only read lines that are not comments (comments start with #) + # Only read lines that are not empty + functions = [line for line in data.readlines() if line and line[0] + != '#' and line[0] != "\n"] + +# Limit the number of top functions to "top" +number_of_top_functions = top if len(functions) > top else len(functions) + +# Store the data of the top functions in top_functions[] +top_functions = functions[:number_of_top_functions] + +# Print table header +print('{:>4} {:>10} {:<30} {}\n{} {} {} {}'.format('No.', + 'Percentage', + 'Name', + 'Invoked by', + '-' * 4, + '-' * 10, + '-' * 30, + '-' * 25)) + +# Print top N functions +for (index, function) in enumerate(top_functions, start=1): + function_data = function.split() + function_percentage = function_data[0] + function_name = function_data[-1] + function_invoker = ' '.join(function_data[2:-2]) + print('{:>4} {:>10} {:<30} {}'.format(index, + function_percentage, + function_name, + function_invoker)) + +# Remove intermediate files +os.unlink('/tmp/perf.data') +os.unlink('/tmp/perf_report.out') diff --git a/scripts/qapi-gen.py b/scripts/qapi-gen.py new file mode 100644 index 000000000..f3518d29a --- /dev/null +++ b/scripts/qapi-gen.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 + +# This work is licensed under the terms of the GNU GPL, version 2 or later. +# See the COPYING file in the top-level directory. + +""" +QAPI code generation execution shim. + +This standalone script exists primarily to facilitate the running of the QAPI +code generator without needing to install the python module to the current +execution environment. +""" + +import sys + +from qapi import main + +if __name__ == '__main__': + sys.exit(main.main()) diff --git a/scripts/qapi/.flake8 b/scripts/qapi/.flake8 new file mode 100644 index 000000000..6b158c68b --- /dev/null +++ b/scripts/qapi/.flake8 @@ -0,0 +1,2 @@ +[flake8] +extend-ignore = E722 # Prefer pylint's bare-except checks to flake8's diff --git a/scripts/qapi/.isort.cfg b/scripts/qapi/.isort.cfg new file mode 100644 index 000000000..643caa1fb --- /dev/null +++ b/scripts/qapi/.isort.cfg @@ -0,0 +1,7 @@ +[settings] +force_grid_wrap=4 +force_sort_within_sections=True +include_trailing_comma=True +line_length=72 +lines_after_imports=2 +multi_line_output=3 diff --git a/scripts/qapi/__init__.py b/scripts/qapi/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/scripts/qapi/__init__.py diff --git a/scripts/qapi/commands.py b/scripts/qapi/commands.py new file mode 100644 index 000000000..21001bbd6 --- /dev/null +++ b/scripts/qapi/commands.py @@ -0,0 +1,338 @@ +""" +QAPI command marshaller generator + +Copyright IBM, Corp. 2011 +Copyright (C) 2014-2018 Red Hat, Inc. 
+ +Authors: + Anthony Liguori <aliguori@us.ibm.com> + Michael Roth <mdroth@linux.vnet.ibm.com> + Markus Armbruster <armbru@redhat.com> + +This work is licensed under the terms of the GNU GPL, version 2. +See the COPYING file in the top-level directory. +""" + +from typing import ( + Dict, + List, + Optional, + Set, +) + +from .common import c_name, mcgen +from .gen import ( + QAPIGenC, + QAPISchemaModularCVisitor, + build_params, + ifcontext, + gen_special_features, +) +from .schema import ( + QAPISchema, + QAPISchemaFeature, + QAPISchemaIfCond, + QAPISchemaObjectType, + QAPISchemaType, +) +from .source import QAPISourceInfo + + +def gen_command_decl(name: str, + arg_type: Optional[QAPISchemaObjectType], + boxed: bool, + ret_type: Optional[QAPISchemaType]) -> str: + return mcgen(''' +%(c_type)s qmp_%(c_name)s(%(params)s); +''', + c_type=(ret_type and ret_type.c_type()) or 'void', + c_name=c_name(name), + params=build_params(arg_type, boxed, 'Error **errp')) + + +def gen_call(name: str, + arg_type: Optional[QAPISchemaObjectType], + boxed: bool, + ret_type: Optional[QAPISchemaType]) -> str: + ret = '' + + argstr = '' + if boxed: + assert arg_type + argstr = '&arg, ' + elif arg_type: + assert not arg_type.variants + for memb in arg_type.members: + if memb.optional: + argstr += 'arg.has_%s, ' % c_name(memb.name) + argstr += 'arg.%s, ' % c_name(memb.name) + + lhs = '' + if ret_type: + lhs = 'retval = ' + + ret = mcgen(''' + + %(lhs)sqmp_%(c_name)s(%(args)s&err); + error_propagate(errp, err); +''', + c_name=c_name(name), args=argstr, lhs=lhs) + if ret_type: + ret += mcgen(''' + if (err) { + goto out; + } + + qmp_marshal_output_%(c_name)s(retval, ret, errp); +''', + c_name=ret_type.c_name()) + return ret + + +def gen_marshal_output(ret_type: QAPISchemaType) -> str: + return mcgen(''' + +static void qmp_marshal_output_%(c_name)s(%(c_type)s ret_in, + QObject **ret_out, Error **errp) +{ + Visitor *v; + + v = qobject_output_visitor_new_qmp(ret_out); + if (visit_type_%(c_name)s(v, "unused", &ret_in, errp)) { + visit_complete(v, ret_out); + } + visit_free(v); + v = qapi_dealloc_visitor_new(); + visit_type_%(c_name)s(v, "unused", &ret_in, NULL); + visit_free(v); +} +''', + c_type=ret_type.c_type(), c_name=ret_type.c_name()) + + +def build_marshal_proto(name: str) -> str: + return ('void qmp_marshal_%s(QDict *args, QObject **ret, Error **errp)' + % c_name(name)) + + +def gen_marshal_decl(name: str) -> str: + return mcgen(''' +%(proto)s; +''', + proto=build_marshal_proto(name)) + + +def gen_marshal(name: str, + arg_type: Optional[QAPISchemaObjectType], + boxed: bool, + ret_type: Optional[QAPISchemaType]) -> str: + have_args = boxed or (arg_type and not arg_type.is_empty()) + if have_args: + assert arg_type is not None + arg_type_c_name = arg_type.c_name() + + ret = mcgen(''' + +%(proto)s +{ + Error *err = NULL; + bool ok = false; + Visitor *v; +''', + proto=build_marshal_proto(name)) + + if ret_type: + ret += mcgen(''' + %(c_type)s retval; +''', + c_type=ret_type.c_type()) + + if have_args: + ret += mcgen(''' + %(c_name)s arg = {0}; +''', + c_name=arg_type_c_name) + + ret += mcgen(''' + + v = qobject_input_visitor_new_qmp(QOBJECT(args)); + if (!visit_start_struct(v, NULL, NULL, 0, errp)) { + goto out; + } +''') + + if have_args: + ret += mcgen(''' + if (visit_type_%(c_arg_type)s_members(v, &arg, errp)) { + ok = visit_check_struct(v, errp); + } +''', + c_arg_type=arg_type_c_name) + else: + ret += mcgen(''' + ok = visit_check_struct(v, errp); +''') + + ret += mcgen(''' + visit_end_struct(v, NULL); + if 
(!ok) { + goto out; + } +''') + + ret += gen_call(name, arg_type, boxed, ret_type) + + ret += mcgen(''' + +out: + visit_free(v); +''') + + ret += mcgen(''' + v = qapi_dealloc_visitor_new(); + visit_start_struct(v, NULL, NULL, 0, NULL); +''') + + if have_args: + ret += mcgen(''' + visit_type_%(c_arg_type)s_members(v, &arg, NULL); +''', + c_arg_type=arg_type_c_name) + + ret += mcgen(''' + visit_end_struct(v, NULL); + visit_free(v); +''') + + ret += mcgen(''' +} +''') + return ret + + +def gen_register_command(name: str, + features: List[QAPISchemaFeature], + success_response: bool, + allow_oob: bool, + allow_preconfig: bool, + coroutine: bool) -> str: + options = [] + + if not success_response: + options += ['QCO_NO_SUCCESS_RESP'] + if allow_oob: + options += ['QCO_ALLOW_OOB'] + if allow_preconfig: + options += ['QCO_ALLOW_PRECONFIG'] + if coroutine: + options += ['QCO_COROUTINE'] + + ret = mcgen(''' + qmp_register_command(cmds, "%(name)s", + qmp_marshal_%(c_name)s, %(opts)s, %(feats)s); +''', + name=name, c_name=c_name(name), + opts=' | '.join(options) or 0, + feats=gen_special_features(features)) + return ret + + +class QAPISchemaGenCommandVisitor(QAPISchemaModularCVisitor): + def __init__(self, prefix: str): + super().__init__( + prefix, 'qapi-commands', + ' * Schema-defined QAPI/QMP commands', None, __doc__) + self._visited_ret_types: Dict[QAPIGenC, Set[QAPISchemaType]] = {} + + def _begin_user_module(self, name: str) -> None: + self._visited_ret_types[self._genc] = set() + commands = self._module_basename('qapi-commands', name) + types = self._module_basename('qapi-types', name) + visit = self._module_basename('qapi-visit', name) + self._genc.add(mcgen(''' +#include "qemu/osdep.h" +#include "qapi/compat-policy.h" +#include "qapi/visitor.h" +#include "qapi/qmp/qdict.h" +#include "qapi/dealloc-visitor.h" +#include "qapi/error.h" +#include "%(visit)s.h" +#include "%(commands)s.h" + +''', + commands=commands, visit=visit)) + self._genh.add(mcgen(''' +#include "%(types)s.h" + +''', + types=types)) + + def visit_begin(self, schema: QAPISchema) -> None: + self._add_module('./init', ' * QAPI Commands initialization') + self._genh.add(mcgen(''' +#include "qapi/qmp/dispatch.h" + +void %(c_prefix)sqmp_init_marshal(QmpCommandList *cmds); +''', + c_prefix=c_name(self._prefix, protect=False))) + self._genc.add(mcgen(''' +#include "qemu/osdep.h" +#include "%(prefix)sqapi-commands.h" +#include "%(prefix)sqapi-init-commands.h" + +void %(c_prefix)sqmp_init_marshal(QmpCommandList *cmds) +{ + QTAILQ_INIT(cmds); + +''', + prefix=self._prefix, + c_prefix=c_name(self._prefix, protect=False))) + + def visit_end(self) -> None: + with self._temp_module('./init'): + self._genc.add(mcgen(''' +} +''')) + + def visit_command(self, + name: str, + info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + features: List[QAPISchemaFeature], + arg_type: Optional[QAPISchemaObjectType], + ret_type: Optional[QAPISchemaType], + gen: bool, + success_response: bool, + boxed: bool, + allow_oob: bool, + allow_preconfig: bool, + coroutine: bool) -> None: + if not gen: + return + # FIXME: If T is a user-defined type, the user is responsible + # for making this work, i.e. to make T's condition the + # conjunction of the T-returning commands' conditions. If T + # is a built-in type, this isn't possible: the + # qmp_marshal_output_T() will be generated unconditionally. 
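+        # Illustrative note (not part of the original source): because
+        # _visited_ret_types is keyed by the generated C file, a return
+        # type shared by several commands in one module yields a single
+        # qmp_marshal_output_T() definition there; e.g. two commands
+        # that both return 'str' share one qmp_marshal_output_str().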
+ if ret_type and ret_type not in self._visited_ret_types[self._genc]: + self._visited_ret_types[self._genc].add(ret_type) + with ifcontext(ret_type.ifcond, + self._genh, self._genc): + self._genc.add(gen_marshal_output(ret_type)) + with ifcontext(ifcond, self._genh, self._genc): + self._genh.add(gen_command_decl(name, arg_type, boxed, ret_type)) + self._genh.add(gen_marshal_decl(name)) + self._genc.add(gen_marshal(name, arg_type, boxed, ret_type)) + with self._temp_module('./init'): + with ifcontext(ifcond, self._genh, self._genc): + self._genc.add(gen_register_command( + name, features, success_response, allow_oob, + allow_preconfig, coroutine)) + + +def gen_commands(schema: QAPISchema, + output_dir: str, + prefix: str) -> None: + vis = QAPISchemaGenCommandVisitor(prefix) + schema.visit(vis) + vis.write(output_dir) diff --git a/scripts/qapi/common.py b/scripts/qapi/common.py new file mode 100644 index 000000000..489273574 --- /dev/null +++ b/scripts/qapi/common.py @@ -0,0 +1,251 @@ +# +# QAPI helper library +# +# Copyright IBM, Corp. 2011 +# Copyright (c) 2013-2018 Red Hat Inc. +# +# Authors: +# Anthony Liguori <aliguori@us.ibm.com> +# Markus Armbruster <armbru@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2. +# See the COPYING file in the top-level directory. + +import re +from typing import ( + Any, + Dict, + Match, + Optional, + Sequence, + Union, +) + + +#: Magic string that gets removed along with all space to its right. +EATSPACE = '\033EATSPACE.' +POINTER_SUFFIX = ' *' + EATSPACE + + +def camel_to_upper(value: str) -> str: + """ + Converts CamelCase to CAMEL_CASE. + + Examples:: + + ENUMName -> ENUM_NAME + EnumName1 -> ENUM_NAME1 + ENUM_NAME -> ENUM_NAME + ENUM_NAME1 -> ENUM_NAME1 + ENUM_Name2 -> ENUM_NAME2 + ENUM24_Name -> ENUM24_NAME + """ + c_fun_str = c_name(value, False) + if value.isupper(): + return c_fun_str + + new_name = '' + length = len(c_fun_str) + for i in range(length): + char = c_fun_str[i] + # When char is upper case and no '_' appears before, do more checks + if char.isupper() and (i > 0) and c_fun_str[i - 1] != '_': + if i < length - 1 and c_fun_str[i + 1].islower(): + new_name += '_' + elif c_fun_str[i - 1].isdigit(): + new_name += '_' + new_name += char + return new_name.lstrip('_').upper() + + +def c_enum_const(type_name: str, + const_name: str, + prefix: Optional[str] = None) -> str: + """ + Generate a C enumeration constant name. + + :param type_name: The name of the enumeration. + :param const_name: The name of this constant. + :param prefix: Optional, prefix that overrides the type_name. + """ + if prefix is not None: + type_name = prefix + return camel_to_upper(type_name) + '_' + c_name(const_name, False).upper() + + +def c_name(name: str, protect: bool = True) -> str: + """ + Map ``name`` to a valid C identifier. + + Used for converting 'name' from a 'name':'type' qapi definition + into a generated struct member, as well as converting type names + into substrings of a generated C function name. + + '__a.b_c' -> '__a_b_c', 'x-foo' -> 'x_foo' + protect=True: 'int' -> 'q_int'; protect=False: 'int' -> 'int' + + :param name: The name to map. + :param protect: If true, avoid returning certain ticklish identifiers + (like C keywords) by prepending ``q_``. 
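+    :return: The C-safe identifier.
+
+    Illustrative examples (derived from the keyword sets below)::
+
+        c_name('unix')       # -> 'q_unix'  (namespace pollution)
+        c_name('9p-attach')  # -> 'q_9p_attach'  (leading digit)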
+ """ + # ANSI X3J11/88-090, 3.1.1 + c89_words = set(['auto', 'break', 'case', 'char', 'const', 'continue', + 'default', 'do', 'double', 'else', 'enum', 'extern', + 'float', 'for', 'goto', 'if', 'int', 'long', 'register', + 'return', 'short', 'signed', 'sizeof', 'static', + 'struct', 'switch', 'typedef', 'union', 'unsigned', + 'void', 'volatile', 'while']) + # ISO/IEC 9899:1999, 6.4.1 + c99_words = set(['inline', 'restrict', '_Bool', '_Complex', '_Imaginary']) + # ISO/IEC 9899:2011, 6.4.1 + c11_words = set(['_Alignas', '_Alignof', '_Atomic', '_Generic', + '_Noreturn', '_Static_assert', '_Thread_local']) + # GCC http://gcc.gnu.org/onlinedocs/gcc-4.7.1/gcc/C-Extensions.html + # excluding _.* + gcc_words = set(['asm', 'typeof']) + # C++ ISO/IEC 14882:2003 2.11 + cpp_words = set(['bool', 'catch', 'class', 'const_cast', 'delete', + 'dynamic_cast', 'explicit', 'false', 'friend', 'mutable', + 'namespace', 'new', 'operator', 'private', 'protected', + 'public', 'reinterpret_cast', 'static_cast', 'template', + 'this', 'throw', 'true', 'try', 'typeid', 'typename', + 'using', 'virtual', 'wchar_t', + # alternative representations + 'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not', + 'not_eq', 'or', 'or_eq', 'xor', 'xor_eq']) + # namespace pollution: + polluted_words = set(['unix', 'errno', 'mips', 'sparc', 'i386']) + name = re.sub(r'[^A-Za-z0-9_]', '_', name) + if protect and (name in (c89_words | c99_words | c11_words | gcc_words + | cpp_words | polluted_words) + or name[0].isdigit()): + return 'q_' + name + return name + + +class Indentation: + """ + Indentation level management. + + :param initial: Initial number of spaces, default 0. + """ + def __init__(self, initial: int = 0) -> None: + self._level = initial + + def __repr__(self) -> str: + return "{}({:d})".format(type(self).__name__, self._level) + + def __str__(self) -> str: + """Return the current indentation as a string of spaces.""" + return ' ' * self._level + + def increase(self, amount: int = 4) -> None: + """Increase the indentation level by ``amount``, default 4.""" + self._level += amount + + def decrease(self, amount: int = 4) -> None: + """Decrease the indentation level by ``amount``, default 4.""" + assert amount <= self._level + self._level -= amount + + +#: Global, current indent level for code generation. +indent = Indentation() + + +def cgen(code: str, **kwds: object) -> str: + """ + Generate ``code`` with ``kwds`` interpolated. + + Obey `indent`, and strip `EATSPACE`. 
+ """ + raw = code % kwds + pfx = str(indent) + if pfx: + raw = re.sub(r'^(?!(#|$))', pfx, raw, flags=re.MULTILINE) + return re.sub(re.escape(EATSPACE) + r' *', '', raw) + + +def mcgen(code: str, **kwds: object) -> str: + if code[0] == '\n': + code = code[1:] + return cgen(code, **kwds) + + +def c_fname(filename: str) -> str: + return re.sub(r'[^A-Za-z0-9_]', '_', filename) + + +def guardstart(name: str) -> str: + return mcgen(''' +#ifndef %(name)s +#define %(name)s + +''', + name=c_fname(name).upper()) + + +def guardend(name: str) -> str: + return mcgen(''' + +#endif /* %(name)s */ +''', + name=c_fname(name).upper()) + + +def gen_ifcond(ifcond: Optional[Union[str, Dict[str, Any]]], + cond_fmt: str, not_fmt: str, + all_operator: str, any_operator: str) -> str: + + def do_gen(ifcond: Union[str, Dict[str, Any]], + need_parens: bool) -> str: + if isinstance(ifcond, str): + return cond_fmt % ifcond + assert isinstance(ifcond, dict) and len(ifcond) == 1 + if 'not' in ifcond: + return not_fmt % do_gen(ifcond['not'], True) + if 'all' in ifcond: + gen = gen_infix(all_operator, ifcond['all']) + else: + gen = gen_infix(any_operator, ifcond['any']) + if need_parens: + gen = '(' + gen + ')' + return gen + + def gen_infix(operator: str, operands: Sequence[Any]) -> str: + return operator.join([do_gen(o, True) for o in operands]) + + if not ifcond: + return '' + return do_gen(ifcond, False) + + +def cgen_ifcond(ifcond: Optional[Union[str, Dict[str, Any]]]) -> str: + return gen_ifcond(ifcond, 'defined(%s)', '!%s', ' && ', ' || ') + + +def docgen_ifcond(ifcond: Optional[Union[str, Dict[str, Any]]]) -> str: + # TODO Doc generated for conditions needs polish + return gen_ifcond(ifcond, '%s', 'not %s', ' and ', ' or ') + + +def gen_if(cond: str) -> str: + if not cond: + return '' + return mcgen(''' +#if %(cond)s +''', cond=cond) + + +def gen_endif(cond: str) -> str: + if not cond: + return '' + return mcgen(''' +#endif /* %(cond)s */ +''', cond=cond) + + +def must_match(pattern: str, string: str) -> Match[str]: + match = re.match(pattern, string) + assert match is not None + return match diff --git a/scripts/qapi/error.py b/scripts/qapi/error.py new file mode 100644 index 000000000..e35e4ddb2 --- /dev/null +++ b/scripts/qapi/error.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2017-2019 Red Hat Inc. +# +# Authors: +# Markus Armbruster <armbru@redhat.com> +# Marc-André Lureau <marcandre.lureau@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2. +# See the COPYING file in the top-level directory. + +""" +QAPI error classes + +Common error classes used throughout the package. Additional errors may +be defined in other modules. At present, `QAPIParseError` is defined in +parser.py. 
+""" + +from typing import Optional + +from .source import QAPISourceInfo + + +class QAPIError(Exception): + """Base class for all exceptions from the QAPI package.""" + + +class QAPISourceError(QAPIError): + """Error class for all exceptions identifying a source location.""" + def __init__(self, + info: Optional[QAPISourceInfo], + msg: str, + col: Optional[int] = None): + super().__init__() + self.info = info + self.msg = msg + self.col = col + + def __str__(self) -> str: + assert self.info is not None + loc = str(self.info) + if self.col is not None: + assert self.info.line is not None + loc += ':%s' % self.col + return loc + ': ' + self.msg + + +class QAPISemError(QAPISourceError): + """Error class for semantic QAPI errors.""" diff --git a/scripts/qapi/events.py b/scripts/qapi/events.py new file mode 100644 index 000000000..27b44c49f --- /dev/null +++ b/scripts/qapi/events.py @@ -0,0 +1,252 @@ +""" +QAPI event generator + +Copyright (c) 2014 Wenchao Xia +Copyright (c) 2015-2018 Red Hat Inc. + +Authors: + Wenchao Xia <wenchaoqemu@gmail.com> + Markus Armbruster <armbru@redhat.com> + +This work is licensed under the terms of the GNU GPL, version 2. +See the COPYING file in the top-level directory. +""" + +from typing import List, Optional + +from .common import c_enum_const, c_name, mcgen +from .gen import QAPISchemaModularCVisitor, build_params, ifcontext +from .schema import ( + QAPISchema, + QAPISchemaEnumMember, + QAPISchemaFeature, + QAPISchemaIfCond, + QAPISchemaObjectType, +) +from .source import QAPISourceInfo +from .types import gen_enum, gen_enum_lookup + + +def build_event_send_proto(name: str, + arg_type: Optional[QAPISchemaObjectType], + boxed: bool) -> str: + return 'void qapi_event_send_%(c_name)s(%(param)s)' % { + 'c_name': c_name(name.lower()), + 'param': build_params(arg_type, boxed)} + + +def gen_event_send_decl(name: str, + arg_type: Optional[QAPISchemaObjectType], + boxed: bool) -> str: + return mcgen(''' + +%(proto)s; +''', + proto=build_event_send_proto(name, arg_type, boxed)) + + +def gen_param_var(typ: QAPISchemaObjectType) -> str: + """ + Generate a struct variable holding the event parameters. + + Initialize it with the function arguments defined in `gen_event_send`. + """ + assert not typ.variants + ret = mcgen(''' + %(c_name)s param = { +''', + c_name=typ.c_name()) + sep = ' ' + for memb in typ.members: + ret += sep + sep = ', ' + if memb.optional: + ret += 'has_' + c_name(memb.name) + sep + if memb.type.name == 'str': + # Cast away const added in build_params() + ret += '(char *)' + ret += c_name(memb.name) + ret += mcgen(''' + + }; +''') + if not typ.is_implicit(): + ret += mcgen(''' + %(c_name)s *arg = ¶m; +''', + c_name=typ.c_name()) + return ret + + +def gen_event_send(name: str, + arg_type: Optional[QAPISchemaObjectType], + features: List[QAPISchemaFeature], + boxed: bool, + event_enum_name: str, + event_emit: str) -> str: + # FIXME: Our declaration of local variables (and of 'errp' in the + # parameter list) can collide with exploded members of the event's + # data type passed in as parameters. If this collision ever hits in + # practice, we can rename our local variables with a leading _ prefix, + # or split the code into a wrapper function that creates a boxed + # 'param' object then calls another to do the real work. 
+ have_args = boxed or (arg_type and not arg_type.is_empty()) + + ret = mcgen(''' + +%(proto)s +{ + QDict *qmp; +''', + proto=build_event_send_proto(name, arg_type, boxed)) + + if have_args: + assert arg_type is not None + ret += mcgen(''' + QObject *obj; + Visitor *v; +''') + if not boxed: + ret += gen_param_var(arg_type) + + for f in features: + if f.is_special(): + ret += mcgen(''' + + if (compat_policy.%(feat)s_output == COMPAT_POLICY_OUTPUT_HIDE) { + return; + } +''', + feat=f.name) + + ret += mcgen(''' + + qmp = qmp_event_build_dict("%(name)s"); + +''', + name=name) + + if have_args: + assert arg_type is not None + ret += mcgen(''' + v = qobject_output_visitor_new_qmp(&obj); +''') + if not arg_type.is_implicit(): + ret += mcgen(''' + visit_type_%(c_name)s(v, "%(name)s", &arg, &error_abort); +''', + name=name, c_name=arg_type.c_name()) + else: + ret += mcgen(''' + + visit_start_struct(v, "%(name)s", NULL, 0, &error_abort); + visit_type_%(c_name)s_members(v, ¶m, &error_abort); + visit_check_struct(v, &error_abort); + visit_end_struct(v, NULL); +''', + name=name, c_name=arg_type.c_name()) + ret += mcgen(''' + + visit_complete(v, &obj); + if (qdict_size(qobject_to(QDict, obj))) { + qdict_put_obj(qmp, "data", obj); + } else { + qobject_unref(obj); + } +''') + + ret += mcgen(''' + %(event_emit)s(%(c_enum)s, qmp); + +''', + event_emit=event_emit, + c_enum=c_enum_const(event_enum_name, name)) + + if have_args: + ret += mcgen(''' + visit_free(v); +''') + ret += mcgen(''' + qobject_unref(qmp); +} +''') + return ret + + +class QAPISchemaGenEventVisitor(QAPISchemaModularCVisitor): + + def __init__(self, prefix: str): + super().__init__( + prefix, 'qapi-events', + ' * Schema-defined QAPI/QMP events', None, __doc__) + self._event_enum_name = c_name(prefix + 'QAPIEvent', protect=False) + self._event_enum_members: List[QAPISchemaEnumMember] = [] + self._event_emit_name = c_name(prefix + 'qapi_event_emit') + + def _begin_user_module(self, name: str) -> None: + events = self._module_basename('qapi-events', name) + types = self._module_basename('qapi-types', name) + visit = self._module_basename('qapi-visit', name) + self._genc.add(mcgen(''' +#include "qemu/osdep.h" +#include "%(prefix)sqapi-emit-events.h" +#include "%(events)s.h" +#include "%(visit)s.h" +#include "qapi/compat-policy.h" +#include "qapi/error.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp-event.h" + +''', + events=events, visit=visit, + prefix=self._prefix)) + self._genh.add(mcgen(''' +#include "qapi/util.h" +#include "%(types)s.h" +''', + types=types)) + + def visit_end(self) -> None: + self._add_module('./emit', ' * QAPI Events emission') + self._genc.preamble_add(mcgen(''' +#include "qemu/osdep.h" +#include "%(prefix)sqapi-emit-events.h" +''', + prefix=self._prefix)) + self._genh.preamble_add(mcgen(''' +#include "qapi/util.h" +''')) + self._genh.add(gen_enum(self._event_enum_name, + self._event_enum_members)) + self._genc.add(gen_enum_lookup(self._event_enum_name, + self._event_enum_members)) + self._genh.add(mcgen(''' + +void %(event_emit)s(%(event_enum)s event, QDict *qdict); +''', + event_emit=self._event_emit_name, + event_enum=self._event_enum_name)) + + def visit_event(self, + name: str, + info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + features: List[QAPISchemaFeature], + arg_type: Optional[QAPISchemaObjectType], + boxed: bool) -> None: + with ifcontext(ifcond, self._genh, self._genc): + self._genh.add(gen_event_send_decl(name, arg_type, boxed)) + self._genc.add(gen_event_send(name, arg_type, features, 
boxed, + self._event_enum_name, + self._event_emit_name)) + # Note: we generate the enum member regardless of @ifcond, to + # keep the enumeration usable in target-independent code. + self._event_enum_members.append(QAPISchemaEnumMember(name, None)) + + +def gen_events(schema: QAPISchema, + output_dir: str, + prefix: str) -> None: + vis = QAPISchemaGenEventVisitor(prefix) + schema.visit(vis) + vis.write(output_dir) diff --git a/scripts/qapi/expr.py b/scripts/qapi/expr.py new file mode 100644 index 000000000..3cb389e87 --- /dev/null +++ b/scripts/qapi/expr.py @@ -0,0 +1,694 @@ +# -*- coding: utf-8 -*- +# +# Copyright IBM, Corp. 2011 +# Copyright (c) 2013-2021 Red Hat Inc. +# +# Authors: +# Anthony Liguori <aliguori@us.ibm.com> +# Markus Armbruster <armbru@redhat.com> +# Eric Blake <eblake@redhat.com> +# Marc-André Lureau <marcandre.lureau@redhat.com> +# John Snow <jsnow@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2. +# See the COPYING file in the top-level directory. + +""" +Normalize and validate (context-free) QAPI schema expression structures. + +`QAPISchemaParser` parses a QAPI schema into abstract syntax trees +consisting of dict, list, str, bool, and int nodes. This module ensures +that these nested structures have the correct type(s) and key(s) where +appropriate for the QAPI context-free grammar. + +The QAPI schema expression language allows for certain syntactic sugar; +this module also handles the normalization process of these nested +structures. + +See `check_exprs` for the main entry point. + +See `schema.QAPISchema` for processing into native Python data +structures and contextual semantic validation. +""" + +import re +from typing import ( + Collection, + Dict, + Iterable, + List, + Optional, + Union, + cast, +) + +from .common import c_name +from .error import QAPISemError +from .parser import QAPIDoc +from .source import QAPISourceInfo + + +# Deserialized JSON objects as returned by the parser. +# It is neither necessary (nor practical, as long as mypy lacks +# recursive types) to exhaustively type the values of this mapping +# here, because the purpose of this module is to interrogate that +# type. +_JSONObject = Dict[str, object] + + +# See check_name_str(), below. +valid_name = re.compile(r'(__[a-z0-9.-]+_)?' + r'(x-)?' + r'([a-z][a-z0-9_-]*)$', re.IGNORECASE) + + +def check_name_is_str(name: object, + info: QAPISourceInfo, + source: str) -> None: + """ + Ensure that ``name`` is a ``str``. + + :raise QAPISemError: When ``name`` fails validation. + """ + if not isinstance(name, str): + raise QAPISemError(info, "%s requires a string name" % source) + + +def check_name_str(name: str, info: QAPISourceInfo, source: str) -> str: + """ + Ensure that ``name`` is a valid QAPI name. + + A valid name consists of ASCII letters, digits, ``-``, and ``_``, + starting with a letter. It may be prefixed by a downstream prefix + of the form __RFQDN_, or the experimental prefix ``x-``. If both + prefixes are present, the __RFQDN_ prefix goes first. + + A valid name cannot start with ``q_``, which is reserved. + + :param name: Name to check. + :param info: QAPI schema source file information. + :param source: Error string describing what ``name`` belongs to. + + :raise QAPISemError: When ``name`` fails validation. + :return: The stem of the valid name, with no prefixes. + """ + # Reserve the entire 'q_' namespace for c_name(), and for 'q_empty' + # and 'q_obj_*' implicit type names.
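+    # (Illustrative: a name like 'q_foo' matches valid_name, but is
+    # still rejected because c_name('q_foo', False) starts with 'q_'.)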
+ match = valid_name.match(name) + if not match or c_name(name, False).startswith('q_'): + raise QAPISemError(info, "%s has an invalid name" % source) + return match.group(3) + + +def check_name_upper(name: str, info: QAPISourceInfo, source: str) -> None: + """ + Ensure that ``name`` is a valid event name. + + This means it must be a valid QAPI name as checked by + `check_name_str()`, but where the stem prohibits lowercase + characters and ``-``. + + :param name: Name to check. + :param info: QAPI schema source file information. + :param source: Error string describing what ``name`` belongs to. + + :raise QAPISemError: When ``name`` fails validation. + """ + stem = check_name_str(name, info, source) + if re.search(r'[a-z-]', stem): + raise QAPISemError( + info, "name of %s must not use lowercase or '-'" % source) + + +def check_name_lower(name: str, info: QAPISourceInfo, source: str, + permit_upper: bool = False, + permit_underscore: bool = False) -> None: + """ + Ensure that ``name`` is a valid command or member name. + + This means it must be a valid QAPI name as checked by + `check_name_str()`, but where the stem prohibits uppercase + characters and ``_``. + + :param name: Name to check. + :param info: QAPI schema source file information. + :param source: Error string describing what ``name`` belongs to. + :param permit_upper: Additionally permit uppercase. + :param permit_underscore: Additionally permit ``_``. + + :raise QAPISemError: When ``name`` fails validation. + """ + stem = check_name_str(name, info, source) + if ((not permit_upper and re.search(r'[A-Z]', stem)) + or (not permit_underscore and '_' in stem)): + raise QAPISemError( + info, "name of %s must not use uppercase or '_'" % source) + + +def check_name_camel(name: str, info: QAPISourceInfo, source: str) -> None: + """ + Ensure that ``name`` is a valid user-defined type name. + + This means it must be a valid QAPI name as checked by + `check_name_str()`, but where the stem must be in CamelCase. + + :param name: Name to check. + :param info: QAPI schema source file information. + :param source: Error string describing what ``name`` belongs to. + + :raise QAPISemError: When ``name`` fails validation. + """ + stem = check_name_str(name, info, source) + if not re.match(r'[A-Z][A-Za-z0-9]*[a-z][A-Za-z0-9]*$', stem): + raise QAPISemError(info, "name of %s must use CamelCase" % source) + + +def check_defn_name_str(name: str, info: QAPISourceInfo, meta: str) -> None: + """ + Ensure that ``name`` is a valid definition name. + + Based on the value of ``meta``, this means that: + - 'event' names adhere to `check_name_upper()`. + - 'command' names adhere to `check_name_lower()`. + - Else, meta is a type, and must pass `check_name_camel()`. + These names must not end with ``List``. + + :param name: Name to check. + :param info: QAPI schema source file information. + :param meta: Meta-type name of the QAPI expression. + + :raise QAPISemError: When ``name`` fails validation. + """ + if meta == 'event': + check_name_upper(name, info, meta) + elif meta == 'command': + check_name_lower( + name, info, meta, + permit_underscore=name in info.pragma.command_name_exceptions) + else: + check_name_camel(name, info, meta) + if name.endswith('List'): + raise QAPISemError( + info, "%s name should not end in 'List'" % meta) + + +def check_keys(value: _JSONObject, + info: QAPISourceInfo, + source: str, + required: Collection[str], + optional: Collection[str]) -> None: + """ + Ensure that a dict has a specific set of keys. 
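+
+    Illustrative example, mirroring how `check_exprs` validates an
+    enum expression::
+
+        check_keys(expr, info, 'enum',
+                   required=['enum', 'data'],
+                   optional=['if', 'features', 'prefix'])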
+ + :param value: The dict to check. + :param info: QAPI schema source file information. + :param source: Error string describing this ``value``. + :param required: Keys that *must* be present. + :param optional: Keys that *may* be present. + + :raise QAPISemError: When unknown keys are present. + """ + + def pprint(elems: Iterable[str]) -> str: + return ', '.join("'" + e + "'" for e in sorted(elems)) + + missing = set(required) - set(value) + if missing: + raise QAPISemError( + info, + "%s misses key%s %s" + % (source, 's' if len(missing) > 1 else '', + pprint(missing))) + allowed = set(required) | set(optional) + unknown = set(value) - allowed + if unknown: + raise QAPISemError( + info, + "%s has unknown key%s %s\nValid keys are %s." + % (source, 's' if len(unknown) > 1 else '', + pprint(unknown), pprint(allowed))) + + +def check_flags(expr: _JSONObject, info: QAPISourceInfo) -> None: + """ + Ensure flag members (if present) have valid values. + + :param expr: The expression to validate. + :param info: QAPI schema source file information. + + :raise QAPISemError: + When certain flags have an invalid value, or when + incompatible flags are present. + """ + for key in ('gen', 'success-response'): + if key in expr and expr[key] is not False: + raise QAPISemError( + info, "flag '%s' may only use false value" % key) + for key in ('boxed', 'allow-oob', 'allow-preconfig', 'coroutine'): + if key in expr and expr[key] is not True: + raise QAPISemError( + info, "flag '%s' may only use true value" % key) + if 'allow-oob' in expr and 'coroutine' in expr: + # This is not necessarily a fundamental incompatibility, but + # we don't have a use case and the desired semantics isn't + # obvious. The simplest solution is to forbid it until we get + # a use case for it. + raise QAPISemError(info, "flags 'allow-oob' and 'coroutine' " + "are incompatible") + + +def check_if(expr: _JSONObject, info: QAPISourceInfo, source: str) -> None: + """ + Validate the ``if`` member of an object. + + The ``if`` member may be either a ``str`` or a dict. + + :param expr: The expression containing the ``if`` member to validate. + :param info: QAPI schema source file information. + :param source: Error string describing ``expr``. + + :raise QAPISemError: + When the "if" member fails validation, or when there are no + non-empty conditions. 
+ :return: None + """ + + def _check_if(cond: Union[str, object]) -> None: + if isinstance(cond, str): + if not re.fullmatch(r'[A-Z][A-Z0-9_]*', cond): + raise QAPISemError( + info, + "'if' condition '%s' of %s is not a valid identifier" + % (cond, source)) + return + + if not isinstance(cond, dict): + raise QAPISemError( + info, + "'if' condition of %s must be a string or an object" % source) + check_keys(cond, info, "'if' condition of %s" % source, [], + ["all", "any", "not"]) + if len(cond) != 1: + raise QAPISemError( + info, + "'if' condition of %s has conflicting keys" % source) + + if 'not' in cond: + _check_if(cond['not']) + elif 'all' in cond: + _check_infix('all', cond['all']) + else: + _check_infix('any', cond['any']) + + def _check_infix(operator: str, operands: object) -> None: + if not isinstance(operands, list): + raise QAPISemError( + info, + "'%s' condition of %s must be an array" + % (operator, source)) + if not operands: + raise QAPISemError( + info, "'if' condition [] of %s is useless" % source) + for operand in operands: + _check_if(operand) + + ifcond = expr.get('if') + if ifcond is None: + return + + _check_if(ifcond) + + +def normalize_members(members: object) -> None: + """ + Normalize a "members" value. + + If ``members`` is a dict, for every value in that dict, if that + value is not itself already a dict, normalize it to + ``{'type': value}``. + + :forms: + :sugared: ``Dict[str, Union[str, TypeRef]]`` + :canonical: ``Dict[str, TypeRef]`` + + :param members: The members value to normalize. + + :return: None, ``members`` is normalized in-place as needed. + """ + if isinstance(members, dict): + for key, arg in members.items(): + if isinstance(arg, dict): + continue + members[key] = {'type': arg} + + +def check_type(value: Optional[object], + info: QAPISourceInfo, + source: str, + allow_array: bool = False, + allow_dict: Union[bool, str] = False) -> None: + """ + Normalize and validate the QAPI type of ``value``. + + Python types of ``str`` or ``None`` are always allowed. + + :param value: The value to check. + :param info: QAPI schema source file information. + :param source: Error string describing this ``value``. + :param allow_array: + Allow a ``List[str]`` of length 1, which indicates an array of + the type named by the list element. + :param allow_dict: + Allow a dict. Its members can be struct type members or union + branches. When the value of ``allow_dict`` is in pragma + ``member-name-exceptions``, the dict's keys may violate the + member naming rules. The dict members are normalized in place. + + :raise QAPISemError: When ``value`` fails validation. + :return: None, ``value`` is normalized in-place as needed. 
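+
+    Illustrative example: with ``allow_dict`` enabled, both member
+    spellings below are accepted, because `normalize_members` rewrites
+    the sugared form to the canonical one before this check runs::
+
+        {'name': 'str'}               # sugared
+        {'name': {'type': 'str'}}     # canonical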
+ """ + if value is None: + return + + # Type name + if isinstance(value, str): + return + + # Array type + if isinstance(value, list): + if not allow_array: + raise QAPISemError(info, "%s cannot be an array" % source) + if len(value) != 1 or not isinstance(value[0], str): + raise QAPISemError(info, + "%s: array type must contain single type name" % + source) + return + + # Anonymous type + + if not allow_dict: + raise QAPISemError(info, "%s should be a type name" % source) + + if not isinstance(value, dict): + raise QAPISemError(info, + "%s should be an object or type name" % source) + + permissive = False + if isinstance(allow_dict, str): + permissive = allow_dict in info.pragma.member_name_exceptions + + # value is a dictionary, check that each member is okay + for (key, arg) in value.items(): + key_source = "%s member '%s'" % (source, key) + if key.startswith('*'): + key = key[1:] + check_name_lower(key, info, key_source, + permit_upper=permissive, + permit_underscore=permissive) + if c_name(key, False) == 'u' or c_name(key, False).startswith('has_'): + raise QAPISemError(info, "%s uses reserved name" % key_source) + check_keys(arg, info, key_source, ['type'], ['if', 'features']) + check_if(arg, info, key_source) + check_features(arg.get('features'), info) + check_type(arg['type'], info, key_source, allow_array=True) + + +def check_features(features: Optional[object], + info: QAPISourceInfo) -> None: + """ + Normalize and validate the ``features`` member. + + ``features`` may be a ``list`` of either ``str`` or ``dict``. + Any ``str`` element will be normalized to ``{'name': element}``. + + :forms: + :sugared: ``List[Union[str, Feature]]`` + :canonical: ``List[Feature]`` + + :param features: The features member value to validate. + :param info: QAPI schema source file information. + + :raise QAPISemError: When ``features`` fails validation. + :return: None, ``features`` is normalized in-place as needed. + """ + if features is None: + return + if not isinstance(features, list): + raise QAPISemError(info, "'features' must be an array") + features[:] = [f if isinstance(f, dict) else {'name': f} + for f in features] + for feat in features: + source = "'features' member" + assert isinstance(feat, dict) + check_keys(feat, info, source, ['name'], ['if']) + check_name_is_str(feat['name'], info, source) + source = "%s '%s'" % (source, feat['name']) + check_name_str(feat['name'], info, source) + check_if(feat, info, source) + + +def check_enum(expr: _JSONObject, info: QAPISourceInfo) -> None: + """ + Normalize and validate this expression as an ``enum`` definition. + + :param expr: The expression to validate. + :param info: QAPI schema source file information. + + :raise QAPISemError: When ``expr`` is not a valid ``enum``. + :return: None, ``expr`` is normalized in-place as needed. 
+ """ + name = expr['enum'] + members = expr['data'] + prefix = expr.get('prefix') + + if not isinstance(members, list): + raise QAPISemError(info, "'data' must be an array") + if prefix is not None and not isinstance(prefix, str): + raise QAPISemError(info, "'prefix' must be a string") + + permissive = name in info.pragma.member_name_exceptions + + members[:] = [m if isinstance(m, dict) else {'name': m} + for m in members] + for member in members: + source = "'data' member" + check_keys(member, info, source, ['name'], ['if', 'features']) + member_name = member['name'] + check_name_is_str(member_name, info, source) + source = "%s '%s'" % (source, member_name) + # Enum members may start with a digit + if member_name[0].isdigit(): + member_name = 'd' + member_name # Hack: hide the digit + check_name_lower(member_name, info, source, + permit_upper=permissive, + permit_underscore=permissive) + check_if(member, info, source) + check_features(member.get('features'), info) + + +def check_struct(expr: _JSONObject, info: QAPISourceInfo) -> None: + """ + Normalize and validate this expression as a ``struct`` definition. + + :param expr: The expression to validate. + :param info: QAPI schema source file information. + + :raise QAPISemError: When ``expr`` is not a valid ``struct``. + :return: None, ``expr`` is normalized in-place as needed. + """ + name = cast(str, expr['struct']) # Checked in check_exprs + members = expr['data'] + + check_type(members, info, "'data'", allow_dict=name) + check_type(expr.get('base'), info, "'base'") + + +def check_union(expr: _JSONObject, info: QAPISourceInfo) -> None: + """ + Normalize and validate this expression as a ``union`` definition. + + :param expr: The expression to validate. + :param info: QAPI schema source file information. + + :raise QAPISemError: when ``expr`` is not a valid ``union``. + :return: None, ``expr`` is normalized in-place as needed. + """ + name = cast(str, expr['union']) # Checked in check_exprs + base = expr['base'] + discriminator = expr['discriminator'] + members = expr['data'] + + check_type(base, info, "'base'", allow_dict=name) + check_name_is_str(discriminator, info, "'discriminator'") + + if not isinstance(members, dict): + raise QAPISemError(info, "'data' must be an object") + + for (key, value) in members.items(): + source = "'data' member '%s'" % key + check_keys(value, info, source, ['type'], ['if']) + check_if(value, info, source) + check_type(value['type'], info, source, allow_array=not base) + + +def check_alternate(expr: _JSONObject, info: QAPISourceInfo) -> None: + """ + Normalize and validate this expression as an ``alternate`` definition. + + :param expr: The expression to validate. + :param info: QAPI schema source file information. + + :raise QAPISemError: When ``expr`` is not a valid ``alternate``. + :return: None, ``expr`` is normalized in-place as needed. + """ + members = expr['data'] + + if not members: + raise QAPISemError(info, "'data' must not be empty") + + if not isinstance(members, dict): + raise QAPISemError(info, "'data' must be an object") + + for (key, value) in members.items(): + source = "'data' member '%s'" % key + check_name_lower(key, info, source) + check_keys(value, info, source, ['type'], ['if']) + check_if(value, info, source) + check_type(value['type'], info, source) + + +def check_command(expr: _JSONObject, info: QAPISourceInfo) -> None: + """ + Normalize and validate this expression as a ``command`` definition. + + :param expr: The expression to validate. 
+ :param info: QAPI schema source file information. + + :raise QAPISemError: When ``expr`` is not a valid ``command``. + :return: None, ``expr`` is normalized in-place as needed. + """ + args = expr.get('data') + rets = expr.get('returns') + boxed = expr.get('boxed', False) + + if boxed and args is None: + raise QAPISemError(info, "'boxed': true requires 'data'") + check_type(args, info, "'data'", allow_dict=not boxed) + check_type(rets, info, "'returns'", allow_array=True) + + +def check_event(expr: _JSONObject, info: QAPISourceInfo) -> None: + """ + Normalize and validate this expression as an ``event`` definition. + + :param expr: The expression to validate. + :param info: QAPI schema source file information. + + :raise QAPISemError: When ``expr`` is not a valid ``event``. + :return: None, ``expr`` is normalized in-place as needed. + """ + args = expr.get('data') + boxed = expr.get('boxed', False) + + if boxed and args is None: + raise QAPISemError(info, "'boxed': true requires 'data'") + check_type(args, info, "'data'", allow_dict=not boxed) + + +def check_exprs(exprs: List[_JSONObject]) -> List[_JSONObject]: + """ + Validate and normalize a list of parsed QAPI schema expressions. + + This function accepts a list of expressions and metadata as returned + by the parser. It destructively normalizes the expressions in-place. + + :param exprs: The list of expressions to normalize and validate. + + :raise QAPISemError: When any expression fails validation. + :return: The same list of expressions (now modified). + """ + for expr_elem in exprs: + # Expression + assert isinstance(expr_elem['expr'], dict) + for key in expr_elem['expr'].keys(): + assert isinstance(key, str) + expr: _JSONObject = expr_elem['expr'] + + # QAPISourceInfo + assert isinstance(expr_elem['info'], QAPISourceInfo) + info: QAPISourceInfo = expr_elem['info'] + + # Optional[QAPIDoc] + tmp = expr_elem.get('doc') + assert tmp is None or isinstance(tmp, QAPIDoc) + doc: Optional[QAPIDoc] = tmp + + if 'include' in expr: + continue + + metas = expr.keys() & {'enum', 'struct', 'union', 'alternate', + 'command', 'event'} + if len(metas) != 1: + raise QAPISemError( + info, + "expression must have exactly one key" + " 'enum', 'struct', 'union', 'alternate'," + " 'command', 'event'") + meta = metas.pop() + + check_name_is_str(expr[meta], info, "'%s'" % meta) + name = cast(str, expr[meta]) + info.set_defn(meta, name) + check_defn_name_str(name, info, meta) + + if doc: + if doc.symbol != name: + raise QAPISemError( + info, "documentation comment is for '%s'" % doc.symbol) + doc.check_expr(expr) + elif info.pragma.doc_required: + raise QAPISemError(info, + "documentation comment required") + + if meta == 'enum': + check_keys(expr, info, meta, + ['enum', 'data'], ['if', 'features', 'prefix']) + check_enum(expr, info) + elif meta == 'union': + check_keys(expr, info, meta, + ['union', 'base', 'discriminator', 'data'], + ['if', 'features']) + normalize_members(expr.get('base')) + normalize_members(expr['data']) + check_union(expr, info) + elif meta == 'alternate': + check_keys(expr, info, meta, + ['alternate', 'data'], ['if', 'features']) + normalize_members(expr['data']) + check_alternate(expr, info) + elif meta == 'struct': + check_keys(expr, info, meta, + ['struct', 'data'], ['base', 'if', 'features']) + normalize_members(expr['data']) + check_struct(expr, info) + elif meta == 'command': + check_keys(expr, info, meta, + ['command'], + ['data', 'returns', 'boxed', 'if', 'features', + 'gen', 'success-response', 'allow-oob', + 
'allow-preconfig', 'coroutine']) + normalize_members(expr.get('data')) + check_command(expr, info) + elif meta == 'event': + check_keys(expr, info, meta, + ['event'], ['data', 'boxed', 'if', 'features']) + normalize_members(expr.get('data')) + check_event(expr, info) + else: + assert False, 'unexpected meta type' + + check_if(expr, info, meta) + check_features(expr.get('features'), info) + check_flags(expr, info) + + return exprs diff --git a/scripts/qapi/gen.py b/scripts/qapi/gen.py new file mode 100644 index 000000000..995a97d2b --- /dev/null +++ b/scripts/qapi/gen.py @@ -0,0 +1,339 @@ +# -*- coding: utf-8 -*- +# +# QAPI code generation +# +# Copyright (c) 2015-2019 Red Hat Inc. +# +# Authors: +# Markus Armbruster <armbru@redhat.com> +# Marc-André Lureau <marcandre.lureau@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2. +# See the COPYING file in the top-level directory. + +from contextlib import contextmanager +import os +import re +from typing import ( + Dict, + Iterator, + Optional, + Sequence, + Tuple, +) + +from .common import ( + c_fname, + c_name, + guardend, + guardstart, + mcgen, +) +from .schema import ( + QAPISchemaFeature, + QAPISchemaIfCond, + QAPISchemaModule, + QAPISchemaObjectType, + QAPISchemaVisitor, +) +from .source import QAPISourceInfo + + +def gen_special_features(features: Sequence[QAPISchemaFeature]) -> str: + special_features = [f"1u << QAPI_{feat.name.upper()}" + for feat in features if feat.is_special()] + return ' | '.join(special_features) or '0' + + +class QAPIGen: + def __init__(self, fname: str): + self.fname = fname + self._preamble = '' + self._body = '' + + def preamble_add(self, text: str) -> None: + self._preamble += text + + def add(self, text: str) -> None: + self._body += text + + def get_content(self) -> str: + return self._top() + self._preamble + self._body + self._bottom() + + def _top(self) -> str: + # pylint: disable=no-self-use + return '' + + def _bottom(self) -> str: + # pylint: disable=no-self-use + return '' + + def write(self, output_dir: str) -> None: + # Include paths starting with ../ are used to reuse modules of the main + # schema in specialised schemas. Don't overwrite the files that are + # already generated for the main schema. + if self.fname.startswith('../'): + return + pathname = os.path.join(output_dir, self.fname) + odir = os.path.dirname(pathname) + + if odir: + os.makedirs(odir, exist_ok=True) + + # use os.open for O_CREAT to create and read a non-existent file + fd = os.open(pathname, os.O_RDWR | os.O_CREAT, 0o666) + with os.fdopen(fd, 'r+', encoding='utf-8') as fp: + text = self.get_content() + oldtext = fp.read(len(text) + 1) + if text != oldtext: + fp.seek(0) + fp.truncate(0) + fp.write(text) + + +def _wrap_ifcond(ifcond: QAPISchemaIfCond, before: str, after: str) -> str: + if before == after: + return after # suppress empty #if ...
#endif + + assert after.startswith(before) + out = before + added = after[len(before):] + if added[0] == '\n': + out += '\n' + added = added[1:] + out += ifcond.gen_if() + out += added + out += ifcond.gen_endif() + return out + + +def build_params(arg_type: Optional[QAPISchemaObjectType], + boxed: bool, + extra: Optional[str] = None) -> str: + ret = '' + sep = '' + if boxed: + assert arg_type + ret += '%s arg' % arg_type.c_param_type() + sep = ', ' + elif arg_type: + assert not arg_type.variants + for memb in arg_type.members: + ret += sep + sep = ', ' + if memb.optional: + ret += 'bool has_%s, ' % c_name(memb.name) + ret += '%s %s' % (memb.type.c_param_type(), + c_name(memb.name)) + if extra: + ret += sep + extra + return ret if ret else 'void' + + +class QAPIGenCCode(QAPIGen): + def __init__(self, fname: str): + super().__init__(fname) + self._start_if: Optional[Tuple[QAPISchemaIfCond, str, str]] = None + + def start_if(self, ifcond: QAPISchemaIfCond) -> None: + assert self._start_if is None + self._start_if = (ifcond, self._body, self._preamble) + + def end_if(self) -> None: + assert self._start_if is not None + self._body = _wrap_ifcond(self._start_if[0], + self._start_if[1], self._body) + self._preamble = _wrap_ifcond(self._start_if[0], + self._start_if[2], self._preamble) + self._start_if = None + + def get_content(self) -> str: + assert self._start_if is None + return super().get_content() + + +class QAPIGenC(QAPIGenCCode): + def __init__(self, fname: str, blurb: str, pydoc: str): + super().__init__(fname) + self._blurb = blurb + self._copyright = '\n * '.join(re.findall(r'^Copyright .*', pydoc, + re.MULTILINE)) + + def _top(self) -> str: + return mcgen(''' +/* AUTOMATICALLY GENERATED, DO NOT MODIFY */ + +/* +%(blurb)s + * + * %(copyright)s + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +''', + blurb=self._blurb, copyright=self._copyright) + + def _bottom(self) -> str: + return mcgen(''' + +/* Dummy declaration to prevent empty .o file */ +char qapi_dummy_%(name)s; +''', + name=c_fname(self.fname)) + + +class QAPIGenH(QAPIGenC): + def _top(self) -> str: + return super()._top() + guardstart(self.fname) + + def _bottom(self) -> str: + return guardend(self.fname) + + +@contextmanager +def ifcontext(ifcond: QAPISchemaIfCond, *args: QAPIGenCCode) -> Iterator[None]: + """ + A with-statement context manager that wraps with `start_if()` / `end_if()`. + + :param ifcond: A sequence of conditionals, passed to `start_if()`. + :param args: any number of `QAPIGenCCode`. + + Example:: + + with ifcontext(ifcond, self._genh, self._genc): + modify self._genh and self._genc ... + + Is equivalent to calling:: + + self._genh.start_if(ifcond) + self._genc.start_if(ifcond) + modify self._genh and self._genc ... 
+ self._genh.end_if() + self._genc.end_if() + """ + for arg in args: + arg.start_if(ifcond) + yield + for arg in args: + arg.end_if() + + +class QAPISchemaMonolithicCVisitor(QAPISchemaVisitor): + def __init__(self, + prefix: str, + what: str, + blurb: str, + pydoc: str): + self._prefix = prefix + self._what = what + self._genc = QAPIGenC(self._prefix + self._what + '.c', + blurb, pydoc) + self._genh = QAPIGenH(self._prefix + self._what + '.h', + blurb, pydoc) + + def write(self, output_dir: str) -> None: + self._genc.write(output_dir) + self._genh.write(output_dir) + + +class QAPISchemaModularCVisitor(QAPISchemaVisitor): + def __init__(self, + prefix: str, + what: str, + user_blurb: str, + builtin_blurb: Optional[str], + pydoc: str): + self._prefix = prefix + self._what = what + self._user_blurb = user_blurb + self._builtin_blurb = builtin_blurb + self._pydoc = pydoc + self._current_module: Optional[str] = None + self._module: Dict[str, Tuple[QAPIGenC, QAPIGenH]] = {} + self._main_module: Optional[str] = None + + @property + def _genc(self) -> QAPIGenC: + assert self._current_module is not None + return self._module[self._current_module][0] + + @property + def _genh(self) -> QAPIGenH: + assert self._current_module is not None + return self._module[self._current_module][1] + + @staticmethod + def _module_dirname(name: str) -> str: + if QAPISchemaModule.is_user_module(name): + return os.path.dirname(name) + return '' + + def _module_basename(self, what: str, name: str) -> str: + ret = '' if QAPISchemaModule.is_builtin_module(name) else self._prefix + if QAPISchemaModule.is_user_module(name): + basename = os.path.basename(name) + ret += what + if name != self._main_module: + ret += '-' + os.path.splitext(basename)[0] + else: + assert QAPISchemaModule.is_system_module(name) + ret += re.sub(r'-', '-' + name[2:] + '-', what) + return ret + + def _module_filename(self, what: str, name: str) -> str: + return os.path.join(self._module_dirname(name), + self._module_basename(what, name)) + + def _add_module(self, name: str, blurb: str) -> None: + if QAPISchemaModule.is_user_module(name): + if self._main_module is None: + self._main_module = name + basename = self._module_filename(self._what, name) + genc = QAPIGenC(basename + '.c', blurb, self._pydoc) + genh = QAPIGenH(basename + '.h', blurb, self._pydoc) + self._module[name] = (genc, genh) + self._current_module = name + + @contextmanager + def _temp_module(self, name: str) -> Iterator[None]: + old_module = self._current_module + self._current_module = name + yield + self._current_module = old_module + + def write(self, output_dir: str, opt_builtins: bool = False) -> None: + for name, (genc, genh) in self._module.items(): + if QAPISchemaModule.is_builtin_module(name) and not opt_builtins: + continue + genc.write(output_dir) + genh.write(output_dir) + + def _begin_builtin_module(self) -> None: + pass + + def _begin_user_module(self, name: str) -> None: + pass + + def visit_module(self, name: str) -> None: + if QAPISchemaModule.is_builtin_module(name): + if self._builtin_blurb: + self._add_module(name, self._builtin_blurb) + self._begin_builtin_module() + else: + # The built-in module has not been created. No code may + # be generated. 
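+                # (Illustrative: with _current_module left as None, the
+                # _genc/_genh properties would fail their assertion if a
+                # visitor tried to emit code for the built-in module.)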
+            self._current_module = None
+        else:
+            assert QAPISchemaModule.is_user_module(name)
+            self._add_module(name, self._user_blurb)
+            self._begin_user_module(name)
+
+    def visit_include(self, name: str, info: Optional[QAPISourceInfo]) -> None:
+        relname = os.path.relpath(self._module_filename(self._what, name),
+                                  os.path.dirname(self._genh.fname))
+        self._genh.preamble_add(mcgen('''
+#include "%(relname)s.h"
+''',
+                                      relname=relname))
diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py
new file mode 100644
index 000000000..67c7d89aa
--- /dev/null
+++ b/scripts/qapi/introspect.py
@@ -0,0 +1,390 @@
+"""
+QAPI introspection generator
+
+Copyright (C) 2015-2021 Red Hat, Inc.
+
+Authors:
+ Markus Armbruster <armbru@redhat.com>
+ John Snow <jsnow@redhat.com>
+
+This work is licensed under the terms of the GNU GPL, version 2.
+See the COPYING file in the top-level directory.
+"""
+
+from typing import (
+    Any,
+    Dict,
+    Generic,
+    List,
+    Optional,
+    Sequence,
+    TypeVar,
+    Union,
+)
+
+from .common import c_name, mcgen
+from .gen import QAPISchemaMonolithicCVisitor
+from .schema import (
+    QAPISchema,
+    QAPISchemaArrayType,
+    QAPISchemaBuiltinType,
+    QAPISchemaEntity,
+    QAPISchemaEnumMember,
+    QAPISchemaFeature,
+    QAPISchemaIfCond,
+    QAPISchemaObjectType,
+    QAPISchemaObjectTypeMember,
+    QAPISchemaType,
+    QAPISchemaVariant,
+    QAPISchemaVariants,
+)
+from .source import QAPISourceInfo
+
+
+# This module constructs a tree data structure that is used to
+# generate the introspection information for QEMU. It is shaped
+# like a JSON value.
+#
+# A complexity over JSON is that our values may or may not be annotated.
+#
+# Un-annotated values may be:
+#     Scalar: str, bool, None.
+#     Non-scalar: List, Dict
+# _value = Union[str, bool, None, Dict[str, JSONValue], List[JSONValue]]
+#
+# With optional annotations, the type of all values is:
+# JSONValue = Union[_Value, Annotated[_Value]]
+#
+# Sadly, mypy does not support recursive types; so the _Stub alias is used to
+# mark the imprecision in the type model where we'd otherwise use JSONValue.
+_Stub = Any
+_Scalar = Union[str, bool, None]
+_NonScalar = Union[Dict[str, _Stub], List[_Stub]]
+_Value = Union[_Scalar, _NonScalar]
+JSONValue = Union[_Value, 'Annotated[_Value]']
+
+# These types are based on structures defined in QEMU's schema, so we
+# lack precise types for them here. Python 3.6 does not offer
+# TypedDict constructs, so they are broadly typed here as simple
+# Python Dicts.
+SchemaInfo = Dict[str, object]
+SchemaInfoEnumMember = Dict[str, object]
+SchemaInfoObject = Dict[str, object]
+SchemaInfoObjectVariant = Dict[str, object]
+SchemaInfoObjectMember = Dict[str, object]
+SchemaInfoCommand = Dict[str, object]
+
+
+_ValueT = TypeVar('_ValueT', bound=_Value)
+
+
+class Annotated(Generic[_ValueT]):
+    """
+    Annotated generally contains a SchemaInfo-like type (as a dict),
+    but it is also used to wrap comments/ifconds around scalar leaf
+    values, for the benefit of features and enums.
+    """
+    # TODO: Remove after Python 3.7 adds @dataclass:
+    # pylint: disable=too-few-public-methods
+    def __init__(self, value: _ValueT, ifcond: QAPISchemaIfCond,
+                 comment: Optional[str] = None):
+        self.value = value
+        self.comment: Optional[str] = comment
+        self.ifcond = ifcond
+
+
+def _tree_to_qlit(obj: JSONValue,
+                  level: int = 0,
+                  dict_value: bool = False) -> str:
+    """
+    Convert the type tree into a QLIT C string, recursively.
+
+    :param obj: The value to convert.
+                This value may not be Annotated when dict_value is True.
+ :param level: The indentation level for this particular value. + :param dict_value: True when the value being processed belongs to a + dict key; which suppresses the output indent. + """ + + def indent(level: int) -> str: + return level * 4 * ' ' + + if isinstance(obj, Annotated): + # NB: _tree_to_qlit is called recursively on the values of a + # key:value pair; those values can't be decorated with + # comments or conditionals. + msg = "dict values cannot have attached comments or if-conditionals." + assert not dict_value, msg + + ret = '' + if obj.comment: + ret += indent(level) + f"/* {obj.comment} */\n" + if obj.ifcond.is_present(): + ret += obj.ifcond.gen_if() + ret += _tree_to_qlit(obj.value, level) + if obj.ifcond.is_present(): + ret += '\n' + obj.ifcond.gen_endif() + return ret + + ret = '' + if not dict_value: + ret += indent(level) + + # Scalars: + if obj is None: + ret += 'QLIT_QNULL' + elif isinstance(obj, str): + ret += f"QLIT_QSTR({to_c_string(obj)})" + elif isinstance(obj, bool): + ret += f"QLIT_QBOOL({str(obj).lower()})" + + # Non-scalars: + elif isinstance(obj, list): + ret += 'QLIT_QLIST(((QLitObject[]) {\n' + for value in obj: + ret += _tree_to_qlit(value, level + 1).strip('\n') + '\n' + ret += indent(level + 1) + '{}\n' + ret += indent(level) + '}))' + elif isinstance(obj, dict): + ret += 'QLIT_QDICT(((QLitDictEntry[]) {\n' + for key, value in sorted(obj.items()): + ret += indent(level + 1) + "{{ {:s}, {:s} }},\n".format( + to_c_string(key), + _tree_to_qlit(value, level + 1, dict_value=True) + ) + ret += indent(level + 1) + '{}\n' + ret += indent(level) + '}))' + else: + raise NotImplementedError( + f"type '{type(obj).__name__}' not implemented" + ) + + if level > 0: + ret += ',' + return ret + + +def to_c_string(string: str) -> str: + return '"' + string.replace('\\', r'\\').replace('"', r'\"') + '"' + + +class QAPISchemaGenIntrospectVisitor(QAPISchemaMonolithicCVisitor): + + def __init__(self, prefix: str, unmask: bool): + super().__init__( + prefix, 'qapi-introspect', + ' * QAPI/QMP schema introspection', __doc__) + self._unmask = unmask + self._schema: Optional[QAPISchema] = None + self._trees: List[Annotated[SchemaInfo]] = [] + self._used_types: List[QAPISchemaType] = [] + self._name_map: Dict[str, str] = {} + self._genc.add(mcgen(''' +#include "qemu/osdep.h" +#include "%(prefix)sqapi-introspect.h" + +''', + prefix=prefix)) + + def visit_begin(self, schema: QAPISchema) -> None: + self._schema = schema + + def visit_end(self) -> None: + # visit the types that are actually used + for typ in self._used_types: + typ.visit(self) + # generate C + name = c_name(self._prefix, protect=False) + 'qmp_schema_qlit' + self._genh.add(mcgen(''' +#include "qapi/qmp/qlit.h" + +extern const QLitObject %(c_name)s; +''', + c_name=c_name(name))) + self._genc.add(mcgen(''' +const QLitObject %(c_name)s = %(c_string)s; +''', + c_name=c_name(name), + c_string=_tree_to_qlit(self._trees))) + self._schema = None + self._trees = [] + self._used_types = [] + self._name_map = {} + + def visit_needed(self, entity: QAPISchemaEntity) -> bool: + # Ignore types on first pass; visit_end() will pick up used types + return not isinstance(entity, QAPISchemaType) + + def _name(self, name: str) -> str: + if self._unmask: + return name + if name not in self._name_map: + self._name_map[name] = '%d' % len(self._name_map) + return self._name_map[name] + + def _use_type(self, typ: QAPISchemaType) -> str: + assert self._schema is not None + + # Map the various integer types to plain int + if typ.json_type() == 
'int': + typ = self._schema.lookup_type('int') + elif (isinstance(typ, QAPISchemaArrayType) and + typ.element_type.json_type() == 'int'): + typ = self._schema.lookup_type('intList') + # Add type to work queue if new + if typ not in self._used_types: + self._used_types.append(typ) + # Clients should examine commands and events, not types. Hide + # type names as integers to reduce the temptation. Also, it + # saves a few characters on the wire. + if isinstance(typ, QAPISchemaBuiltinType): + return typ.name + if isinstance(typ, QAPISchemaArrayType): + return '[' + self._use_type(typ.element_type) + ']' + return self._name(typ.name) + + @staticmethod + def _gen_features(features: Sequence[QAPISchemaFeature] + ) -> List[Annotated[str]]: + return [Annotated(f.name, f.ifcond) for f in features] + + def _gen_tree(self, name: str, mtype: str, obj: Dict[str, object], + ifcond: QAPISchemaIfCond = QAPISchemaIfCond(), + features: Sequence[QAPISchemaFeature] = ()) -> None: + """ + Build and append a SchemaInfo object to self._trees. + + :param name: The SchemaInfo's name. + :param mtype: The SchemaInfo's meta-type. + :param obj: Additional SchemaInfo members, as appropriate for + the meta-type. + :param ifcond: Conditionals to apply to the SchemaInfo. + :param features: The SchemaInfo's features. + Will be omitted from the output if empty. + """ + comment: Optional[str] = None + if mtype not in ('command', 'event', 'builtin', 'array'): + if not self._unmask: + # Output a comment to make it easy to map masked names + # back to the source when reading the generated output. + comment = f'"{self._name(name)}" = {name}' + name = self._name(name) + obj['name'] = name + obj['meta-type'] = mtype + if features: + obj['features'] = self._gen_features(features) + self._trees.append(Annotated(obj, ifcond, comment)) + + def _gen_enum_member(self, member: QAPISchemaEnumMember + ) -> Annotated[SchemaInfoEnumMember]: + obj: SchemaInfoEnumMember = { + 'name': member.name, + } + if member.features: + obj['features'] = self._gen_features(member.features) + return Annotated(obj, member.ifcond) + + def _gen_object_member(self, member: QAPISchemaObjectTypeMember + ) -> Annotated[SchemaInfoObjectMember]: + obj: SchemaInfoObjectMember = { + 'name': member.name, + 'type': self._use_type(member.type) + } + if member.optional: + obj['default'] = None + if member.features: + obj['features'] = self._gen_features(member.features) + return Annotated(obj, member.ifcond) + + def _gen_variant(self, variant: QAPISchemaVariant + ) -> Annotated[SchemaInfoObjectVariant]: + obj: SchemaInfoObjectVariant = { + 'case': variant.name, + 'type': self._use_type(variant.type) + } + return Annotated(obj, variant.ifcond) + + def visit_builtin_type(self, name: str, info: Optional[QAPISourceInfo], + json_type: str) -> None: + self._gen_tree(name, 'builtin', {'json-type': json_type}) + + def visit_enum_type(self, name: str, info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + features: List[QAPISchemaFeature], + members: List[QAPISchemaEnumMember], + prefix: Optional[str]) -> None: + self._gen_tree( + name, 'enum', + {'members': [self._gen_enum_member(m) for m in members], + 'values': [Annotated(m.name, m.ifcond) for m in members]}, + ifcond, features + ) + + def visit_array_type(self, name: str, info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + element_type: QAPISchemaType) -> None: + element = self._use_type(element_type) + self._gen_tree('[' + element + ']', 'array', {'element-type': element}, + ifcond) + + def 
visit_object_type_flat(self, name: str, info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + features: List[QAPISchemaFeature], + members: List[QAPISchemaObjectTypeMember], + variants: Optional[QAPISchemaVariants]) -> None: + obj: SchemaInfoObject = { + 'members': [self._gen_object_member(m) for m in members] + } + if variants: + obj['tag'] = variants.tag_member.name + obj['variants'] = [self._gen_variant(v) for v in variants.variants] + self._gen_tree(name, 'object', obj, ifcond, features) + + def visit_alternate_type(self, name: str, info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + features: List[QAPISchemaFeature], + variants: QAPISchemaVariants) -> None: + self._gen_tree( + name, 'alternate', + {'members': [Annotated({'type': self._use_type(m.type)}, + m.ifcond) + for m in variants.variants]}, + ifcond, features + ) + + def visit_command(self, name: str, info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + features: List[QAPISchemaFeature], + arg_type: Optional[QAPISchemaObjectType], + ret_type: Optional[QAPISchemaType], gen: bool, + success_response: bool, boxed: bool, allow_oob: bool, + allow_preconfig: bool, coroutine: bool) -> None: + assert self._schema is not None + + arg_type = arg_type or self._schema.the_empty_object_type + ret_type = ret_type or self._schema.the_empty_object_type + obj: SchemaInfoCommand = { + 'arg-type': self._use_type(arg_type), + 'ret-type': self._use_type(ret_type) + } + if allow_oob: + obj['allow-oob'] = allow_oob + self._gen_tree(name, 'command', obj, ifcond, features) + + def visit_event(self, name: str, info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + features: List[QAPISchemaFeature], + arg_type: Optional[QAPISchemaObjectType], + boxed: bool) -> None: + assert self._schema is not None + + arg_type = arg_type or self._schema.the_empty_object_type + self._gen_tree(name, 'event', {'arg-type': self._use_type(arg_type)}, + ifcond, features) + + +def gen_introspect(schema: QAPISchema, output_dir: str, prefix: str, + opt_unmask: bool) -> None: + vis = QAPISchemaGenIntrospectVisitor(prefix, opt_unmask) + schema.visit(vis) + vis.write(output_dir) diff --git a/scripts/qapi/main.py b/scripts/qapi/main.py new file mode 100644 index 000000000..f2ea6e0ce --- /dev/null +++ b/scripts/qapi/main.py @@ -0,0 +1,95 @@ +# This work is licensed under the terms of the GNU GPL, version 2 or later. +# See the COPYING file in the top-level directory. + +""" +QAPI Generator + +This is the main entry point for generating C code from the QAPI schema. +""" + +import argparse +import sys +from typing import Optional + +from .commands import gen_commands +from .common import must_match +from .error import QAPIError +from .events import gen_events +from .introspect import gen_introspect +from .schema import QAPISchema +from .types import gen_types +from .visit import gen_visit + + +def invalid_prefix_char(prefix: str) -> Optional[str]: + match = must_match(r'([A-Za-z_.-][A-Za-z0-9_.-]*)?', prefix) + if match.end() != len(prefix): + return prefix[match.end()] + return None + + +def generate(schema_file: str, + output_dir: str, + prefix: str, + unmask: bool = False, + builtins: bool = False) -> None: + """ + Generate C code for the given schema into the target directory. + + :param schema_file: The primary QAPI schema file. + :param output_dir: The output directory to store generated code. + :param prefix: Optional C-code prefix for symbol names. + :param unmask: Expose non-ABI names through introspection? 
+    :param builtins: Generate code for built-in types?
+
+    :raise QAPIError: On failures.
+    """
+    assert invalid_prefix_char(prefix) is None
+
+    schema = QAPISchema(schema_file)
+    gen_types(schema, output_dir, prefix, builtins)
+    gen_visit(schema, output_dir, prefix, builtins)
+    gen_commands(schema, output_dir, prefix)
+    gen_events(schema, output_dir, prefix)
+    gen_introspect(schema, output_dir, prefix, unmask)
+
+
+def main() -> int:
+    """
+    qapi-gen executable entry point.
+    Expects arguments via sys.argv, see --help for details.
+
+    :return: int, 0 on success, 1 on failure.
+    """
+    parser = argparse.ArgumentParser(
+        description='Generate code from a QAPI schema')
+    parser.add_argument('-b', '--builtins', action='store_true',
+                        help="generate code for built-in types")
+    parser.add_argument('-o', '--output-dir', action='store',
+                        default='',
+                        help="write output to directory OUTPUT_DIR")
+    parser.add_argument('-p', '--prefix', action='store',
+                        default='',
+                        help="prefix for symbols")
+    parser.add_argument('-u', '--unmask-non-abi-names', action='store_true',
+                        dest='unmask',
+                        help="expose non-ABI names in introspection")
+    parser.add_argument('schema', action='store')
+    args = parser.parse_args()
+
+    funny_char = invalid_prefix_char(args.prefix)
+    if funny_char:
+        msg = f"funny character '{funny_char}' in argument of --prefix"
+        print(f"{sys.argv[0]}: {msg}", file=sys.stderr)
+        return 1
+
+    try:
+        generate(args.schema,
+                 output_dir=args.output_dir,
+                 prefix=args.prefix,
+                 unmask=args.unmask,
+                 builtins=args.builtins)
+    except QAPIError as err:
+        print(f"{sys.argv[0]}: {str(err)}", file=sys.stderr)
+        return 1
+    return 0
diff --git a/scripts/qapi/mypy.ini b/scripts/qapi/mypy.ini
new file mode 100644
index 000000000..662535642
--- /dev/null
+++ b/scripts/qapi/mypy.ini
@@ -0,0 +1,9 @@
+[mypy]
+strict = True
+disallow_untyped_calls = False
+python_version = 3.6
+
+[mypy-qapi.schema]
+disallow_untyped_defs = False
+disallow_incomplete_defs = False
+check_untyped_defs = False
diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
new file mode 100644
index 000000000..1b006cdc1
--- /dev/null
+++ b/scripts/qapi/parser.py
@@ -0,0 +1,810 @@
+# -*- coding: utf-8 -*-
+#
+# QAPI schema parser
+#
+# Copyright IBM, Corp. 2011
+# Copyright (c) 2013-2019 Red Hat Inc.
+#
+# Authors:
+# Anthony Liguori <aliguori@us.ibm.com>
+# Markus Armbruster <armbru@redhat.com>
+# Marc-André Lureau <marcandre.lureau@redhat.com>
+# Kevin Wolf <kwolf@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.
+# See the COPYING file in the top-level directory.
+
+from collections import OrderedDict
+import os
+import re
+from typing import (
+    TYPE_CHECKING,
+    Dict,
+    List,
+    Optional,
+    Set,
+    Union,
+)
+
+from .common import must_match
+from .error import QAPISemError, QAPISourceError
+from .source import QAPISourceInfo
+
+
+if TYPE_CHECKING:
+    # pylint: disable=cyclic-import
+    # TODO: Remove cycle. [schema -> expr -> parser -> schema]
+    from .schema import QAPISchemaFeature, QAPISchemaMember
+
+
+#: Represents a single Top Level QAPI schema expression.
+TopLevelExpr = Dict[str, object]
+
+# Return value alias for get_expr().
+_ExprValue = Union[List[object], Dict[str, object], str, bool]
+
+# FIXME: Consolidate and centralize definitions for TopLevelExpr,
+# _ExprValue, _JSONValue, and _JSONObject; currently scattered across
+# several modules.
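+#
+# Illustrative sketch (the 'frob' command below is hypothetical, not
+# taken from any real schema): parsing the schema text
+#     { 'command': 'frob', 'data': { 'arg': 'str' } }
+# yields a TopLevelExpr equivalent to the Python dict
+#     {'command': 'frob', 'data': {'arg': 'str'}}
+# in which every nested value is itself an _ExprValue.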
+ + +class QAPIParseError(QAPISourceError): + """Error class for all QAPI schema parsing errors.""" + def __init__(self, parser: 'QAPISchemaParser', msg: str): + col = 1 + for ch in parser.src[parser.line_pos:parser.pos]: + if ch == '\t': + col = (col + 7) % 8 + 1 + else: + col += 1 + super().__init__(parser.info, msg, col) + + +class QAPISchemaParser: + """ + Parse QAPI schema source. + + Parse a JSON-esque schema file and process directives. See + qapi-code-gen.txt section "Schema Syntax" for the exact syntax. + Grammatical validation is handled later by `expr.check_exprs()`. + + :param fname: Source file name. + :param previously_included: + The absolute names of previously included source files, + if being invoked from another parser. + :param incl_info: + `QAPISourceInfo` belonging to the parent module. + ``None`` implies this is the root module. + + :ivar exprs: Resulting parsed expressions. + :ivar docs: Resulting parsed documentation blocks. + + :raise OSError: For problems reading the root schema document. + :raise QAPIError: For errors in the schema source. + """ + def __init__(self, + fname: str, + previously_included: Optional[Set[str]] = None, + incl_info: Optional[QAPISourceInfo] = None): + self._fname = fname + self._included = previously_included or set() + self._included.add(os.path.abspath(self._fname)) + self.src = '' + + # Lexer state (see `accept` for details): + self.info = QAPISourceInfo(self._fname, incl_info) + self.tok: Union[None, str] = None + self.pos = 0 + self.cursor = 0 + self.val: Optional[Union[bool, str]] = None + self.line_pos = 0 + + # Parser output: + self.exprs: List[Dict[str, object]] = [] + self.docs: List[QAPIDoc] = [] + + # Showtime! + self._parse() + + def _parse(self) -> None: + """ + Parse the QAPI schema document. + + :return: None. Results are stored in ``.exprs`` and ``.docs``. + """ + cur_doc = None + + # May raise OSError; allow the caller to handle it. 
+ with open(self._fname, 'r', encoding='utf-8') as fp: + self.src = fp.read() + if self.src == '' or self.src[-1] != '\n': + self.src += '\n' + + # Prime the lexer: + self.accept() + + # Parse until done: + while self.tok is not None: + info = self.info + if self.tok == '#': + self.reject_expr_doc(cur_doc) + for cur_doc in self.get_doc(info): + self.docs.append(cur_doc) + continue + + expr = self.get_expr() + if not isinstance(expr, dict): + raise QAPISemError( + info, "top-level expression must be an object") + + if 'include' in expr: + self.reject_expr_doc(cur_doc) + if len(expr) != 1: + raise QAPISemError(info, "invalid 'include' directive") + include = expr['include'] + if not isinstance(include, str): + raise QAPISemError(info, + "value of 'include' must be a string") + incl_fname = os.path.join(os.path.dirname(self._fname), + include) + self.exprs.append({'expr': {'include': incl_fname}, + 'info': info}) + exprs_include = self._include(include, info, incl_fname, + self._included) + if exprs_include: + self.exprs.extend(exprs_include.exprs) + self.docs.extend(exprs_include.docs) + elif "pragma" in expr: + self.reject_expr_doc(cur_doc) + if len(expr) != 1: + raise QAPISemError(info, "invalid 'pragma' directive") + pragma = expr['pragma'] + if not isinstance(pragma, dict): + raise QAPISemError( + info, "value of 'pragma' must be an object") + for name, value in pragma.items(): + self._pragma(name, value, info) + else: + expr_elem = {'expr': expr, + 'info': info} + if cur_doc: + if not cur_doc.symbol: + raise QAPISemError( + cur_doc.info, "definition documentation required") + expr_elem['doc'] = cur_doc + self.exprs.append(expr_elem) + cur_doc = None + self.reject_expr_doc(cur_doc) + + @staticmethod + def reject_expr_doc(doc: Optional['QAPIDoc']) -> None: + if doc and doc.symbol: + raise QAPISemError( + doc.info, + "documentation for '%s' is not followed by the definition" + % doc.symbol) + + @staticmethod + def _include(include: str, + info: QAPISourceInfo, + incl_fname: str, + previously_included: Set[str] + ) -> Optional['QAPISchemaParser']: + incl_abs_fname = os.path.abspath(incl_fname) + # catch inclusion cycle + inf: Optional[QAPISourceInfo] = info + while inf: + if incl_abs_fname == os.path.abspath(inf.fname): + raise QAPISemError(info, "inclusion loop for %s" % include) + inf = inf.parent + + # skip multiple include of the same file + if incl_abs_fname in previously_included: + return None + + try: + return QAPISchemaParser(incl_fname, previously_included, info) + except OSError as err: + raise QAPISemError( + info, + f"can't read include file '{incl_fname}': {err.strerror}" + ) from err + + @staticmethod + def _pragma(name: str, value: object, info: QAPISourceInfo) -> None: + + def check_list_str(name: str, value: object) -> List[str]: + if (not isinstance(value, list) or + any(not isinstance(elt, str) for elt in value)): + raise QAPISemError( + info, + "pragma %s must be a list of strings" % name) + return value + + pragma = info.pragma + + if name == 'doc-required': + if not isinstance(value, bool): + raise QAPISemError(info, + "pragma 'doc-required' must be boolean") + pragma.doc_required = value + elif name == 'command-name-exceptions': + pragma.command_name_exceptions = check_list_str(name, value) + elif name == 'command-returns-exceptions': + pragma.command_returns_exceptions = check_list_str(name, value) + elif name == 'member-name-exceptions': + pragma.member_name_exceptions = check_list_str(name, value) + else: + raise QAPISemError(info, "unknown pragma '%s'" % name) + 
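+    # Illustrative only (hypothetical pragma values): a pragma directive,
+    # as _pragma() above expects it, appears in a schema file as
+    #
+    #     { 'pragma': { 'doc-required': true,
+    #                   'member-name-exceptions': [ 'FuzzyType' ] } }
+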
+ def accept(self, skip_comment: bool = True) -> None: + """ + Read and store the next token. + + :param skip_comment: + When false, return COMMENT tokens ("#"). + This is used when reading documentation blocks. + + :return: + None. Several instance attributes are updated instead: + + - ``.tok`` represents the token type. See below for values. + - ``.info`` describes the token's source location. + - ``.val`` is the token's value, if any. See below. + - ``.pos`` is the buffer index of the first character of + the token. + + * Single-character tokens: + + These are "{", "}", ":", ",", "[", and "]". + ``.tok`` holds the single character and ``.val`` is None. + + * Multi-character tokens: + + * COMMENT: + + This token is not normally returned by the lexer, but it can + be when ``skip_comment`` is False. ``.tok`` is "#", and + ``.val`` is a string including all chars until end-of-line, + including the "#" itself. + + * STRING: + + ``.tok`` is "'", the single quote. ``.val`` contains the + string, excluding the surrounding quotes. + + * TRUE and FALSE: + + ``.tok`` is either "t" or "f", ``.val`` will be the + corresponding bool value. + + * EOF: + + ``.tok`` and ``.val`` will both be None at EOF. + """ + while True: + self.tok = self.src[self.cursor] + self.pos = self.cursor + self.cursor += 1 + self.val = None + + if self.tok == '#': + if self.src[self.cursor] == '#': + # Start of doc comment + skip_comment = False + self.cursor = self.src.find('\n', self.cursor) + if not skip_comment: + self.val = self.src[self.pos:self.cursor] + return + elif self.tok in '{}:,[]': + return + elif self.tok == "'": + # Note: we accept only printable ASCII + string = '' + esc = False + while True: + ch = self.src[self.cursor] + self.cursor += 1 + if ch == '\n': + raise QAPIParseError(self, "missing terminating \"'\"") + if esc: + # Note: we recognize only \\ because we have + # no use for funny characters in strings + if ch != '\\': + raise QAPIParseError(self, + "unknown escape \\%s" % ch) + esc = False + elif ch == '\\': + esc = True + continue + elif ch == "'": + self.val = string + return + if ord(ch) < 32 or ord(ch) >= 127: + raise QAPIParseError( + self, "funny character in string") + string += ch + elif self.src.startswith('true', self.pos): + self.val = True + self.cursor += 3 + return + elif self.src.startswith('false', self.pos): + self.val = False + self.cursor += 4 + return + elif self.tok == '\n': + if self.cursor == len(self.src): + self.tok = None + return + self.info = self.info.next_line() + self.line_pos = self.cursor + elif not self.tok.isspace(): + # Show up to next structural, whitespace or quote + # character + match = must_match('[^[\\]{}:,\\s\'"]+', + self.src[self.cursor-1:]) + raise QAPIParseError(self, "stray '%s'" % match.group(0)) + + def get_members(self) -> Dict[str, object]: + expr: Dict[str, object] = OrderedDict() + if self.tok == '}': + self.accept() + return expr + if self.tok != "'": + raise QAPIParseError(self, "expected string or '}'") + while True: + key = self.val + assert isinstance(key, str) # Guaranteed by tok == "'" + + self.accept() + if self.tok != ':': + raise QAPIParseError(self, "expected ':'") + self.accept() + if key in expr: + raise QAPIParseError(self, "duplicate key '%s'" % key) + expr[key] = self.get_expr() + if self.tok == '}': + self.accept() + return expr + if self.tok != ',': + raise QAPIParseError(self, "expected ',' or '}'") + self.accept() + if self.tok != "'": + raise QAPIParseError(self, "expected string") + + def get_values(self) -> List[object]: 
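+        # The caller has already accepted the opening '['; consume values
+        # up to and including the matching ']' and return them as a list.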
+ expr: List[object] = [] + if self.tok == ']': + self.accept() + return expr + if self.tok not in tuple("{['tf"): + raise QAPIParseError( + self, "expected '{', '[', ']', string, or boolean") + while True: + expr.append(self.get_expr()) + if self.tok == ']': + self.accept() + return expr + if self.tok != ',': + raise QAPIParseError(self, "expected ',' or ']'") + self.accept() + + def get_expr(self) -> _ExprValue: + expr: _ExprValue + if self.tok == '{': + self.accept() + expr = self.get_members() + elif self.tok == '[': + self.accept() + expr = self.get_values() + elif self.tok in tuple("'tf"): + assert isinstance(self.val, (str, bool)) + expr = self.val + self.accept() + else: + raise QAPIParseError( + self, "expected '{', '[', string, or boolean") + return expr + + def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']: + if self.val != '##': + raise QAPIParseError( + self, "junk after '##' at start of documentation comment") + + docs = [] + cur_doc = QAPIDoc(self, info) + self.accept(False) + while self.tok == '#': + assert isinstance(self.val, str) + if self.val.startswith('##'): + # End of doc comment + if self.val != '##': + raise QAPIParseError( + self, + "junk after '##' at end of documentation comment") + cur_doc.end_comment() + docs.append(cur_doc) + self.accept() + return docs + if self.val.startswith('# ='): + if cur_doc.symbol: + raise QAPIParseError( + self, + "unexpected '=' markup in definition documentation") + if cur_doc.body.text: + cur_doc.end_comment() + docs.append(cur_doc) + cur_doc = QAPIDoc(self, info) + cur_doc.append(self.val) + self.accept(False) + + raise QAPIParseError(self, "documentation comment must end with '##'") + + +class QAPIDoc: + """ + A documentation comment block, either definition or free-form + + Definition documentation blocks consist of + + * a body section: one line naming the definition, followed by an + overview (any number of lines) + + * argument sections: a description of each argument (for commands + and events) or member (for structs, unions and alternates) + + * features sections: a description of each feature flag + + * additional (non-argument) sections, possibly tagged + + Free-form documentation blocks consist only of a body section. + """ + + class Section: + # pylint: disable=too-few-public-methods + def __init__(self, parser: QAPISchemaParser, + name: Optional[str] = None, indent: int = 0): + + # parser, for error messages about indentation + self._parser = parser + # optional section name (argument/member or section name) + self.name = name + self.text = '' + # the expected indent level of the text of this section + self._indent = indent + + def append(self, line: str) -> None: + # Strip leading spaces corresponding to the expected indent level + # Blank lines are always OK. + if line: + indent = must_match(r'\s*', line).end() + if indent < self._indent: + raise QAPIParseError( + self._parser, + "unexpected de-indent (expected at least %d spaces)" % + self._indent) + line = line[self._indent:] + + self.text += line.rstrip() + '\n' + + class ArgSection(Section): + def __init__(self, parser: QAPISchemaParser, + name: str, indent: int = 0): + super().__init__(parser, name, indent) + self.member: Optional['QAPISchemaMember'] = None + + def connect(self, member: 'QAPISchemaMember') -> None: + self.member = member + + class NullSection(Section): + """ + Immutable dummy section for use at the end of a doc block. 
+ """ + # pylint: disable=too-few-public-methods + def append(self, line: str) -> None: + assert False, "Text appended after end_comment() called." + + def __init__(self, parser: QAPISchemaParser, info: QAPISourceInfo): + # self._parser is used to report errors with QAPIParseError. The + # resulting error position depends on the state of the parser. + # It happens to be the beginning of the comment. More or less + # servicable, but action at a distance. + self._parser = parser + self.info = info + self.symbol: Optional[str] = None + self.body = QAPIDoc.Section(parser) + # dicts mapping parameter/feature names to their ArgSection + self.args: Dict[str, QAPIDoc.ArgSection] = OrderedDict() + self.features: Dict[str, QAPIDoc.ArgSection] = OrderedDict() + self.sections: List[QAPIDoc.Section] = [] + # the current section + self._section = self.body + self._append_line = self._append_body_line + + def has_section(self, name: str) -> bool: + """Return True if we have a section with this name.""" + for i in self.sections: + if i.name == name: + return True + return False + + def append(self, line: str) -> None: + """ + Parse a comment line and add it to the documentation. + + The way that the line is dealt with depends on which part of + the documentation we're parsing right now: + * The body section: ._append_line is ._append_body_line + * An argument section: ._append_line is ._append_args_line + * A features section: ._append_line is ._append_features_line + * An additional section: ._append_line is ._append_various_line + """ + line = line[1:] + if not line: + self._append_freeform(line) + return + + if line[0] != ' ': + raise QAPIParseError(self._parser, "missing space after #") + line = line[1:] + self._append_line(line) + + def end_comment(self) -> None: + self._switch_section(QAPIDoc.NullSection(self._parser)) + + @staticmethod + def _is_section_tag(name: str) -> bool: + return name in ('Returns:', 'Since:', + # those are often singular or plural + 'Note:', 'Notes:', + 'Example:', 'Examples:', + 'TODO:') + + def _append_body_line(self, line: str) -> None: + """ + Process a line of documentation text in the body section. + + If this a symbol line and it is the section's first line, this + is a definition documentation block for that symbol. + + If it's a definition documentation block, another symbol line + begins the argument section for the argument named by it, and + a section tag begins an additional section. Start that + section and append the line to it. + + Else, append the line to the current section. + """ + name = line.split(' ', 1)[0] + # FIXME not nice: things like '# @foo:' and '# @foo: ' aren't + # recognized, and get silently treated as ordinary text + if not self.symbol and not self.body.text and line.startswith('@'): + if not line.endswith(':'): + raise QAPIParseError(self._parser, "line should end with ':'") + self.symbol = line[1:-1] + # Invalid names are not checked here, but the name provided MUST + # match the following definition, which *is* validated in expr.py. 
+            if not self.symbol:
+                raise QAPIParseError(
+                    self._parser, "name required after '@'")
+        elif self.symbol:
+            # This is a definition documentation block
+            if name.startswith('@') and name.endswith(':'):
+                self._append_line = self._append_args_line
+                self._append_args_line(line)
+            elif line == 'Features:':
+                self._append_line = self._append_features_line
+            elif self._is_section_tag(name):
+                self._append_line = self._append_various_line
+                self._append_various_line(line)
+            else:
+                self._append_freeform(line)
+        else:
+            # This is a free-form documentation block
+            self._append_freeform(line)
+
+    def _append_args_line(self, line: str) -> None:
+        """
+        Process a line of documentation text in an argument section.
+
+        A symbol line begins the next argument section; a section tag
+        or a non-indented line after a blank line begins an additional
+        section. Start that section and append the line to it.
+
+        Else, append the line to the current section.
+
+        """
+        name = line.split(' ', 1)[0]
+
+        if name.startswith('@') and name.endswith(':'):
+            # If line is "@arg: first line of description", find
+            # the index of 'f', which is the indent we expect for any
+            # following lines. We then remove the leading "@arg:"
+            # from line and replace it with spaces so that 'f' has the
+            # same index as it did in the original line and can be
+            # handled the same way we will handle following lines.
+            indent = must_match(r'@\S*:\s*', line).end()
+            line = line[indent:]
+            if not line:
+                # Line was just the "@arg:" header; following lines
+                # are not indented
+                indent = 0
+            else:
+                line = ' ' * indent + line
+            self._start_args_section(name[1:-1], indent)
+        elif self._is_section_tag(name):
+            self._append_line = self._append_various_line
+            self._append_various_line(line)
+            return
+        elif (self._section.text.endswith('\n\n')
+              and line and not line[0].isspace()):
+            if line == 'Features:':
+                self._append_line = self._append_features_line
+            else:
+                self._start_section()
+                self._append_line = self._append_various_line
+                self._append_various_line(line)
+            return
+
+        self._append_freeform(line)
+
+    def _append_features_line(self, line: str) -> None:
+        name = line.split(' ', 1)[0]
+
+        if name.startswith('@') and name.endswith(':'):
+            # If line is "@arg: first line of description", find
+            # the index of 'f', which is the indent we expect for any
+            # following lines. We then remove the leading "@arg:"
+            # from line and replace it with spaces so that 'f' has the
+            # same index as it did in the original line and can be
+            # handled the same way we will handle following lines.
+            indent = must_match(r'@\S*:\s*', line).end()
+            line = line[indent:]
+            if not line:
+                # Line was just the "@arg:" header; following lines
+                # are not indented
+                indent = 0
+            else:
+                line = ' ' * indent + line
+            self._start_features_section(name[1:-1], indent)
+        elif self._is_section_tag(name):
+            self._append_line = self._append_various_line
+            self._append_various_line(line)
+            return
+        elif (self._section.text.endswith('\n\n')
+              and line and not line[0].isspace()):
+            self._start_section()
+            self._append_line = self._append_various_line
+            self._append_various_line(line)
+            return
+
+        self._append_freeform(line)
+
+    def _append_various_line(self, line: str) -> None:
+        """
+        Process a line of documentation text in an additional section.
+
+        A symbol line is an error.
+
+        A section tag begins an additional section. Start that
+        section and append the line to it.
+
+        Else, append the line to the current section.
+ """ + name = line.split(' ', 1)[0] + + if name.startswith('@') and name.endswith(':'): + raise QAPIParseError(self._parser, + "'%s' can't follow '%s' section" + % (name, self.sections[0].name)) + if self._is_section_tag(name): + # If line is "Section: first line of description", find + # the index of 'f', which is the indent we expect for any + # following lines. We then remove the leading "Section:" + # from line and replace it with spaces so that 'f' has the + # same index as it did in the original line and can be + # handled the same way we will handle following lines. + indent = must_match(r'\S*:\s*', line).end() + line = line[indent:] + if not line: + # Line was just the "Section:" header; following lines + # are not indented + indent = 0 + else: + line = ' ' * indent + line + self._start_section(name[:-1], indent) + + self._append_freeform(line) + + def _start_symbol_section( + self, + symbols_dict: Dict[str, 'QAPIDoc.ArgSection'], + name: str, + indent: int) -> None: + # FIXME invalid names other than the empty string aren't flagged + if not name: + raise QAPIParseError(self._parser, "invalid parameter name") + if name in symbols_dict: + raise QAPIParseError(self._parser, + "'%s' parameter name duplicated" % name) + assert not self.sections + new_section = QAPIDoc.ArgSection(self._parser, name, indent) + self._switch_section(new_section) + symbols_dict[name] = new_section + + def _start_args_section(self, name: str, indent: int) -> None: + self._start_symbol_section(self.args, name, indent) + + def _start_features_section(self, name: str, indent: int) -> None: + self._start_symbol_section(self.features, name, indent) + + def _start_section(self, name: Optional[str] = None, + indent: int = 0) -> None: + if name in ('Returns', 'Since') and self.has_section(name): + raise QAPIParseError(self._parser, + "duplicated '%s' section" % name) + new_section = QAPIDoc.Section(self._parser, name, indent) + self._switch_section(new_section) + self.sections.append(new_section) + + def _switch_section(self, new_section: 'QAPIDoc.Section') -> None: + text = self._section.text = self._section.text.strip() + + # Only the 'body' section is allowed to have an empty body. + # All other sections, including anonymous ones, must have text. + if self._section != self.body and not text: + # We do not create anonymous sections unless there is + # something to put in them; this is a parser bug. 
+ assert self._section.name + raise QAPIParseError( + self._parser, + "empty doc section '%s'" % self._section.name) + + self._section = new_section + + def _append_freeform(self, line: str) -> None: + match = re.match(r'(@\S+:)', line) + if match: + raise QAPIParseError(self._parser, + "'%s' not allowed in free-form documentation" + % match.group(1)) + self._section.append(line) + + def connect_member(self, member: 'QAPISchemaMember') -> None: + if member.name not in self.args: + # Undocumented TODO outlaw + self.args[member.name] = QAPIDoc.ArgSection(self._parser, + member.name) + self.args[member.name].connect(member) + + def connect_feature(self, feature: 'QAPISchemaFeature') -> None: + if feature.name not in self.features: + raise QAPISemError(feature.info, + "feature '%s' lacks documentation" + % feature.name) + self.features[feature.name].connect(feature) + + def check_expr(self, expr: TopLevelExpr) -> None: + if self.has_section('Returns') and 'command' not in expr: + raise QAPISemError(self.info, + "'Returns:' is only valid for commands") + + def check(self) -> None: + + def check_args_section( + args: Dict[str, QAPIDoc.ArgSection], what: str + ) -> None: + bogus = [name for name, section in args.items() + if not section.member] + if bogus: + raise QAPISemError( + self.info, + "documented %s%s '%s' %s not exist" % ( + what, + "s" if len(bogus) > 1 else "", + "', '".join(bogus), + "do" if len(bogus) > 1 else "does" + )) + + check_args_section(self.args, 'member') + check_args_section(self.features, 'feature') diff --git a/scripts/qapi/pylintrc b/scripts/qapi/pylintrc new file mode 100644 index 000000000..b259531a7 --- /dev/null +++ b/scripts/qapi/pylintrc @@ -0,0 +1,69 @@ +[MASTER] + +# Add files or directories matching the regex patterns to the ignore list. +# The regex matches against base names, not paths. +ignore-patterns=schema.py, + + +[MESSAGES CONTROL] + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=fixme, + missing-docstring, + too-many-arguments, + too-many-branches, + too-many-statements, + too-many-instance-attributes, + consider-using-f-string, + +[REPORTS] + +[REFACTORING] + +[MISCELLANEOUS] + +[LOGGING] + +[BASIC] + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + _, + fp, # fp = open(...) + fd, # fd = os.open(...) + ch, + +[VARIABLES] + +[STRING] + +[SPELLING] + +[FORMAT] + +[SIMILARITIES] + +# Ignore import statements themselves when computing similarities. +ignore-imports=yes + +[TYPECHECK] + +[CLASSES] + +[IMPORTS] + +[DESIGN] + +[EXCEPTIONS] diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py new file mode 100644 index 000000000..b7b3fc0ce --- /dev/null +++ b/scripts/qapi/schema.py @@ -0,0 +1,1185 @@ +# -*- coding: utf-8 -*- +# +# QAPI schema internal representation +# +# Copyright (c) 2015-2019 Red Hat Inc. 
+# +# Authors: +# Markus Armbruster <armbru@redhat.com> +# Eric Blake <eblake@redhat.com> +# Marc-André Lureau <marcandre.lureau@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2. +# See the COPYING file in the top-level directory. + +# TODO catching name collisions in generated code would be nice + +from collections import OrderedDict +import os +import re +from typing import Optional + +from .common import ( + POINTER_SUFFIX, + c_name, + cgen_ifcond, + docgen_ifcond, + gen_endif, + gen_if, +) +from .error import QAPIError, QAPISemError, QAPISourceError +from .expr import check_exprs +from .parser import QAPISchemaParser + + +class QAPISchemaIfCond: + def __init__(self, ifcond=None): + self.ifcond = ifcond + + def _cgen(self): + return cgen_ifcond(self.ifcond) + + def gen_if(self): + return gen_if(self._cgen()) + + def gen_endif(self): + return gen_endif(self._cgen()) + + def docgen(self): + return docgen_ifcond(self.ifcond) + + def is_present(self): + return bool(self.ifcond) + + +class QAPISchemaEntity: + meta: Optional[str] = None + + def __init__(self, name: str, info, doc, ifcond=None, features=None): + assert name is None or isinstance(name, str) + for f in features or []: + assert isinstance(f, QAPISchemaFeature) + f.set_defined_in(name) + self.name = name + self._module = None + # For explicitly defined entities, info points to the (explicit) + # definition. For builtins (and their arrays), info is None. + # For implicitly defined entities, info points to a place that + # triggered the implicit definition (there may be more than one + # such place). + self.info = info + self.doc = doc + self._ifcond = ifcond or QAPISchemaIfCond() + self.features = features or [] + self._checked = False + + def c_name(self): + return c_name(self.name) + + def check(self, schema): + assert not self._checked + seen = {} + for f in self.features: + f.check_clash(self.info, seen) + self._checked = True + + def connect_doc(self, doc=None): + doc = doc or self.doc + if doc: + for f in self.features: + doc.connect_feature(f) + + def check_doc(self): + if self.doc: + self.doc.check() + + def _set_module(self, schema, info): + assert self._checked + fname = info.fname if info else QAPISchemaModule.BUILTIN_MODULE_NAME + self._module = schema.module_by_fname(fname) + self._module.add_entity(self) + + def set_module(self, schema): + self._set_module(schema, self.info) + + @property + def ifcond(self): + assert self._checked + return self._ifcond + + def is_implicit(self): + return not self.info + + def visit(self, visitor): + assert self._checked + + def describe(self): + assert self.meta + return "%s '%s'" % (self.meta, self.name) + + +class QAPISchemaVisitor: + def visit_begin(self, schema): + pass + + def visit_end(self): + pass + + def visit_module(self, name): + pass + + def visit_needed(self, entity): + # Default to visiting everything + return True + + def visit_include(self, name, info): + pass + + def visit_builtin_type(self, name, info, json_type): + pass + + def visit_enum_type(self, name, info, ifcond, features, members, prefix): + pass + + def visit_array_type(self, name, info, ifcond, element_type): + pass + + def visit_object_type(self, name, info, ifcond, features, + base, members, variants): + pass + + def visit_object_type_flat(self, name, info, ifcond, features, + members, variants): + pass + + def visit_alternate_type(self, name, info, ifcond, features, variants): + pass + + def visit_command(self, name, info, ifcond, features, + arg_type, ret_type, gen, 
success_response, boxed, + allow_oob, allow_preconfig, coroutine): + pass + + def visit_event(self, name, info, ifcond, features, arg_type, boxed): + pass + + +class QAPISchemaModule: + + BUILTIN_MODULE_NAME = './builtin' + + def __init__(self, name): + self.name = name + self._entity_list = [] + + @staticmethod + def is_system_module(name: str) -> bool: + """ + System modules are internally defined modules. + + Their names start with the "./" prefix. + """ + return name.startswith('./') + + @classmethod + def is_user_module(cls, name: str) -> bool: + """ + User modules are those defined by the user in qapi JSON files. + + They do not start with the "./" prefix. + """ + return not cls.is_system_module(name) + + @classmethod + def is_builtin_module(cls, name: str) -> bool: + """ + The built-in module is a single System module for the built-in types. + + It is always "./builtin". + """ + return name == cls.BUILTIN_MODULE_NAME + + def add_entity(self, ent): + self._entity_list.append(ent) + + def visit(self, visitor): + visitor.visit_module(self.name) + for entity in self._entity_list: + if visitor.visit_needed(entity): + entity.visit(visitor) + + +class QAPISchemaInclude(QAPISchemaEntity): + def __init__(self, sub_module, info): + super().__init__(None, info, None) + self._sub_module = sub_module + + def visit(self, visitor): + super().visit(visitor) + visitor.visit_include(self._sub_module.name, self.info) + + +class QAPISchemaType(QAPISchemaEntity): + # Return the C type for common use. + # For the types we commonly box, this is a pointer type. + def c_type(self): + pass + + # Return the C type to be used in a parameter list. + def c_param_type(self): + return self.c_type() + + # Return the C type to be used where we suppress boxing. + def c_unboxed_type(self): + return self.c_type() + + def json_type(self): + pass + + def alternate_qtype(self): + json2qtype = { + 'null': 'QTYPE_QNULL', + 'string': 'QTYPE_QSTRING', + 'number': 'QTYPE_QNUM', + 'int': 'QTYPE_QNUM', + 'boolean': 'QTYPE_QBOOL', + 'object': 'QTYPE_QDICT' + } + return json2qtype.get(self.json_type()) + + def doc_type(self): + if self.is_implicit(): + return None + return self.name + + def check(self, schema): + QAPISchemaEntity.check(self, schema) + for feat in self.features: + if feat.is_special(): + raise QAPISemError( + self.info, + f"feature '{feat.name}' is not supported for types") + + def describe(self): + assert self.meta + return "%s type '%s'" % (self.meta, self.name) + + +class QAPISchemaBuiltinType(QAPISchemaType): + meta = 'built-in' + + def __init__(self, name, json_type, c_type): + super().__init__(name, None, None) + assert not c_type or isinstance(c_type, str) + assert json_type in ('string', 'number', 'int', 'boolean', 'null', + 'value') + self._json_type_name = json_type + self._c_type_name = c_type + + def c_name(self): + return self.name + + def c_type(self): + return self._c_type_name + + def c_param_type(self): + if self.name == 'str': + return 'const ' + self._c_type_name + return self._c_type_name + + def json_type(self): + return self._json_type_name + + def doc_type(self): + return self.json_type() + + def visit(self, visitor): + super().visit(visitor) + visitor.visit_builtin_type(self.name, self.info, self.json_type()) + + +class QAPISchemaEnumType(QAPISchemaType): + meta = 'enum' + + def __init__(self, name, info, doc, ifcond, features, members, prefix): + super().__init__(name, info, doc, ifcond, features) + for m in members: + assert isinstance(m, QAPISchemaEnumMember) + m.set_defined_in(name) + 
assert prefix is None or isinstance(prefix, str) + self.members = members + self.prefix = prefix + + def check(self, schema): + super().check(schema) + seen = {} + for m in self.members: + m.check_clash(self.info, seen) + + def connect_doc(self, doc=None): + super().connect_doc(doc) + doc = doc or self.doc + for m in self.members: + m.connect_doc(doc) + + def is_implicit(self): + # See QAPISchema._def_predefineds() + return self.name == 'QType' + + def c_type(self): + return c_name(self.name) + + def member_names(self): + return [m.name for m in self.members] + + def json_type(self): + return 'string' + + def visit(self, visitor): + super().visit(visitor) + visitor.visit_enum_type( + self.name, self.info, self.ifcond, self.features, + self.members, self.prefix) + + +class QAPISchemaArrayType(QAPISchemaType): + meta = 'array' + + def __init__(self, name, info, element_type): + super().__init__(name, info, None) + assert isinstance(element_type, str) + self._element_type_name = element_type + self.element_type = None + + def check(self, schema): + super().check(schema) + self.element_type = schema.resolve_type( + self._element_type_name, self.info, + self.info and self.info.defn_meta) + assert not isinstance(self.element_type, QAPISchemaArrayType) + + def set_module(self, schema): + self._set_module(schema, self.element_type.info) + + @property + def ifcond(self): + assert self._checked + return self.element_type.ifcond + + def is_implicit(self): + return True + + def c_type(self): + return c_name(self.name) + POINTER_SUFFIX + + def json_type(self): + return 'array' + + def doc_type(self): + elt_doc_type = self.element_type.doc_type() + if not elt_doc_type: + return None + return 'array of ' + elt_doc_type + + def visit(self, visitor): + super().visit(visitor) + visitor.visit_array_type(self.name, self.info, self.ifcond, + self.element_type) + + def describe(self): + assert self.meta + return "%s type ['%s']" % (self.meta, self._element_type_name) + + +class QAPISchemaObjectType(QAPISchemaType): + def __init__(self, name, info, doc, ifcond, features, + base, local_members, variants): + # struct has local_members, optional base, and no variants + # union has base, variants, and no local_members + super().__init__(name, info, doc, ifcond, features) + self.meta = 'union' if variants else 'struct' + assert base is None or isinstance(base, str) + for m in local_members: + assert isinstance(m, QAPISchemaObjectTypeMember) + m.set_defined_in(name) + if variants is not None: + assert isinstance(variants, QAPISchemaVariants) + variants.set_defined_in(name) + self._base_name = base + self.base = None + self.local_members = local_members + self.variants = variants + self.members = None + + def check(self, schema): + # This calls another type T's .check() exactly when the C + # struct emitted by gen_object() contains that T's C struct + # (pointers don't count). 
+ if self.members is not None: + # A previous .check() completed: nothing to do + return + if self._checked: + # Recursed: C struct contains itself + raise QAPISemError(self.info, + "object %s contains itself" % self.name) + + super().check(schema) + assert self._checked and self.members is None + + seen = OrderedDict() + if self._base_name: + self.base = schema.resolve_type(self._base_name, self.info, + "'base'") + if (not isinstance(self.base, QAPISchemaObjectType) + or self.base.variants): + raise QAPISemError( + self.info, + "'base' requires a struct type, %s isn't" + % self.base.describe()) + self.base.check(schema) + self.base.check_clash(self.info, seen) + for m in self.local_members: + m.check(schema) + m.check_clash(self.info, seen) + members = seen.values() + + if self.variants: + self.variants.check(schema, seen) + self.variants.check_clash(self.info, seen) + + self.members = members # mark completed + + # Check that the members of this type do not cause duplicate JSON members, + # and update seen to track the members seen so far. Report any errors + # on behalf of info, which is not necessarily self.info + def check_clash(self, info, seen): + assert self._checked + assert not self.variants # not implemented + for m in self.members: + m.check_clash(info, seen) + + def connect_doc(self, doc=None): + super().connect_doc(doc) + doc = doc or self.doc + if self.base and self.base.is_implicit(): + self.base.connect_doc(doc) + for m in self.local_members: + m.connect_doc(doc) + + def is_implicit(self): + # See QAPISchema._make_implicit_object_type(), as well as + # _def_predefineds() + return self.name.startswith('q_') + + def is_empty(self): + assert self.members is not None + return not self.members and not self.variants + + def c_name(self): + assert self.name != 'q_empty' + return super().c_name() + + def c_type(self): + assert not self.is_implicit() + return c_name(self.name) + POINTER_SUFFIX + + def c_unboxed_type(self): + return c_name(self.name) + + def json_type(self): + return 'object' + + def visit(self, visitor): + super().visit(visitor) + visitor.visit_object_type( + self.name, self.info, self.ifcond, self.features, + self.base, self.local_members, self.variants) + visitor.visit_object_type_flat( + self.name, self.info, self.ifcond, self.features, + self.members, self.variants) + + +class QAPISchemaAlternateType(QAPISchemaType): + meta = 'alternate' + + def __init__(self, name, info, doc, ifcond, features, variants): + super().__init__(name, info, doc, ifcond, features) + assert isinstance(variants, QAPISchemaVariants) + assert variants.tag_member + variants.set_defined_in(name) + variants.tag_member.set_defined_in(self.name) + self.variants = variants + + def check(self, schema): + super().check(schema) + self.variants.tag_member.check(schema) + # Not calling self.variants.check_clash(), because there's nothing + # to clash with + self.variants.check(schema, {}) + # Alternate branch names have no relation to the tag enum values; + # so we have to check for potential name collisions ourselves. 
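+        # seen tracks branch names for clash checking; types_seen maps each
+        # QTYPE_* value to the branch that claimed it, so branches that
+        # cannot be told apart on the wire are rejected below.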
+ seen = {} + types_seen = {} + for v in self.variants.variants: + v.check_clash(self.info, seen) + qtype = v.type.alternate_qtype() + if not qtype: + raise QAPISemError( + self.info, + "%s cannot use %s" + % (v.describe(self.info), v.type.describe())) + conflicting = set([qtype]) + if qtype == 'QTYPE_QSTRING': + if isinstance(v.type, QAPISchemaEnumType): + for m in v.type.members: + if m.name in ['on', 'off']: + conflicting.add('QTYPE_QBOOL') + if re.match(r'[-+0-9.]', m.name): + # lazy, could be tightened + conflicting.add('QTYPE_QNUM') + else: + conflicting.add('QTYPE_QNUM') + conflicting.add('QTYPE_QBOOL') + for qt in conflicting: + if qt in types_seen: + raise QAPISemError( + self.info, + "%s can't be distinguished from '%s'" + % (v.describe(self.info), types_seen[qt])) + types_seen[qt] = v.name + + def connect_doc(self, doc=None): + super().connect_doc(doc) + doc = doc or self.doc + for v in self.variants.variants: + v.connect_doc(doc) + + def c_type(self): + return c_name(self.name) + POINTER_SUFFIX + + def json_type(self): + return 'value' + + def visit(self, visitor): + super().visit(visitor) + visitor.visit_alternate_type( + self.name, self.info, self.ifcond, self.features, self.variants) + + +class QAPISchemaVariants: + def __init__(self, tag_name, info, tag_member, variants): + # Unions pass tag_name but not tag_member. + # Alternates pass tag_member but not tag_name. + # After check(), tag_member is always set. + assert bool(tag_member) != bool(tag_name) + assert (isinstance(tag_name, str) or + isinstance(tag_member, QAPISchemaObjectTypeMember)) + for v in variants: + assert isinstance(v, QAPISchemaVariant) + self._tag_name = tag_name + self.info = info + self.tag_member = tag_member + self.variants = variants + + def set_defined_in(self, name): + for v in self.variants: + v.set_defined_in(name) + + def check(self, schema, seen): + if self._tag_name: # union + self.tag_member = seen.get(c_name(self._tag_name)) + base = "'base'" + # Pointing to the base type when not implicit would be + # nice, but we don't know it here + if not self.tag_member or self._tag_name != self.tag_member.name: + raise QAPISemError( + self.info, + "discriminator '%s' is not a member of %s" + % (self._tag_name, base)) + # Here we do: + base_type = schema.lookup_type(self.tag_member.defined_in) + assert base_type + if not base_type.is_implicit(): + base = "base type '%s'" % self.tag_member.defined_in + if not isinstance(self.tag_member.type, QAPISchemaEnumType): + raise QAPISemError( + self.info, + "discriminator member '%s' of %s must be of enum type" + % (self._tag_name, base)) + if self.tag_member.optional: + raise QAPISemError( + self.info, + "discriminator member '%s' of %s must not be optional" + % (self._tag_name, base)) + if self.tag_member.ifcond.is_present(): + raise QAPISemError( + self.info, + "discriminator member '%s' of %s must not be conditional" + % (self._tag_name, base)) + else: # alternate + assert isinstance(self.tag_member.type, QAPISchemaEnumType) + assert not self.tag_member.optional + assert not self.tag_member.ifcond.is_present() + if self._tag_name: # union + # branches that are not explicitly covered get an empty type + cases = {v.name for v in self.variants} + for m in self.tag_member.type.members: + if m.name not in cases: + v = QAPISchemaVariant(m.name, self.info, + 'q_empty', m.ifcond) + v.set_defined_in(self.tag_member.defined_in) + self.variants.append(v) + if not self.variants: + raise QAPISemError(self.info, "union has no branches") + for v in self.variants: + 
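+            # Resolve and check each branch member; the naming and shape
+            # constraints are enforced just below.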
v.check(schema) + # Union names must match enum values; alternate names are + # checked separately. Use 'seen' to tell the two apart. + if seen: + if v.name not in self.tag_member.type.member_names(): + raise QAPISemError( + self.info, + "branch '%s' is not a value of %s" + % (v.name, self.tag_member.type.describe())) + if (not isinstance(v.type, QAPISchemaObjectType) + or v.type.variants): + raise QAPISemError( + self.info, + "%s cannot use %s" + % (v.describe(self.info), v.type.describe())) + v.type.check(schema) + + def check_clash(self, info, seen): + for v in self.variants: + # Reset seen map for each variant, since qapi names from one + # branch do not affect another branch + v.type.check_clash(info, dict(seen)) + + +class QAPISchemaMember: + """ Represents object members, enum members and features """ + role = 'member' + + def __init__(self, name, info, ifcond=None): + assert isinstance(name, str) + self.name = name + self.info = info + self.ifcond = ifcond or QAPISchemaIfCond() + self.defined_in = None + + def set_defined_in(self, name): + assert not self.defined_in + self.defined_in = name + + def check_clash(self, info, seen): + cname = c_name(self.name) + if cname in seen: + raise QAPISemError( + info, + "%s collides with %s" + % (self.describe(info), seen[cname].describe(info))) + seen[cname] = self + + def connect_doc(self, doc): + if doc: + doc.connect_member(self) + + def describe(self, info): + role = self.role + defined_in = self.defined_in + assert defined_in + + if defined_in.startswith('q_obj_'): + # See QAPISchema._make_implicit_object_type() - reverse the + # mapping there to create a nice human-readable description + defined_in = defined_in[6:] + if defined_in.endswith('-arg'): + # Implicit type created for a command's dict 'data' + assert role == 'member' + role = 'parameter' + elif defined_in.endswith('-base'): + # Implicit type created for a union's dict 'base' + role = 'base ' + role + else: + assert False + elif defined_in != info.defn_name: + return "%s '%s' of type '%s'" % (role, self.name, defined_in) + return "%s '%s'" % (role, self.name) + + +class QAPISchemaEnumMember(QAPISchemaMember): + role = 'value' + + def __init__(self, name, info, ifcond=None, features=None): + super().__init__(name, info, ifcond) + for f in features or []: + assert isinstance(f, QAPISchemaFeature) + f.set_defined_in(name) + self.features = features or [] + + def connect_doc(self, doc): + super().connect_doc(doc) + if doc: + for f in self.features: + doc.connect_feature(f) + + +class QAPISchemaFeature(QAPISchemaMember): + role = 'feature' + + def is_special(self): + return self.name in ('deprecated', 'unstable') + + +class QAPISchemaObjectTypeMember(QAPISchemaMember): + def __init__(self, name, info, typ, optional, ifcond=None, features=None): + super().__init__(name, info, ifcond) + assert isinstance(typ, str) + assert isinstance(optional, bool) + for f in features or []: + assert isinstance(f, QAPISchemaFeature) + f.set_defined_in(name) + self._type_name = typ + self.type = None + self.optional = optional + self.features = features or [] + + def check(self, schema): + assert self.defined_in + self.type = schema.resolve_type(self._type_name, self.info, + self.describe) + seen = {} + for f in self.features: + f.check_clash(self.info, seen) + + def connect_doc(self, doc): + super().connect_doc(doc) + if doc: + for f in self.features: + doc.connect_feature(f) + + +class QAPISchemaVariant(QAPISchemaObjectTypeMember): + role = 'branch' + + def __init__(self, name, info, typ, 
ifcond=None): + super().__init__(name, info, typ, False, ifcond) + + +class QAPISchemaCommand(QAPISchemaEntity): + meta = 'command' + + def __init__(self, name, info, doc, ifcond, features, + arg_type, ret_type, + gen, success_response, boxed, allow_oob, allow_preconfig, + coroutine): + super().__init__(name, info, doc, ifcond, features) + assert not arg_type or isinstance(arg_type, str) + assert not ret_type or isinstance(ret_type, str) + self._arg_type_name = arg_type + self.arg_type = None + self._ret_type_name = ret_type + self.ret_type = None + self.gen = gen + self.success_response = success_response + self.boxed = boxed + self.allow_oob = allow_oob + self.allow_preconfig = allow_preconfig + self.coroutine = coroutine + + def check(self, schema): + super().check(schema) + if self._arg_type_name: + self.arg_type = schema.resolve_type( + self._arg_type_name, self.info, "command's 'data'") + if not isinstance(self.arg_type, QAPISchemaObjectType): + raise QAPISemError( + self.info, + "command's 'data' cannot take %s" + % self.arg_type.describe()) + if self.arg_type.variants and not self.boxed: + raise QAPISemError( + self.info, + "command's 'data' can take %s only with 'boxed': true" + % self.arg_type.describe()) + if self._ret_type_name: + self.ret_type = schema.resolve_type( + self._ret_type_name, self.info, "command's 'returns'") + if self.name not in self.info.pragma.command_returns_exceptions: + typ = self.ret_type + if isinstance(typ, QAPISchemaArrayType): + typ = self.ret_type.element_type + assert typ + if not isinstance(typ, QAPISchemaObjectType): + raise QAPISemError( + self.info, + "command's 'returns' cannot take %s" + % self.ret_type.describe()) + + def connect_doc(self, doc=None): + super().connect_doc(doc) + doc = doc or self.doc + if doc: + if self.arg_type and self.arg_type.is_implicit(): + self.arg_type.connect_doc(doc) + + def visit(self, visitor): + super().visit(visitor) + visitor.visit_command( + self.name, self.info, self.ifcond, self.features, + self.arg_type, self.ret_type, self.gen, self.success_response, + self.boxed, self.allow_oob, self.allow_preconfig, + self.coroutine) + + +class QAPISchemaEvent(QAPISchemaEntity): + meta = 'event' + + def __init__(self, name, info, doc, ifcond, features, arg_type, boxed): + super().__init__(name, info, doc, ifcond, features) + assert not arg_type or isinstance(arg_type, str) + self._arg_type_name = arg_type + self.arg_type = None + self.boxed = boxed + + def check(self, schema): + super().check(schema) + if self._arg_type_name: + self.arg_type = schema.resolve_type( + self._arg_type_name, self.info, "event's 'data'") + if not isinstance(self.arg_type, QAPISchemaObjectType): + raise QAPISemError( + self.info, + "event's 'data' cannot take %s" + % self.arg_type.describe()) + if self.arg_type.variants and not self.boxed: + raise QAPISemError( + self.info, + "event's 'data' can take %s only with 'boxed': true" + % self.arg_type.describe()) + + def connect_doc(self, doc=None): + super().connect_doc(doc) + doc = doc or self.doc + if doc: + if self.arg_type and self.arg_type.is_implicit(): + self.arg_type.connect_doc(doc) + + def visit(self, visitor): + super().visit(visitor) + visitor.visit_event( + self.name, self.info, self.ifcond, self.features, + self.arg_type, self.boxed) + + +class QAPISchema: + def __init__(self, fname): + self.fname = fname + + try: + parser = QAPISchemaParser(fname) + except OSError as err: + raise QAPIError( + f"can't read schema file '{fname}': {err.strerror}" + ) from err + + exprs = 
check_exprs(parser.exprs) + self.docs = parser.docs + self._entity_list = [] + self._entity_dict = {} + self._module_dict = OrderedDict() + self._schema_dir = os.path.dirname(fname) + self._make_module(QAPISchemaModule.BUILTIN_MODULE_NAME) + self._make_module(fname) + self._predefining = True + self._def_predefineds() + self._predefining = False + self._def_exprs(exprs) + self.check() + + def _def_entity(self, ent): + # Only the predefined types are allowed to not have info + assert ent.info or self._predefining + self._entity_list.append(ent) + if ent.name is None: + return + # TODO reject names that differ only in '_' vs. '.' vs. '-', + # because they're liable to clash in generated C. + other_ent = self._entity_dict.get(ent.name) + if other_ent: + if other_ent.info: + where = QAPISourceError(other_ent.info, "previous definition") + raise QAPISemError( + ent.info, + "'%s' is already defined\n%s" % (ent.name, where)) + raise QAPISemError( + ent.info, "%s is already defined" % other_ent.describe()) + self._entity_dict[ent.name] = ent + + def lookup_entity(self, name, typ=None): + ent = self._entity_dict.get(name) + if typ and not isinstance(ent, typ): + return None + return ent + + def lookup_type(self, name): + return self.lookup_entity(name, QAPISchemaType) + + def resolve_type(self, name, info, what): + typ = self.lookup_type(name) + if not typ: + if callable(what): + what = what(info) + raise QAPISemError( + info, "%s uses unknown type '%s'" % (what, name)) + return typ + + def _module_name(self, fname: str) -> str: + if QAPISchemaModule.is_system_module(fname): + return fname + return os.path.relpath(fname, self._schema_dir) + + def _make_module(self, fname): + name = self._module_name(fname) + if name not in self._module_dict: + self._module_dict[name] = QAPISchemaModule(name) + return self._module_dict[name] + + def module_by_fname(self, fname): + name = self._module_name(fname) + return self._module_dict[name] + + def _def_include(self, expr, info, doc): + include = expr['include'] + assert doc is None + self._def_entity(QAPISchemaInclude(self._make_module(include), info)) + + def _def_builtin_type(self, name, json_type, c_type): + self._def_entity(QAPISchemaBuiltinType(name, json_type, c_type)) + # Instantiating only the arrays that are actually used would + # be nice, but we can't as long as their generated code + # (qapi-builtin-types.[ch]) may be shared by some other + # schema. 
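+        # (For instance, 'strList' is always generated, whether or not
+        # any schema actually uses an array of str.)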
+ self._make_array_type(name, None) + + def _def_predefineds(self): + for t in [('str', 'string', 'char' + POINTER_SUFFIX), + ('number', 'number', 'double'), + ('int', 'int', 'int64_t'), + ('int8', 'int', 'int8_t'), + ('int16', 'int', 'int16_t'), + ('int32', 'int', 'int32_t'), + ('int64', 'int', 'int64_t'), + ('uint8', 'int', 'uint8_t'), + ('uint16', 'int', 'uint16_t'), + ('uint32', 'int', 'uint32_t'), + ('uint64', 'int', 'uint64_t'), + ('size', 'int', 'uint64_t'), + ('bool', 'boolean', 'bool'), + ('any', 'value', 'QObject' + POINTER_SUFFIX), + ('null', 'null', 'QNull' + POINTER_SUFFIX)]: + self._def_builtin_type(*t) + self.the_empty_object_type = QAPISchemaObjectType( + 'q_empty', None, None, None, None, None, [], None) + self._def_entity(self.the_empty_object_type) + + qtypes = ['none', 'qnull', 'qnum', 'qstring', 'qdict', 'qlist', + 'qbool'] + qtype_values = self._make_enum_members( + [{'name': n} for n in qtypes], None) + + self._def_entity(QAPISchemaEnumType('QType', None, None, None, None, + qtype_values, 'QTYPE')) + + def _make_features(self, features, info): + if features is None: + return [] + return [QAPISchemaFeature(f['name'], info, + QAPISchemaIfCond(f.get('if'))) + for f in features] + + def _make_enum_member(self, name, ifcond, features, info): + return QAPISchemaEnumMember(name, info, + QAPISchemaIfCond(ifcond), + self._make_features(features, info)) + + def _make_enum_members(self, values, info): + return [self._make_enum_member(v['name'], v.get('if'), + v.get('features'), info) + for v in values] + + def _make_array_type(self, element_type, info): + name = element_type + 'List' # reserved by check_defn_name_str() + if not self.lookup_type(name): + self._def_entity(QAPISchemaArrayType(name, info, element_type)) + return name + + def _make_implicit_object_type(self, name, info, ifcond, role, members): + if not members: + return None + # See also QAPISchemaObjectTypeMember.describe() + name = 'q_obj_%s-%s' % (name, role) + typ = self.lookup_entity(name, QAPISchemaObjectType) + if typ: + # The implicit object type has multiple users. This can + # only be a duplicate definition, which will be flagged + # later. 
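+            # (For example, a second definition of command 'foo' would
+            # look up the same implicit argument type 'q_obj_foo-arg'.)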
+ pass + else: + self._def_entity(QAPISchemaObjectType( + name, info, None, ifcond, None, None, members, None)) + return name + + def _def_enum_type(self, expr, info, doc): + name = expr['enum'] + data = expr['data'] + prefix = expr.get('prefix') + ifcond = QAPISchemaIfCond(expr.get('if')) + features = self._make_features(expr.get('features'), info) + self._def_entity(QAPISchemaEnumType( + name, info, doc, ifcond, features, + self._make_enum_members(data, info), prefix)) + + def _make_member(self, name, typ, ifcond, features, info): + optional = False + if name.startswith('*'): + name = name[1:] + optional = True + if isinstance(typ, list): + assert len(typ) == 1 + typ = self._make_array_type(typ[0], info) + return QAPISchemaObjectTypeMember(name, info, typ, optional, ifcond, + self._make_features(features, info)) + + def _make_members(self, data, info): + return [self._make_member(key, value['type'], + QAPISchemaIfCond(value.get('if')), + value.get('features'), info) + for (key, value) in data.items()] + + def _def_struct_type(self, expr, info, doc): + name = expr['struct'] + base = expr.get('base') + data = expr['data'] + ifcond = QAPISchemaIfCond(expr.get('if')) + features = self._make_features(expr.get('features'), info) + self._def_entity(QAPISchemaObjectType( + name, info, doc, ifcond, features, base, + self._make_members(data, info), + None)) + + def _make_variant(self, case, typ, ifcond, info): + return QAPISchemaVariant(case, info, typ, ifcond) + + def _def_union_type(self, expr, info, doc): + name = expr['union'] + base = expr['base'] + tag_name = expr['discriminator'] + data = expr['data'] + ifcond = QAPISchemaIfCond(expr.get('if')) + features = self._make_features(expr.get('features'), info) + if isinstance(base, dict): + base = self._make_implicit_object_type( + name, info, ifcond, + 'base', self._make_members(base, info)) + variants = [ + self._make_variant(key, value['type'], + QAPISchemaIfCond(value.get('if')), + info) + for (key, value) in data.items()] + members = [] + self._def_entity( + QAPISchemaObjectType(name, info, doc, ifcond, features, + base, members, + QAPISchemaVariants( + tag_name, info, None, variants))) + + def _def_alternate_type(self, expr, info, doc): + name = expr['alternate'] + data = expr['data'] + ifcond = QAPISchemaIfCond(expr.get('if')) + features = self._make_features(expr.get('features'), info) + variants = [ + self._make_variant(key, value['type'], + QAPISchemaIfCond(value.get('if')), + info) + for (key, value) in data.items()] + tag_member = QAPISchemaObjectTypeMember('type', info, 'QType', False) + self._def_entity( + QAPISchemaAlternateType(name, info, doc, ifcond, features, + QAPISchemaVariants( + None, info, tag_member, variants))) + + def _def_command(self, expr, info, doc): + name = expr['command'] + data = expr.get('data') + rets = expr.get('returns') + gen = expr.get('gen', True) + success_response = expr.get('success-response', True) + boxed = expr.get('boxed', False) + allow_oob = expr.get('allow-oob', False) + allow_preconfig = expr.get('allow-preconfig', False) + coroutine = expr.get('coroutine', False) + ifcond = QAPISchemaIfCond(expr.get('if')) + features = self._make_features(expr.get('features'), info) + if isinstance(data, OrderedDict): + data = self._make_implicit_object_type( + name, info, ifcond, + 'arg', self._make_members(data, info)) + if isinstance(rets, list): + assert len(rets) == 1 + rets = self._make_array_type(rets[0], info) + self._def_entity(QAPISchemaCommand(name, info, doc, ifcond, features, + data, rets, + gen, 
success_response, + boxed, allow_oob, allow_preconfig, + coroutine)) + + def _def_event(self, expr, info, doc): + name = expr['event'] + data = expr.get('data') + boxed = expr.get('boxed', False) + ifcond = QAPISchemaIfCond(expr.get('if')) + features = self._make_features(expr.get('features'), info) + if isinstance(data, OrderedDict): + data = self._make_implicit_object_type( + name, info, ifcond, + 'arg', self._make_members(data, info)) + self._def_entity(QAPISchemaEvent(name, info, doc, ifcond, features, + data, boxed)) + + def _def_exprs(self, exprs): + for expr_elem in exprs: + expr = expr_elem['expr'] + info = expr_elem['info'] + doc = expr_elem.get('doc') + if 'enum' in expr: + self._def_enum_type(expr, info, doc) + elif 'struct' in expr: + self._def_struct_type(expr, info, doc) + elif 'union' in expr: + self._def_union_type(expr, info, doc) + elif 'alternate' in expr: + self._def_alternate_type(expr, info, doc) + elif 'command' in expr: + self._def_command(expr, info, doc) + elif 'event' in expr: + self._def_event(expr, info, doc) + elif 'include' in expr: + self._def_include(expr, info, doc) + else: + assert False + + def check(self): + for ent in self._entity_list: + ent.check(self) + ent.connect_doc() + ent.check_doc() + for ent in self._entity_list: + ent.set_module(self) + + def visit(self, visitor): + visitor.visit_begin(self) + for mod in self._module_dict.values(): + mod.visit(visitor) + visitor.visit_end() diff --git a/scripts/qapi/source.py b/scripts/qapi/source.py new file mode 100644 index 000000000..04193cc96 --- /dev/null +++ b/scripts/qapi/source.py @@ -0,0 +1,71 @@ +# +# QAPI frontend source file info +# +# Copyright (c) 2019 Red Hat Inc. +# +# Authors: +# Markus Armbruster <armbru@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2. +# See the COPYING file in the top-level directory. + +import copy +from typing import List, Optional, TypeVar + + +class QAPISchemaPragma: + # Replace with @dataclass in Python 3.7+ + # pylint: disable=too-few-public-methods + + def __init__(self) -> None: + # Are documentation comments required? 
+        self.doc_required = False
+        # Commands whose names may use '_'
+        self.command_name_exceptions: List[str] = []
+        # Commands allowed to return a non-dictionary
+        self.command_returns_exceptions: List[str] = []
+        # Types whose member names may violate case conventions
+        self.member_name_exceptions: List[str] = []
+
+
+class QAPISourceInfo:
+    T = TypeVar('T', bound='QAPISourceInfo')
+
+    def __init__(self, fname: str, parent: Optional['QAPISourceInfo']):
+        self.fname = fname
+        self.line = 1
+        self.parent = parent
+        self.pragma: QAPISchemaPragma = (
+            parent.pragma if parent else QAPISchemaPragma()
+        )
+        self.defn_meta: Optional[str] = None
+        self.defn_name: Optional[str] = None
+
+    def set_defn(self, meta: str, name: str) -> None:
+        self.defn_meta = meta
+        self.defn_name = name
+
+    def next_line(self: T) -> T:
+        info = copy.copy(self)
+        info.line += 1
+        return info
+
+    def loc(self) -> str:
+        return f"{self.fname}:{self.line}"
+
+    def in_defn(self) -> str:
+        if self.defn_name:
+            return "%s: In %s '%s':\n" % (self.fname,
+                                          self.defn_meta, self.defn_name)
+        return ''
+
+    def include_path(self) -> str:
+        ret = ''
+        parent = self.parent
+        while parent:
+            ret = 'In file included from %s:\n' % parent.loc() + ret
+            parent = parent.parent
+        return ret
+
+    def __str__(self) -> str:
+        return self.include_path() + self.in_defn() + self.loc()
diff --git a/scripts/qapi/types.py b/scripts/qapi/types.py
new file mode 100644
index 000000000..3013329c2
--- /dev/null
+++ b/scripts/qapi/types.py
@@ -0,0 +1,383 @@
+"""
+QAPI types generator
+
+Copyright IBM, Corp. 2011
+Copyright (c) 2013-2018 Red Hat Inc.
+
+Authors:
+ Anthony Liguori <aliguori@us.ibm.com>
+ Michael Roth <mdroth@linux.vnet.ibm.com>
+ Markus Armbruster <armbru@redhat.com>
+
+This work is licensed under the terms of the GNU GPL, version 2.
+See the COPYING file in the top-level directory.
+""" + +from typing import List, Optional + +from .common import c_enum_const, c_name, mcgen +from .gen import QAPISchemaModularCVisitor, gen_special_features, ifcontext +from .schema import ( + QAPISchema, + QAPISchemaEnumMember, + QAPISchemaFeature, + QAPISchemaIfCond, + QAPISchemaObjectType, + QAPISchemaObjectTypeMember, + QAPISchemaType, + QAPISchemaVariants, +) +from .source import QAPISourceInfo + + +# variants must be emitted before their container; track what has already +# been output +objects_seen = set() + + +def gen_enum_lookup(name: str, + members: List[QAPISchemaEnumMember], + prefix: Optional[str] = None) -> str: + max_index = c_enum_const(name, '_MAX', prefix) + feats = '' + ret = mcgen(''' + +const QEnumLookup %(c_name)s_lookup = { + .array = (const char *const[]) { +''', + c_name=c_name(name)) + for memb in members: + ret += memb.ifcond.gen_if() + index = c_enum_const(name, memb.name, prefix) + ret += mcgen(''' + [%(index)s] = "%(name)s", +''', + index=index, name=memb.name) + ret += memb.ifcond.gen_endif() + + special_features = gen_special_features(memb.features) + if special_features != '0': + feats += mcgen(''' + [%(index)s] = %(special_features)s, +''', + index=index, special_features=special_features) + + if feats: + ret += mcgen(''' + }, + .special_features = (const unsigned char[%(max_index)s]) { +''', + max_index=max_index) + ret += feats + + ret += mcgen(''' + }, + .size = %(max_index)s +}; +''', + max_index=max_index) + return ret + + +def gen_enum(name: str, + members: List[QAPISchemaEnumMember], + prefix: Optional[str] = None) -> str: + # append automatically generated _MAX value + enum_members = members + [QAPISchemaEnumMember('_MAX', None)] + + ret = mcgen(''' + +typedef enum %(c_name)s { +''', + c_name=c_name(name)) + + for memb in enum_members: + ret += memb.ifcond.gen_if() + ret += mcgen(''' + %(c_enum)s, +''', + c_enum=c_enum_const(name, memb.name, prefix)) + ret += memb.ifcond.gen_endif() + + ret += mcgen(''' +} %(c_name)s; +''', + c_name=c_name(name)) + + ret += mcgen(''' + +#define %(c_name)s_str(val) \\ + qapi_enum_lookup(&%(c_name)s_lookup, (val)) + +extern const QEnumLookup %(c_name)s_lookup; +''', + c_name=c_name(name)) + return ret + + +def gen_fwd_object_or_array(name: str) -> str: + return mcgen(''' + +typedef struct %(c_name)s %(c_name)s; +''', + c_name=c_name(name)) + + +def gen_array(name: str, element_type: QAPISchemaType) -> str: + return mcgen(''' + +struct %(c_name)s { + %(c_name)s *next; + %(c_type)s value; +}; +''', + c_name=c_name(name), c_type=element_type.c_type()) + + +def gen_struct_members(members: List[QAPISchemaObjectTypeMember]) -> str: + ret = '' + for memb in members: + ret += memb.ifcond.gen_if() + if memb.optional: + ret += mcgen(''' + bool has_%(c_name)s; +''', + c_name=c_name(memb.name)) + ret += mcgen(''' + %(c_type)s %(c_name)s; +''', + c_type=memb.type.c_type(), c_name=c_name(memb.name)) + ret += memb.ifcond.gen_endif() + return ret + + +def gen_object(name: str, ifcond: QAPISchemaIfCond, + base: Optional[QAPISchemaObjectType], + members: List[QAPISchemaObjectTypeMember], + variants: Optional[QAPISchemaVariants]) -> str: + if name in objects_seen: + return '' + objects_seen.add(name) + + ret = '' + for var in variants.variants if variants else (): + obj = var.type + if not isinstance(obj, QAPISchemaObjectType): + continue + ret += gen_object(obj.name, obj.ifcond, obj.base, + obj.local_members, obj.variants) + + ret += mcgen(''' + +''') + ret += ifcond.gen_if() + ret += mcgen(''' +struct %(c_name)s { +''', + 
c_name=c_name(name)) + + if base: + if not base.is_implicit(): + ret += mcgen(''' + /* Members inherited from %(c_name)s: */ +''', + c_name=base.c_name()) + ret += gen_struct_members(base.members) + if not base.is_implicit(): + ret += mcgen(''' + /* Own members: */ +''') + ret += gen_struct_members(members) + + if variants: + ret += gen_variants(variants) + + # Make sure that all structs have at least one member; this avoids + # potential issues with attempting to malloc space for zero-length + # structs in C, and also incompatibility with C++ (where an empty + # struct is size 1). + if (not base or base.is_empty()) and not members and not variants: + ret += mcgen(''' + char qapi_dummy_for_empty_struct; +''') + + ret += mcgen(''' +}; +''') + ret += ifcond.gen_endif() + + return ret + + +def gen_upcast(name: str, base: QAPISchemaObjectType) -> str: + # C makes const-correctness ugly. We have to cast away const to let + # this function work for both const and non-const obj. + return mcgen(''' + +static inline %(base)s *qapi_%(c_name)s_base(const %(c_name)s *obj) +{ + return (%(base)s *)obj; +} +''', + c_name=c_name(name), base=base.c_name()) + + +def gen_variants(variants: QAPISchemaVariants) -> str: + ret = mcgen(''' + union { /* union tag is @%(c_name)s */ +''', + c_name=c_name(variants.tag_member.name)) + + for var in variants.variants: + if var.type.name == 'q_empty': + continue + ret += var.ifcond.gen_if() + ret += mcgen(''' + %(c_type)s %(c_name)s; +''', + c_type=var.type.c_unboxed_type(), + c_name=c_name(var.name)) + ret += var.ifcond.gen_endif() + + ret += mcgen(''' + } u; +''') + + return ret + + +def gen_type_cleanup_decl(name: str) -> str: + ret = mcgen(''' + +void qapi_free_%(c_name)s(%(c_name)s *obj); +G_DEFINE_AUTOPTR_CLEANUP_FUNC(%(c_name)s, qapi_free_%(c_name)s) +''', + c_name=c_name(name)) + return ret + + +def gen_type_cleanup(name: str) -> str: + ret = mcgen(''' + +void qapi_free_%(c_name)s(%(c_name)s *obj) +{ + Visitor *v; + + if (!obj) { + return; + } + + v = qapi_dealloc_visitor_new(); + visit_type_%(c_name)s(v, NULL, &obj, NULL); + visit_free(v); +} +''', + c_name=c_name(name)) + return ret + + +class QAPISchemaGenTypeVisitor(QAPISchemaModularCVisitor): + + def __init__(self, prefix: str): + super().__init__( + prefix, 'qapi-types', ' * Schema-defined QAPI types', + ' * Built-in QAPI types', __doc__) + + def _begin_builtin_module(self) -> None: + self._genc.preamble_add(mcgen(''' +#include "qemu/osdep.h" +#include "qapi/dealloc-visitor.h" +#include "qapi/qapi-builtin-types.h" +#include "qapi/qapi-builtin-visit.h" +''')) + self._genh.preamble_add(mcgen(''' +#include "qapi/util.h" +''')) + + def _begin_user_module(self, name: str) -> None: + types = self._module_basename('qapi-types', name) + visit = self._module_basename('qapi-visit', name) + self._genc.preamble_add(mcgen(''' +#include "qemu/osdep.h" +#include "qapi/dealloc-visitor.h" +#include "%(types)s.h" +#include "%(visit)s.h" +''', + types=types, visit=visit)) + self._genh.preamble_add(mcgen(''' +#include "qapi/qapi-builtin-types.h" +''')) + + def visit_begin(self, schema: QAPISchema) -> None: + # gen_object() is recursive, ensure it doesn't visit the empty type + objects_seen.add(schema.the_empty_object_type.name) + + def _gen_type_cleanup(self, name: str) -> None: + self._genh.add(gen_type_cleanup_decl(name)) + self._genc.add(gen_type_cleanup(name)) + + def visit_enum_type(self, + name: str, + info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + features: List[QAPISchemaFeature], + members: 
List[QAPISchemaEnumMember], + prefix: Optional[str]) -> None: + with ifcontext(ifcond, self._genh, self._genc): + self._genh.preamble_add(gen_enum(name, members, prefix)) + self._genc.add(gen_enum_lookup(name, members, prefix)) + + def visit_array_type(self, + name: str, + info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + element_type: QAPISchemaType) -> None: + with ifcontext(ifcond, self._genh, self._genc): + self._genh.preamble_add(gen_fwd_object_or_array(name)) + self._genh.add(gen_array(name, element_type)) + self._gen_type_cleanup(name) + + def visit_object_type(self, + name: str, + info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + features: List[QAPISchemaFeature], + base: Optional[QAPISchemaObjectType], + members: List[QAPISchemaObjectTypeMember], + variants: Optional[QAPISchemaVariants]) -> None: + # Nothing to do for the special empty builtin + if name == 'q_empty': + return + with ifcontext(ifcond, self._genh): + self._genh.preamble_add(gen_fwd_object_or_array(name)) + self._genh.add(gen_object(name, ifcond, base, members, variants)) + with ifcontext(ifcond, self._genh, self._genc): + if base and not base.is_implicit(): + self._genh.add(gen_upcast(name, base)) + # TODO Worth changing the visitor signature, so we could + # directly use rather than repeat type.is_implicit()? + if not name.startswith('q_'): + # implicit types won't be directly allocated/freed + self._gen_type_cleanup(name) + + def visit_alternate_type(self, + name: str, + info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + features: List[QAPISchemaFeature], + variants: QAPISchemaVariants) -> None: + with ifcontext(ifcond, self._genh): + self._genh.preamble_add(gen_fwd_object_or_array(name)) + self._genh.add(gen_object(name, ifcond, None, + [variants.tag_member], variants)) + with ifcontext(ifcond, self._genh, self._genc): + self._gen_type_cleanup(name) + + +def gen_types(schema: QAPISchema, + output_dir: str, + prefix: str, + opt_builtins: bool) -> None: + vis = QAPISchemaGenTypeVisitor(prefix) + schema.visit(vis) + vis.write(output_dir, opt_builtins) diff --git a/scripts/qapi/visit.py b/scripts/qapi/visit.py new file mode 100644 index 000000000..e13bbe429 --- /dev/null +++ b/scripts/qapi/visit.py @@ -0,0 +1,410 @@ +""" +QAPI visitor generator + +Copyright IBM, Corp. 2011 +Copyright (C) 2014-2018 Red Hat, Inc. + +Authors: + Anthony Liguori <aliguori@us.ibm.com> + Michael Roth <mdroth@linux.vnet.ibm.com> + Markus Armbruster <armbru@redhat.com> + +This work is licensed under the terms of the GNU GPL, version 2. +See the COPYING file in the top-level directory. 
+""" + +from typing import List, Optional + +from .common import ( + c_enum_const, + c_name, + indent, + mcgen, +) +from .gen import QAPISchemaModularCVisitor, gen_special_features, ifcontext +from .schema import ( + QAPISchema, + QAPISchemaEnumMember, + QAPISchemaEnumType, + QAPISchemaFeature, + QAPISchemaIfCond, + QAPISchemaObjectType, + QAPISchemaObjectTypeMember, + QAPISchemaType, + QAPISchemaVariants, +) +from .source import QAPISourceInfo + + +def gen_visit_decl(name: str, scalar: bool = False) -> str: + c_type = c_name(name) + ' *' + if not scalar: + c_type += '*' + return mcgen(''' + +bool visit_type_%(c_name)s(Visitor *v, const char *name, + %(c_type)sobj, Error **errp); +''', + c_name=c_name(name), c_type=c_type) + + +def gen_visit_members_decl(name: str) -> str: + return mcgen(''' + +bool visit_type_%(c_name)s_members(Visitor *v, %(c_name)s *obj, Error **errp); +''', + c_name=c_name(name)) + + +def gen_visit_object_members(name: str, + base: Optional[QAPISchemaObjectType], + members: List[QAPISchemaObjectTypeMember], + variants: Optional[QAPISchemaVariants]) -> str: + ret = mcgen(''' + +bool visit_type_%(c_name)s_members(Visitor *v, %(c_name)s *obj, Error **errp) +{ +''', + c_name=c_name(name)) + + if base: + ret += mcgen(''' + if (!visit_type_%(c_type)s_members(v, (%(c_type)s *)obj, errp)) { + return false; + } +''', + c_type=base.c_name()) + + for memb in members: + ret += memb.ifcond.gen_if() + if memb.optional: + ret += mcgen(''' + if (visit_optional(v, "%(name)s", &obj->has_%(c_name)s)) { +''', + name=memb.name, c_name=c_name(memb.name)) + indent.increase() + special_features = gen_special_features(memb.features) + if special_features != '0': + ret += mcgen(''' + if (visit_policy_reject(v, "%(name)s", %(special_features)s, errp)) { + return false; + } + if (!visit_policy_skip(v, "%(name)s", %(special_features)s)) { +''', + name=memb.name, special_features=special_features) + indent.increase() + ret += mcgen(''' + if (!visit_type_%(c_type)s(v, "%(name)s", &obj->%(c_name)s, errp)) { + return false; + } +''', + c_type=memb.type.c_name(), name=memb.name, + c_name=c_name(memb.name)) + if special_features != '0': + indent.decrease() + ret += mcgen(''' + } +''') + if memb.optional: + indent.decrease() + ret += mcgen(''' + } +''') + ret += memb.ifcond.gen_endif() + + if variants: + tag_member = variants.tag_member + assert isinstance(tag_member.type, QAPISchemaEnumType) + + ret += mcgen(''' + switch (obj->%(c_name)s) { +''', + c_name=c_name(tag_member.name)) + + for var in variants.variants: + case_str = c_enum_const(tag_member.type.name, var.name, + tag_member.type.prefix) + ret += var.ifcond.gen_if() + if var.type.name == 'q_empty': + # valid variant and nothing to do + ret += mcgen(''' + case %(case)s: + break; +''', + case=case_str) + else: + ret += mcgen(''' + case %(case)s: + return visit_type_%(c_type)s_members(v, &obj->u.%(c_name)s, errp); +''', + case=case_str, + c_type=var.type.c_name(), c_name=c_name(var.name)) + + ret += var.ifcond.gen_endif() + ret += mcgen(''' + default: + abort(); + } +''') + + ret += mcgen(''' + return true; +} +''') + return ret + + +def gen_visit_list(name: str, element_type: QAPISchemaType) -> str: + return mcgen(''' + +bool visit_type_%(c_name)s(Visitor *v, const char *name, + %(c_name)s **obj, Error **errp) +{ + bool ok = false; + %(c_name)s *tail; + size_t size = sizeof(**obj); + + if (!visit_start_list(v, name, (GenericList **)obj, size, errp)) { + return false; + } + + for (tail = *obj; tail; + tail = (%(c_name)s *)visit_next_list(v, 
(GenericList *)tail, size)) { + if (!visit_type_%(c_elt_type)s(v, NULL, &tail->value, errp)) { + goto out_obj; + } + } + + ok = visit_check_list(v, errp); +out_obj: + visit_end_list(v, (void **)obj); + if (!ok && visit_is_input(v)) { + qapi_free_%(c_name)s(*obj); + *obj = NULL; + } + return ok; +} +''', + c_name=c_name(name), c_elt_type=element_type.c_name()) + + +def gen_visit_enum(name: str) -> str: + return mcgen(''' + +bool visit_type_%(c_name)s(Visitor *v, const char *name, + %(c_name)s *obj, Error **errp) +{ + int value = *obj; + bool ok = visit_type_enum(v, name, &value, &%(c_name)s_lookup, errp); + *obj = value; + return ok; +} +''', + c_name=c_name(name)) + + +def gen_visit_alternate(name: str, variants: QAPISchemaVariants) -> str: + ret = mcgen(''' + +bool visit_type_%(c_name)s(Visitor *v, const char *name, + %(c_name)s **obj, Error **errp) +{ + bool ok = false; + + if (!visit_start_alternate(v, name, (GenericAlternate **)obj, + sizeof(**obj), errp)) { + return false; + } + if (!*obj) { + /* incomplete */ + assert(visit_is_dealloc(v)); + ok = true; + goto out_obj; + } + switch ((*obj)->type) { +''', + c_name=c_name(name)) + + for var in variants.variants: + ret += var.ifcond.gen_if() + ret += mcgen(''' + case %(case)s: +''', + case=var.type.alternate_qtype()) + if isinstance(var.type, QAPISchemaObjectType): + ret += mcgen(''' + if (!visit_start_struct(v, name, NULL, 0, errp)) { + break; + } + if (visit_type_%(c_type)s_members(v, &(*obj)->u.%(c_name)s, errp)) { + ok = visit_check_struct(v, errp); + } + visit_end_struct(v, NULL); +''', + c_type=var.type.c_name(), + c_name=c_name(var.name)) + else: + ret += mcgen(''' + ok = visit_type_%(c_type)s(v, name, &(*obj)->u.%(c_name)s, errp); +''', + c_type=var.type.c_name(), + c_name=c_name(var.name)) + ret += mcgen(''' + break; +''') + ret += var.ifcond.gen_endif() + + ret += mcgen(''' + case QTYPE_NONE: + abort(); + default: + assert(visit_is_input(v)); + error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? 
name : "null", + "%(name)s"); + /* Avoid passing invalid *obj to qapi_free_%(c_name)s() */ + g_free(*obj); + *obj = NULL; + } +out_obj: + visit_end_alternate(v, (void **)obj); + if (!ok && visit_is_input(v)) { + qapi_free_%(c_name)s(*obj); + *obj = NULL; + } + return ok; +} +''', + name=name, c_name=c_name(name)) + + return ret + + +def gen_visit_object(name: str) -> str: + return mcgen(''' + +bool visit_type_%(c_name)s(Visitor *v, const char *name, + %(c_name)s **obj, Error **errp) +{ + bool ok = false; + + if (!visit_start_struct(v, name, (void **)obj, sizeof(%(c_name)s), errp)) { + return false; + } + if (!*obj) { + /* incomplete */ + assert(visit_is_dealloc(v)); + ok = true; + goto out_obj; + } + if (!visit_type_%(c_name)s_members(v, *obj, errp)) { + goto out_obj; + } + ok = visit_check_struct(v, errp); +out_obj: + visit_end_struct(v, (void **)obj); + if (!ok && visit_is_input(v)) { + qapi_free_%(c_name)s(*obj); + *obj = NULL; + } + return ok; +} +''', + c_name=c_name(name)) + + +class QAPISchemaGenVisitVisitor(QAPISchemaModularCVisitor): + + def __init__(self, prefix: str): + super().__init__( + prefix, 'qapi-visit', ' * Schema-defined QAPI visitors', + ' * Built-in QAPI visitors', __doc__) + + def _begin_builtin_module(self) -> None: + self._genc.preamble_add(mcgen(''' +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qapi-builtin-visit.h" +''')) + self._genh.preamble_add(mcgen(''' +#include "qapi/visitor.h" +#include "qapi/qapi-builtin-types.h" + +''')) + + def _begin_user_module(self, name: str) -> None: + types = self._module_basename('qapi-types', name) + visit = self._module_basename('qapi-visit', name) + self._genc.preamble_add(mcgen(''' +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "%(visit)s.h" +''', + visit=visit)) + self._genh.preamble_add(mcgen(''' +#include "qapi/qapi-builtin-visit.h" +#include "%(types)s.h" + +''', + types=types)) + + def visit_enum_type(self, + name: str, + info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + features: List[QAPISchemaFeature], + members: List[QAPISchemaEnumMember], + prefix: Optional[str]) -> None: + with ifcontext(ifcond, self._genh, self._genc): + self._genh.add(gen_visit_decl(name, scalar=True)) + self._genc.add(gen_visit_enum(name)) + + def visit_array_type(self, + name: str, + info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + element_type: QAPISchemaType) -> None: + with ifcontext(ifcond, self._genh, self._genc): + self._genh.add(gen_visit_decl(name)) + self._genc.add(gen_visit_list(name, element_type)) + + def visit_object_type(self, + name: str, + info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + features: List[QAPISchemaFeature], + base: Optional[QAPISchemaObjectType], + members: List[QAPISchemaObjectTypeMember], + variants: Optional[QAPISchemaVariants]) -> None: + # Nothing to do for the special empty builtin + if name == 'q_empty': + return + with ifcontext(ifcond, self._genh, self._genc): + self._genh.add(gen_visit_members_decl(name)) + self._genc.add(gen_visit_object_members(name, base, + members, variants)) + # TODO Worth changing the visitor signature, so we could + # directly use rather than repeat type.is_implicit()? 
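+        # Implicit q_obj_* types are never allocated or freed directly
+        # by generated callers, so they only need the _members() helper
+        # emitted above, not an allocating visit_type_FOO().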
+ if not name.startswith('q_'): + # only explicit types need an allocating visit + self._genh.add(gen_visit_decl(name)) + self._genc.add(gen_visit_object(name)) + + def visit_alternate_type(self, + name: str, + info: Optional[QAPISourceInfo], + ifcond: QAPISchemaIfCond, + features: List[QAPISchemaFeature], + variants: QAPISchemaVariants) -> None: + with ifcontext(ifcond, self._genh, self._genc): + self._genh.add(gen_visit_decl(name)) + self._genc.add(gen_visit_alternate(name, variants)) + + +def gen_visit(schema: QAPISchema, + output_dir: str, + prefix: str, + opt_builtins: bool) -> None: + vis = QAPISchemaGenVisitVisitor(prefix) + schema.visit(vis) + vis.write(output_dir, opt_builtins) diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh new file mode 100755 index 000000000..7de996d53 --- /dev/null +++ b/scripts/qemu-binfmt-conf.sh @@ -0,0 +1,411 @@ +#!/bin/sh +# Enable automatic program execution by the kernel. + +qemu_target_list="i386 i486 alpha arm armeb sparc sparc32plus sparc64 \ +ppc ppc64 ppc64le m68k mips mipsel mipsn32 mipsn32el mips64 mips64el \ +sh4 sh4eb s390x aarch64 aarch64_be hppa riscv32 riscv64 xtensa xtensaeb \ +microblaze microblazeel or1k x86_64 hexagon" + +i386_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03\x00' +i386_mask='\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +i386_family=i386 + +i486_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x06\x00' +i486_mask='\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +i486_family=i386 + +x86_64_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x3e\x00' +x86_64_mask='\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +x86_64_family=i386 + +alpha_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x26\x90' +alpha_mask='\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +alpha_family=alpha + +arm_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00' +arm_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +arm_family=arm + +armeb_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28' +armeb_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +armeb_family=armeb + +sparc_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02' +sparc_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +sparc_family=sparc + +sparc32plus_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x12' +sparc32plus_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +sparc32plus_family=sparc + +sparc64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2b' +sparc64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +sparc64_family=sparc + +ppc_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x14' +ppc_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +ppc_family=ppc + +ppc64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15' +ppc64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +ppc64_family=ppc + +ppc64le_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15\x00' 
+ppc64le_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\x00' +ppc64le_family=ppcle + +m68k_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x04' +m68k_mask='\xff\xff\xff\xff\xff\xff\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +m68k_family=m68k + +# FIXME: We could use the other endianness on a MIPS host. + +mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' +mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mips_family=mips + +mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' +mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mipsel_family=mips + +mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' +mipsn32_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mipsn32_family=mips + +mipsn32el_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' +mipsn32el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mipsn32el_family=mips + +mips64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' +mips64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mips64_family=mips + +mips64el_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' +mips64el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mips64el_family=mips + +sh4_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a\x00' +sh4_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +sh4_family=sh4 + +sh4eb_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a' +sh4eb_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +sh4eb_family=sh4 + +s390x_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x16' +s390x_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +s390x_family=s390x + +aarch64_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7\x00' +aarch64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +aarch64_family=arm + +aarch64_be_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7' +aarch64_be_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +aarch64_be_family=armeb + +hppa_magic='\x7f\x45\x4c\x46\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x0f' +hppa_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +hppa_family=hppa + +riscv32_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xf3\x00' +riscv32_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +riscv32_family=riscv + +riscv64_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xf3\x00' +riscv64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +riscv64_family=riscv + +xtensa_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x5e\x00' +xtensa_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +xtensa_family=xtensa + 
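How these magic/mask pairs are applied: the kernel ANDs the leading bytes of a candidate executable with mask and compares the result against magic. A minimal Python sketch of that rule, using the riscv64 pair above (the matches() helper is illustrative only and not part of this script):

    # binfmt_misc matching rule for the riscv64 entry (illustrative sketch)
    magic = b'\x7fELF\x02\x01\x01\x00' + bytes(8) + b'\x02\x00\xf3\x00'
    mask = b'\xff' * 7 + b'\x00' + b'\xff' * 8 + b'\xfe\xff\xff\xff'

    def matches(header: bytes) -> bool:
        # A file is handed to qemu-riscv64 iff (header & mask) == magic.
        # The \x00 ignores e_ident[EI_OSABI]; the \xfe over e_type's low
        # bit lets both ET_EXEC (2) and ET_DYN (3, i.e. PIE) match.
        return len(header) >= len(magic) and \
            all(h & m == g for h, m, g in zip(header, mask, magic))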
+xtensaeb_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x5e' +xtensaeb_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +xtensaeb_family=xtensaeb + +microblaze_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xba\xab' +microblaze_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +microblaze_family=microblaze + +microblazeel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xab\xba' +microblazeel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +microblazeel_family=microblazeel + +or1k_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x5c' +or1k_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +or1k_family=or1k + +hexagon_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xa4\x00' +hexagon_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +hexagon_family=hexagon + +qemu_get_family() { + cpu=${HOST_ARCH:-$(uname -m)} + case "$cpu" in + amd64|i386|i486|i586|i686|i86pc|BePC|x86_64) + echo "i386" + ;; + mips*) + echo "mips" + ;; + "Power Macintosh"|ppc64|powerpc|ppc) + echo "ppc" + ;; + ppc64el|ppc64le) + echo "ppcle" + ;; + arm|armel|armhf|arm64|armv[4-9]*l|aarch64) + echo "arm" + ;; + armeb|armv[4-9]*b|aarch64_be) + echo "armeb" + ;; + sparc*) + echo "sparc" + ;; + riscv*) + echo "riscv" + ;; + *) + echo "$cpu" + ;; + esac +} + +usage() { + cat <<EOF +Usage: qemu-binfmt-conf.sh [--qemu-path PATH][--debian][--systemd CPU] + [--help][--credential yes|no][--exportdir PATH] + [--persistent yes|no][--qemu-suffix SUFFIX] + [--preserve-argv0 yes|no] + + Configure binfmt_misc to use qemu interpreter + + --help: display this usage + --qemu-path: set path to qemu interpreter ($QEMU_PATH) + --qemu-suffix: add a suffix to the default interpreter name + --debian: don't write into /proc, + instead generate update-binfmts templates + --systemd: don't write into /proc, + instead generate file for systemd-binfmt.service + for the given CPU. If CPU is "ALL", generate a + file for all known cpus + --exportdir: define where to write configuration files + (default: $SYSTEMDDIR or $DEBIANDIR) + --credential: if yes, credential and security tokens are + calculated according to the binary to interpret + --persistent: if yes, the interpreter is loaded when binfmt is + configured and remains in memory. All future uses + are cloned from the open file. + --preserve-argv0 preserve argv[0] + + To import templates with update-binfmts, use : + + sudo update-binfmts --importdir ${EXPORTDIR:-$DEBIANDIR} --import qemu-CPU + + To remove interpreter, use : + + sudo update-binfmts --package qemu-CPU --remove qemu-CPU $QEMU_PATH + + With systemd, binfmt files are loaded by systemd-binfmt.service + + The environment variable HOST_ARCH allows to override 'uname' to generate + configuration files for a different architecture than the current one. + + where CPU is one of: + + $qemu_target_list + +EOF +} + +qemu_check_access() { + if [ ! -w "$1" ] ; then + echo "ERROR: cannot write to $1" 1>&2 + exit 1 + fi +} + +qemu_check_bintfmt_misc() { + # load the binfmt_misc module + if [ ! -d /proc/sys/fs/binfmt_misc ]; then + if ! /sbin/modprobe binfmt_misc ; then + exit 1 + fi + fi + if [ ! -f /proc/sys/fs/binfmt_misc/register ]; then + if ! 
mount binfmt_misc -t binfmt_misc /proc/sys/fs/binfmt_misc ; then + exit 1 + fi + fi + + qemu_check_access /proc/sys/fs/binfmt_misc/register +} + +installed_dpkg() { + dpkg --status "$1" > /dev/null 2>&1 +} + +qemu_check_debian() { + if [ ! -e /etc/debian_version ] ; then + echo "WARNING: your system is not a Debian based distro" 1>&2 + elif ! installed_dpkg binfmt-support ; then + echo "WARNING: package binfmt-support is needed" 1>&2 + fi + qemu_check_access "$EXPORTDIR" +} + +qemu_check_systemd() { + if ! systemctl -q is-enabled systemd-binfmt.service ; then + echo "WARNING: systemd-binfmt.service is missing or disabled" 1>&2 + fi + qemu_check_access "$EXPORTDIR" +} + +qemu_generate_register() { + flags="" + if [ "$CREDENTIAL" = "yes" ] ; then + flags="OC" + fi + if [ "$PERSISTENT" = "yes" ] ; then + flags="${flags}F" + fi + if [ "$PRESERVE_ARG0" = "yes" ] ; then + flags="${flags}P" + fi + + echo ":qemu-$cpu:M::$magic:$mask:$qemu:$flags" +} + +qemu_register_interpreter() { + echo "Setting $qemu as binfmt interpreter for $cpu" + qemu_generate_register > /proc/sys/fs/binfmt_misc/register +} + +qemu_generate_systemd() { + echo "Setting $qemu as binfmt interpreter for $cpu for systemd-binfmt.service" + qemu_generate_register > "$EXPORTDIR/qemu-$cpu.conf" +} + +qemu_generate_debian() { + cat > "$EXPORTDIR/qemu-$cpu" <<EOF +package qemu-$cpu +interpreter $qemu +magic $magic +mask $mask +credentials $CREDENTIAL +preserve $PRESERVE_ARG0 +fix_binary $PERSISTENT +EOF +} + +qemu_set_binfmts() { + # probe cpu type + host_family=$(qemu_get_family) + + # register the interpreter for each cpu except for the native one + + for cpu in ${qemu_target_list} ; do + magic=$(eval echo \$${cpu}_magic) + mask=$(eval echo \$${cpu}_mask) + family=$(eval echo \$${cpu}_family) + + if [ "$magic" = "" ] || [ "$mask" = "" ] || [ "$family" = "" ] ; then + echo "INTERNAL ERROR: unknown cpu $cpu" 1>&2 + continue + fi + + qemu="$QEMU_PATH/qemu-$cpu" + if [ "$cpu" = "i486" ] ; then + qemu="$QEMU_PATH/qemu-i386" + fi + + qemu="$qemu$QEMU_SUFFIX" + if [ "$host_family" != "$family" ] ; then + $BINFMT_SET + fi + done +} + +CHECK=qemu_check_bintfmt_misc +BINFMT_SET=qemu_register_interpreter + +SYSTEMDDIR="/etc/binfmt.d" +DEBIANDIR="/usr/share/binfmts" + +QEMU_PATH=/usr/local/bin +CREDENTIAL=no +PERSISTENT=no +PRESERVE_ARG0=no +QEMU_SUFFIX="" + +options=$(getopt -o ds:Q:S:e:hc:p:g: -l debian,systemd:,qemu-path:,qemu-suffix:,exportdir:,help,credential:,persistent:,preserve-argv0: -- "$@") +eval set -- "$options" + +while true ; do + case "$1" in + -d|--debian) + CHECK=qemu_check_debian + BINFMT_SET=qemu_generate_debian + EXPORTDIR=${EXPORTDIR:-$DEBIANDIR} + ;; + -s|--systemd) + CHECK=qemu_check_systemd + BINFMT_SET=qemu_generate_systemd + EXPORTDIR=${EXPORTDIR:-$SYSTEMDDIR} + shift + # check given cpu is in the supported CPU list + if [ "$1" != "ALL" ] ; then + for cpu in ${qemu_target_list} ; do + if [ "$cpu" = "$1" ] ; then + break + fi + done + + if [ "$cpu" = "$1" ] ; then + qemu_target_list="$1" + else + echo "ERROR: unknown CPU \"$1\"" 1>&2 + usage + exit 1 + fi + fi + ;; + -Q|--qemu-path) + shift + QEMU_PATH="$1" + ;; + -F|--qemu-suffix) + shift + QEMU_SUFFIX="$1" + ;; + -e|--exportdir) + shift + EXPORTDIR="$1" + ;; + -h|--help) + usage + exit 1 + ;; + -c|--credential) + shift + CREDENTIAL="$1" + ;; + -p|--persistent) + shift + PERSISTENT="$1" + ;; + -g|--preserve-argv0) + shift + PRESERVE_ARG0="$1" + ;; + *) + break + ;; + esac + shift +done + +$CHECK +qemu_set_binfmts diff --git a/scripts/qemu-gdb.py b/scripts/qemu-gdb.py 
new file mode 100644 index 000000000..4d2a9f6c4 --- /dev/null +++ b/scripts/qemu-gdb.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# +# GDB debugging support +# +# Copyright 2012 Red Hat, Inc. and/or its affiliates +# +# Authors: +# Avi Kivity <avi@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. + +# Usage: +# At the (gdb) prompt, type "source scripts/qemu-gdb.py". +# "help qemu" should then list the supported QEMU debug support commands. + +import gdb + +import os, sys + +# Annoyingly, gdb doesn't put the directory of scripts onto the +# module search path. Do it manually. + +sys.path.append(os.path.dirname(__file__)) + +from qemugdb import aio, mtree, coroutine, tcg, timers + +class QemuCommand(gdb.Command): + '''Prefix for QEMU debug support commands''' + def __init__(self): + gdb.Command.__init__(self, 'qemu', gdb.COMMAND_DATA, + gdb.COMPLETE_NONE, True) + +QemuCommand() +coroutine.CoroutineCommand() +mtree.MtreeCommand() +aio.HandlersCommand() +tcg.TCGLockStatusCommand() +timers.TimersCommand() + +coroutine.CoroutineSPFunction() +coroutine.CoroutinePCFunction() +coroutine.CoroutineBt() + +# Default to silently passing through SIGUSR1, because QEMU sends it +# to itself a lot. +gdb.execute('handle SIGUSR1 pass noprint nostop') diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook new file mode 100755 index 000000000..13aafd484 --- /dev/null +++ b/scripts/qemu-guest-agent/fsfreeze-hook @@ -0,0 +1,33 @@ +#!/bin/sh + +# This script is executed when a guest agent receives fsfreeze-freeze and +# fsfreeze-thaw command, if it is specified in --fsfreeze-hook (-F) +# option of qemu-ga or placed in default path (/etc/qemu/fsfreeze-hook). +# When the agent receives fsfreeze-freeze request, this script is issued with +# "freeze" argument before the filesystem is frozen. And for fsfreeze-thaw +# request, it is issued with "thaw" argument after filesystem is thawed. + +LOGFILE=/var/log/qga-fsfreeze-hook.log +FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d + +# Check whether file $1 is a backup or rpm-generated file and should be ignored +is_ignored_file() { + case "$1" in + *~ | *.bak | *.orig | *.rpmnew | *.rpmorig | *.rpmsave | *.sample | *.dpkg-old | *.dpkg-new | *.dpkg-tmp | *.dpkg-dist | *.dpkg-bak | *.dpkg-backup | *.dpkg-remove) + return 0 ;; + esac + return 1 +} + +# Iterate executables in directory "fsfreeze-hook.d" with the specified args +[ ! -d "$FSFREEZE_D" ] && exit 0 +for file in "$FSFREEZE_D"/* ; do + is_ignored_file "$file" && continue + [ -x "$file" ] || continue + printf "$(date): execute $file $@\n" >>$LOGFILE + "$file" "$@" >>$LOGFILE 2>&1 + STATUS=$? + printf "$(date): $file finished with status=$STATUS\n" >>$LOGFILE +done + +exit 0 diff --git a/scripts/qemu-guest-agent/fsfreeze-hook.d/mysql-flush.sh.sample b/scripts/qemu-guest-agent/fsfreeze-hook.d/mysql-flush.sh.sample new file mode 100755 index 000000000..2b4fa3aeb --- /dev/null +++ b/scripts/qemu-guest-agent/fsfreeze-hook.d/mysql-flush.sh.sample @@ -0,0 +1,56 @@ +#!/bin/sh + +# Flush MySQL tables to the disk before the filesystem is frozen. +# At the same time, this keeps a read lock in order to avoid write accesses +# from the other clients until the filesystem is thawed. 
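+#
+# How it works: on "freeze" a background mysql client takes
+# FLUSH TABLES WITH READ LOCK and then blocks reading a FIFO; on "thaw"
+# a write to that FIFO unblocks it, so it issues UNLOCK TABLES and
+# exits.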
+ +MYSQL="/usr/bin/mysql" +MYSQL_OPTS="-uroot" #"-prootpassword" +FIFO=/var/run/mysql-flush.fifo + +# Check mysql is installed and the server running +[ -x "$MYSQL" ] && "$MYSQL" $MYSQL_OPTS < /dev/null || exit 0 + +flush_and_wait() { + printf "FLUSH TABLES WITH READ LOCK \\G\n" + trap 'printf "$(date): $0 is killed\n">&2' HUP INT QUIT ALRM TERM + read < $FIFO + printf "UNLOCK TABLES \\G\n" + rm -f $FIFO +} + +case "$1" in + freeze) + mkfifo $FIFO || exit 1 + flush_and_wait | "$MYSQL" $MYSQL_OPTS & + # wait until every block is flushed + while [ "$(echo 'SHOW STATUS LIKE "Key_blocks_not_flushed"' |\ + "$MYSQL" $MYSQL_OPTS | tail -1 | cut -f 2)" -gt 0 ]; do + sleep 1 + done + # for InnoDB, wait until every log is flushed + INNODB_STATUS=$(mktemp /tmp/mysql-flush.XXXXXX) + [ $? -ne 0 ] && exit 2 + trap "rm -f $INNODB_STATUS; exit 1" HUP INT QUIT ALRM TERM + while :; do + printf "SHOW ENGINE INNODB STATUS \\G" |\ + "$MYSQL" $MYSQL_OPTS > $INNODB_STATUS + LOG_CURRENT=$(grep 'Log sequence number' $INNODB_STATUS |\ + tr -s ' ' | cut -d' ' -f4) + LOG_FLUSHED=$(grep 'Log flushed up to' $INNODB_STATUS |\ + tr -s ' ' | cut -d' ' -f5) + [ "$LOG_CURRENT" = "$LOG_FLUSHED" ] && break + sleep 1 + done + rm -f $INNODB_STATUS + ;; + + thaw) + [ ! -p $FIFO ] && exit 1 + echo > $FIFO + ;; + + *) + exit 1 + ;; +esac diff --git a/scripts/qemu-trace-stap b/scripts/qemu-trace-stap new file mode 100755 index 000000000..eb6e951ff --- /dev/null +++ b/scripts/qemu-trace-stap @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 +# -*- python -*- +# +# Copyright (C) 2019 Red Hat, Inc +# +# QEMU SystemTap Trace Tool +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. 
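+#
+# Probe names are the binary name with '-' mapped to '.' plus the
+# '.log' tapset suffix, so qemu-system-x86_64 exposes its trace events
+# as SystemTap probes named qemu.system.x86_64.log.<event>.
+# Example sessions (illustrative):
+#   $ scripts/qemu-trace-stap list qemu-system-x86_64
+#   $ scripts/qemu-trace-stap run -p 1234 qemu-system-x86_64 'qio*'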
+ +import argparse +import copy +import os.path +import re +import subprocess +import sys + + +def probe_prefix(binary): + dirname, filename = os.path.split(binary) + return re.sub("-", ".", filename) + ".log" + + +def which(binary): + for path in os.environ["PATH"].split(os.pathsep): + if os.path.exists(os.path.join(path, binary)): + return os.path.join(path, binary) + + print("Unable to find '%s' in $PATH" % binary) + sys.exit(1) + + +def tapset_dir(binary): + dirname, filename = os.path.split(binary) + if dirname == '': + thisfile = which(binary) + else: + thisfile = os.path.realpath(binary) + if not os.path.exists(thisfile): + print("Unable to find '%s'" % thisfile) + sys.exit(1) + + basedir = os.path.split(thisfile)[0] + tapset = os.path.join(basedir, "..", "share", "systemtap", "tapset") + return os.path.realpath(tapset) + + +def cmd_run(args): + prefix = probe_prefix(args.binary) + tapsets = tapset_dir(args.binary) + + if args.verbose: + print("Using tapset dir '%s' for binary '%s'" % (tapsets, args.binary)) + + probes = [] + for probe in args.probes: + probes.append("probe %s.%s {}" % (prefix, probe)) + if len(probes) == 0: + print("At least one probe pattern must be specified") + sys.exit(1) + + script = " ".join(probes) + if args.verbose: + print("Compiling script '%s'" % script) + script = """probe begin { print("Running script, <Ctrl>-c to quit\\n") } """ + script + + # We request an 8MB buffer, since the stap default 1MB buffer + # can be easily overflowed by frequently firing QEMU traces + stapargs = ["stap", "-s", "8", "-I", tapsets ] + if args.pid is not None: + stapargs.extend(["-x", args.pid]) + stapargs.extend(["-e", script]) + subprocess.call(stapargs) + + +def cmd_list(args): + tapsets = tapset_dir(args.binary) + + if args.verbose: + print("Using tapset dir '%s' for binary '%s'" % (tapsets, args.binary)) + + def print_probes(verbose, name): + prefix = probe_prefix(args.binary) + offset = len(prefix) + 1 + script = prefix + "." 
+ name + + if verbose: + print("Listing probes with name '%s'" % script) + proc = subprocess.Popen(["stap", "-I", tapsets, "-l", script], + stdout=subprocess.PIPE, + universal_newlines=True) + out, err = proc.communicate() + if proc.returncode != 0: + print("No probes found, are the tapsets installed in %s" % tapset_dir(args.binary)) + sys.exit(1) + + for line in out.splitlines(): + if line.startswith(prefix): + print("%s" % line[offset:]) + + if len(args.probes) == 0: + print_probes(args.verbose, "*") + else: + for probe in args.probes: + print_probes(args.verbose, probe) + + +def main(): + parser = argparse.ArgumentParser(description="QEMU SystemTap trace tool") + parser.add_argument("-v", "--verbose", help="Print verbose progress info", + action='store_true') + + subparser = parser.add_subparsers(help="commands") + subparser.required = True + subparser.dest = "command" + + runparser = subparser.add_parser("run", help="Run a trace session", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" + +To watch all trace points on the qemu-system-x86_64 binary: + + %(argv0)s run qemu-system-x86_64 + +To only watch the trace points matching the qio* and qcrypto* patterns + + %(argv0)s run qemu-system-x86_64 'qio*' 'qcrypto*' +""" % {"argv0": sys.argv[0]}) + runparser.set_defaults(func=cmd_run) + runparser.add_argument("--pid", "-p", dest="pid", + help="Restrict tracing to a specific process ID") + runparser.add_argument("binary", help="QEMU system or user emulator binary") + runparser.add_argument("probes", help="Probe names or wildcards", + nargs=argparse.REMAINDER) + + listparser = subparser.add_parser("list", help="List probe points", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" + +To list all trace points on the qemu-system-x86_64 binary: + + %(argv0)s list qemu-system-x86_64 + +To only list the trace points matching the qio* and qcrypto* patterns + + %(argv0)s list qemu-system-x86_64 'qio*' 'qcrypto*' +""" % {"argv0": sys.argv[0]}) + listparser.set_defaults(func=cmd_list) + listparser.add_argument("binary", help="QEMU system or user emulator binary") + listparser.add_argument("probes", help="Probe names or wildcards", + nargs=argparse.REMAINDER) + + args = parser.parse_args() + + args.func(args) + sys.exit(0) + +if __name__ == '__main__': + main() diff --git a/scripts/qemu-version.sh b/scripts/qemu-version.sh new file mode 100755 index 000000000..3f6e7e6d4 --- /dev/null +++ b/scripts/qemu-version.sh @@ -0,0 +1,25 @@ +#!/bin/sh + +set -eu + +dir="$1" +pkgversion="$2" +version="$3" + +if [ -z "$pkgversion" ]; then + cd "$dir" + if [ -e .git ]; then + pkgversion=$(git describe --match 'v*' --dirty) || : + fi +fi + +if [ -n "$pkgversion" ]; then + fullversion="$version ($pkgversion)" +else + fullversion="$version" +fi + +cat <<EOF +#define QEMU_PKGVERSION "$pkgversion" +#define QEMU_FULL_VERSION "$fullversion" +EOF diff --git a/scripts/qemugdb/__init__.py b/scripts/qemugdb/__init__.py new file mode 100644 index 000000000..da8ff612e --- /dev/null +++ b/scripts/qemugdb/__init__.py @@ -0,0 +1,27 @@ +# +# GDB debugging support +# +# Copyright (c) 2015 Linaro Ltd +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see +# <http://www.gnu.org/licenses/gpl-2.0.html> +# + +# We don't need to do anything in our init file currently. + +""" +Support routines for debugging QEMU under GDB +""" + +__license__ = "GPL version 2 or (at your option) any later version" diff --git a/scripts/qemugdb/aio.py b/scripts/qemugdb/aio.py new file mode 100644 index 000000000..d7c1ba0c2 --- /dev/null +++ b/scripts/qemugdb/aio.py @@ -0,0 +1,57 @@ +# +# GDB debugging support: aio/iohandler debug +# +# Copyright (c) 2015 Red Hat, Inc. +# +# Author: Dr. David Alan Gilbert <dgilbert@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. +# + +import gdb +from qemugdb import coroutine + +def isnull(ptr): + return ptr == gdb.Value(0).cast(ptr.type) + +def dump_aiocontext(context, verbose): + '''Display a dump and backtrace for an aiocontext''' + cur = context['aio_handlers']['lh_first'] + # Get pointers to functions we're going to process specially + sym_fd_coroutine_enter = gdb.parse_and_eval('fd_coroutine_enter') + + while not isnull(cur): + entry = cur.dereference() + gdb.write('----\n%s\n' % entry) + if verbose and cur['io_read'] == sym_fd_coroutine_enter: + coptr = (cur['opaque'].cast(gdb.lookup_type('FDYieldUntilData').pointer()))['co'] + coptr = coptr.cast(gdb.lookup_type('CoroutineUContext').pointer()) + coroutine.bt_jmpbuf(coptr['env']['__jmpbuf']) + cur = cur['node']['le_next']; + + gdb.write('----\n') + +class HandlersCommand(gdb.Command): + '''Display aio handlers''' + def __init__(self): + gdb.Command.__init__(self, 'qemu handlers', gdb.COMMAND_DATA, + gdb.COMPLETE_NONE) + + def invoke(self, arg, from_tty): + verbose = False + argv = gdb.string_to_argv(arg) + + if len(argv) > 0 and argv[0] == '--verbose': + verbose = True + argv.pop(0) + + if len(argv) > 1: + gdb.write('usage: qemu handlers [--verbose] [handler]\n') + return + + if len(argv) == 1: + handlers_name = argv[0] + else: + handlers_name = 'qemu_aio_context' + dump_aiocontext(gdb.parse_and_eval(handlers_name), verbose) diff --git a/scripts/qemugdb/coroutine.py b/scripts/qemugdb/coroutine.py new file mode 100644 index 000000000..7db46d4b6 --- /dev/null +++ b/scripts/qemugdb/coroutine.py @@ -0,0 +1,148 @@ +# +# GDB debugging support +# +# Copyright 2012 Red Hat, Inc. and/or its affiliates +# +# Authors: +# Avi Kivity <avi@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 +# or later. See the COPYING file in the top-level directory. + +import gdb + +VOID_PTR = gdb.lookup_type('void').pointer() + +def get_fs_base(): + '''Fetch %fs base value using arch_prctl(ARCH_GET_FS). 
This is + pthread_self().''' + # %rsp - 120 is scratch space according to the SystemV ABI + old = gdb.parse_and_eval('*(uint64_t*)($rsp - 120)') + gdb.execute('call (int)arch_prctl(0x1003, $rsp - 120)', False, True) + fs_base = gdb.parse_and_eval('*(uint64_t*)($rsp - 120)') + gdb.execute('set *(uint64_t*)($rsp - 120) = %s' % old, False, True) + return fs_base + +def pthread_self(): + '''Fetch pthread_self() from the glibc start_thread function.''' + f = gdb.newest_frame() + while f.name() != 'start_thread': + f = f.older() + if f is None: + return get_fs_base() + + try: + return f.read_var("arg") + except ValueError: + return get_fs_base() + +def get_glibc_pointer_guard(): + '''Fetch glibc pointer guard value''' + fs_base = pthread_self() + return gdb.parse_and_eval('*(uint64_t*)((uint64_t)%s + 0x30)' % fs_base) + +def glibc_ptr_demangle(val, pointer_guard): + '''Undo effect of glibc's PTR_MANGLE()''' + return gdb.parse_and_eval('(((uint64_t)%s >> 0x11) | ((uint64_t)%s << (64 - 0x11))) ^ (uint64_t)%s' % (val, val, pointer_guard)) + +def get_jmpbuf_regs(jmpbuf): + JB_RBX = 0 + JB_RBP = 1 + JB_R12 = 2 + JB_R13 = 3 + JB_R14 = 4 + JB_R15 = 5 + JB_RSP = 6 + JB_PC = 7 + + pointer_guard = get_glibc_pointer_guard() + return {'rbx': jmpbuf[JB_RBX], + 'rbp': glibc_ptr_demangle(jmpbuf[JB_RBP], pointer_guard), + 'rsp': glibc_ptr_demangle(jmpbuf[JB_RSP], pointer_guard), + 'r12': jmpbuf[JB_R12], + 'r13': jmpbuf[JB_R13], + 'r14': jmpbuf[JB_R14], + 'r15': jmpbuf[JB_R15], + 'rip': glibc_ptr_demangle(jmpbuf[JB_PC], pointer_guard) } + +def bt_jmpbuf(jmpbuf): + '''Backtrace a jmpbuf''' + regs = get_jmpbuf_regs(jmpbuf) + old = dict() + + # remember current stack frame and select the topmost + # so that register modifications don't wreck it + selected_frame = gdb.selected_frame() + gdb.newest_frame().select() + + for i in regs: + old[i] = gdb.parse_and_eval('(uint64_t)$%s' % i) + + for i in regs: + gdb.execute('set $%s = %s' % (i, regs[i])) + + gdb.execute('bt') + + for i in regs: + gdb.execute('set $%s = %s' % (i, old[i])) + + selected_frame.select() + +def co_cast(co): + return co.cast(gdb.lookup_type('CoroutineUContext').pointer()) + +def coroutine_to_jmpbuf(co): + coroutine_pointer = co_cast(co) + return coroutine_pointer['env']['__jmpbuf'] + + +class CoroutineCommand(gdb.Command): + '''Display coroutine backtrace''' + def __init__(self): + gdb.Command.__init__(self, 'qemu coroutine', gdb.COMMAND_DATA, + gdb.COMPLETE_NONE) + + def invoke(self, arg, from_tty): + argv = gdb.string_to_argv(arg) + if len(argv) != 1: + gdb.write('usage: qemu coroutine <coroutine-pointer>\n') + return + + bt_jmpbuf(coroutine_to_jmpbuf(gdb.parse_and_eval(argv[0]))) + +class CoroutineBt(gdb.Command): + '''Display backtrace including coroutine switches''' + def __init__(self): + gdb.Command.__init__(self, 'qemu bt', gdb.COMMAND_STACK, + gdb.COMPLETE_NONE) + + def invoke(self, arg, from_tty): + + gdb.execute("bt") + + if gdb.parse_and_eval("qemu_in_coroutine()") == False: + return + + co_ptr = gdb.parse_and_eval("qemu_coroutine_self()") + + while True: + co = co_cast(co_ptr) + co_ptr = co["base"]["caller"] + if co_ptr == 0: + break + gdb.write("Coroutine at " + str(co_ptr) + ":\n") + bt_jmpbuf(coroutine_to_jmpbuf(co_ptr)) + +class CoroutineSPFunction(gdb.Function): + def __init__(self): + gdb.Function.__init__(self, 'qemu_coroutine_sp') + + def invoke(self, addr): + return get_jmpbuf_regs(coroutine_to_jmpbuf(addr))['rsp'].cast(VOID_PTR) + +class CoroutinePCFunction(gdb.Function): + def __init__(self): + gdb.Function.__init__(self, 
'qemu_coroutine_pc') + + def invoke(self, addr): + return get_jmpbuf_regs(coroutine_to_jmpbuf(addr))['rip'].cast(VOID_PTR) diff --git a/scripts/qemugdb/mtree.py b/scripts/qemugdb/mtree.py new file mode 100644 index 000000000..8fe42c3c1 --- /dev/null +++ b/scripts/qemugdb/mtree.py @@ -0,0 +1,85 @@ +# +# GDB debugging support +# +# Copyright 2012 Red Hat, Inc. and/or its affiliates +# +# Authors: +# Avi Kivity <avi@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. + +# 'qemu mtree' -- display the memory hierarchy + +import gdb + +def isnull(ptr): + return ptr == gdb.Value(0).cast(ptr.type) + +def int128(p): + '''Read an Int128 type to a python integer. + + QEMU can be built with native Int128 support so we need to detect + if the value is a structure or the native type. + ''' + if p.type.code == gdb.TYPE_CODE_STRUCT: + return int(p['lo']) + (int(p['hi']) << 64) + else: + return int(("%s" % p), 16) + +class MtreeCommand(gdb.Command): + '''Display the memory tree hierarchy''' + def __init__(self): + gdb.Command.__init__(self, 'qemu mtree', gdb.COMMAND_DATA, + gdb.COMPLETE_NONE) + self.queue = [] + def invoke(self, arg, from_tty): + self.seen = set() + self.queue_root('address_space_memory') + self.queue_root('address_space_io') + self.process_queue() + def queue_root(self, varname): + ptr = gdb.parse_and_eval(varname)['root'] + self.queue.append(ptr) + def process_queue(self): + while self.queue: + ptr = self.queue.pop(0) + if int(ptr) in self.seen: + continue + self.print_item(ptr) + def print_item(self, ptr, offset = gdb.Value(0), level = 0): + self.seen.add(int(ptr)) + addr = ptr['addr'] + addr += offset + size = int128(ptr['size']) + alias = ptr['alias'] + klass = '' + if not isnull(alias): + klass = ' (alias)' + elif not isnull(ptr['ops']): + klass = ' (I/O)' + elif bool(ptr['ram']): + klass = ' (RAM)' + gdb.write('%s%016x-%016x %s%s (@ %s)\n' + % (' ' * level, + int(addr), + int(addr + (size - 1)), + ptr['name'].string(), + klass, + ptr, + ), + gdb.STDOUT) + if not isnull(alias): + gdb.write('%s alias: %s@%016x (@ %s)\n' % + (' ' * level, + alias['name'].string(), + int(ptr['alias_offset']), + alias, + ), + gdb.STDOUT) + self.queue.append(alias) + subregion = ptr['subregions']['tqh_first'] + level += 1 + while not isnull(subregion): + self.print_item(subregion, addr, level) + subregion = subregion['subregions_link']['tqe_next'] diff --git a/scripts/qemugdb/tcg.py b/scripts/qemugdb/tcg.py new file mode 100644 index 000000000..16c03c06a --- /dev/null +++ b/scripts/qemugdb/tcg.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# +# GDB debugging support, TCG status +# +# Copyright 2016 Linaro Ltd +# +# Authors: +# Alex Bennée <alex.bennee@linaro.org> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. 
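+#
+# Illustrative session (an assumption for documentation purposes, with
+# made-up values; requires scripts/qemu-gdb.py to have been sourced in a
+# gdb that is attached to a running QEMU):
+#
+#   (gdb) qemu tcg-lock-status
+#   Thread, BQL (iothread_mutex), Replay, Blocked?
+#   1/12345, true, false, not blocked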
+
+# 'qemu tcg-lock-status' -- display the TCG lock status across threads
+
+import gdb
+
+class TCGLockStatusCommand(gdb.Command):
+    '''Display TCG Execution Status'''
+    def __init__(self):
+        gdb.Command.__init__(self, 'qemu tcg-lock-status', gdb.COMMAND_DATA,
+                             gdb.COMPLETE_NONE)
+
+    def invoke(self, arg, from_tty):
+        gdb.write("Thread, BQL (iothread_mutex), Replay, Blocked?\n")
+        for thread in gdb.inferiors()[0].threads():
+            thread.switch()
+
+            iothread = gdb.parse_and_eval("iothread_locked")
+            replay = gdb.parse_and_eval("replay_locked")
+
+            frame = gdb.selected_frame()
+            if frame.name() == "__lll_lock_wait":
+                frame.older().select()
+                mutex = gdb.parse_and_eval("mutex")
+                owner = gdb.parse_and_eval("mutex->__data.__owner")
+                blocked = ("__lll_lock_wait waiting on %s from %d" %
+                           (mutex, owner))
+            else:
+                blocked = "not blocked"
+
+            gdb.write("%d/%d, %s, %s, %s\n" % (thread.num, thread.ptid[1],
+                                               iothread, replay, blocked))
diff --git a/scripts/qemugdb/timers.py b/scripts/qemugdb/timers.py
new file mode 100644
index 000000000..46537b27c
--- /dev/null
+++ b/scripts/qemugdb/timers.py
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+# GDB debugging support
+#
+# Copyright 2017 Linaro Ltd
+#
+# Author: Alex Bennée <alex.bennee@linaro.org>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+# 'qemu timers' -- display the current timerlists
+
+import gdb
+
+class TimersCommand(gdb.Command):
+    '''Display the current QEMU timers'''
+
+    def __init__(self):
+        'Register the class as a gdb command'
+        gdb.Command.__init__(self, 'qemu timers', gdb.COMMAND_DATA,
+                             gdb.COMPLETE_NONE)
+
+    def dump_timers(self, timer):
+        "Follow a timer and recursively dump each one in the list."
+        # timer should be of type QemuTimer
+        gdb.write("    timer %s/%s (cb:%s,opq:%s)\n" % (
+            timer['expire_time'],
+            timer['scale'],
+            timer['cb'],
+            timer['opaque']))
+
+        if int(timer['next']) > 0:
+            self.dump_timers(timer['next'])
+
+    def process_timerlist(self, tlist, ttype):
+        gdb.write("Processing %s timers\n" % (ttype))
+        gdb.write("  clock %s is enabled:%s, last:%s\n" % (
+            tlist['clock']['type'],
+            tlist['clock']['enabled'],
+            tlist['clock']['last']))
+        if int(tlist['active_timers']) > 0:
+            self.dump_timers(tlist['active_timers'])
+
+    def invoke(self, arg, from_tty):
+        'Run the command'
+        main_timers = gdb.parse_and_eval("main_loop_tlg")
+
+        # This will break if QEMUClockType in timer.h is redefined
+        self.process_timerlist(main_timers['tl'][0], "Realtime")
+        self.process_timerlist(main_timers['tl'][1], "Virtual")
+        self.process_timerlist(main_timers['tl'][2], "Host")
+        self.process_timerlist(main_timers['tl'][3], "Virtual RT")
diff --git a/scripts/qmp/qemu-ga-client b/scripts/qmp/qemu-ga-client
new file mode 100755
index 000000000..102fd2cad
--- /dev/null
+++ b/scripts/qmp/qemu-ga-client
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
+from qemu.qmp import qemu_ga_client
+
+
+if __name__ == '__main__':
+    sys.exit(qemu_ga_client.main())
diff --git a/scripts/qmp/qmp b/scripts/qmp/qmp
new file mode 100755
index 000000000..0f12307c8
--- /dev/null
+++ b/scripts/qmp/qmp
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+
+import sys
+
+print('''This unmaintained and undocumented script was removed in preference
+for qmp-shell.
The assumption is that most users are using either +qmp-shell, socat, or pasting/piping JSON into stdio. The duplication of +facilities here is unwanted, and the divergence of syntax harmful.''', + file=sys.stderr) + +sys.exit(1) diff --git a/scripts/qmp/qmp-shell b/scripts/qmp/qmp-shell new file mode 100755 index 000000000..4a20f97db --- /dev/null +++ b/scripts/qmp/qmp-shell @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 + +import os +import sys + +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python')) +from qemu.qmp import qmp_shell + + +if __name__ == '__main__': + qmp_shell.main() diff --git a/scripts/qmp/qom-fuse b/scripts/qmp/qom-fuse new file mode 100755 index 000000000..a58c8ef97 --- /dev/null +++ b/scripts/qmp/qom-fuse @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 + +import os +import sys + +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python')) +from qemu.qmp.qom_fuse import QOMFuse + + +if __name__ == '__main__': + sys.exit(QOMFuse.entry_point()) diff --git a/scripts/qmp/qom-get b/scripts/qmp/qom-get new file mode 100755 index 000000000..e4f3e0c01 --- /dev/null +++ b/scripts/qmp/qom-get @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 + +import os +import sys + +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python')) +from qemu.qmp.qom import QOMGet + + +if __name__ == '__main__': + sys.exit(QOMGet.entry_point()) diff --git a/scripts/qmp/qom-list b/scripts/qmp/qom-list new file mode 100755 index 000000000..7a071a54e --- /dev/null +++ b/scripts/qmp/qom-list @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 + +import os +import sys + +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python')) +from qemu.qmp.qom import QOMList + + +if __name__ == '__main__': + sys.exit(QOMList.entry_point()) diff --git a/scripts/qmp/qom-set b/scripts/qmp/qom-set new file mode 100755 index 000000000..9ca9e2ba1 --- /dev/null +++ b/scripts/qmp/qom-set @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 + +import os +import sys + +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python')) +from qemu.qmp.qom import QOMSet + + +if __name__ == '__main__': + sys.exit(QOMSet.entry_point()) diff --git a/scripts/qmp/qom-tree b/scripts/qmp/qom-tree new file mode 100755 index 000000000..7d0ccca3a --- /dev/null +++ b/scripts/qmp/qom-tree @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 + +import os +import sys + +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python')) +from qemu.qmp.qom import QOMTree + + +if __name__ == '__main__': + sys.exit(QOMTree.entry_point()) diff --git a/scripts/refresh-pxe-roms.sh b/scripts/refresh-pxe-roms.sh new file mode 100755 index 000000000..90fc0b374 --- /dev/null +++ b/scripts/refresh-pxe-roms.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# PXE ROM build script +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. +# +# Copyright (C) 2011 Red Hat, Inc. 
+# Authors: Alex Williamson <alex.williamson@redhat.com> +# +# Usage: Run from root of qemu tree +# ./scripts/refresh-pxe-roms.sh + +targets="pxerom" +if test -x "$(which EfiRom 2>/dev/null)"; then + targets="$targets efirom" +fi + +cd roms +make -j4 $targets || exit 1 +make clean diff --git a/scripts/render_block_graph.py b/scripts/render_block_graph.py new file mode 100755 index 000000000..da6acf050 --- /dev/null +++ b/scripts/render_block_graph.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +# +# Render Qemu Block Graph +# +# Copyright (c) 2018 Virtuozzo International GmbH. All rights reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import os +import sys +import subprocess +import json +from graphviz import Digraph + +sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python')) +from qemu.qmp import ( + QEMUMonitorProtocol, + QMPResponseError, +) + + +def perm(arr): + s = 'w' if 'write' in arr else '_' + s += 'r' if 'consistent-read' in arr else '_' + s += 'u' if 'write-unchanged' in arr else '_' + s += 'g' if 'graph-mod' in arr else '_' + s += 's' if 'resize' in arr else '_' + return s + + +def render_block_graph(qmp, filename, format='png'): + ''' + Render graph in text (dot) representation into "@filename" and + representation in @format into "@filename.@format" + ''' + + bds_nodes = qmp.command('query-named-block-nodes') + bds_nodes = {n['node-name']: n for n in bds_nodes} + + job_nodes = qmp.command('query-block-jobs') + job_nodes = {n['device']: n for n in job_nodes} + + block_graph = qmp.command('x-debug-query-block-graph') + + graph = Digraph(comment='Block Nodes Graph') + graph.format = format + graph.node('permission symbols:\l' + ' w - Write\l' + ' r - consistent-Read\l' + ' u - write - Unchanged\l' + ' g - Graph-mod\l' + ' s - reSize\l' + 'edge label scheme:\l' + ' <child type>\l' + ' <perm>\l' + ' <shared_perm>\l', shape='none') + + for n in block_graph['nodes']: + if n['type'] == 'block-driver': + info = bds_nodes[n['name']] + label = n['name'] + ' [' + info['drv'] + ']' + if info['drv'] == 'file': + label += '\n' + os.path.basename(info['file']) + shape = 'ellipse' + elif n['type'] == 'block-job': + info = job_nodes[n['name']] + label = info['type'] + ' job (' + n['name'] + ')' + shape = 'box' + else: + assert n['type'] == 'block-backend' + label = n['name'] if n['name'] else 'unnamed blk' + shape = 'box' + + graph.node(str(n['id']), label, shape=shape) + + for e in block_graph['edges']: + label = '%s\l%s\l%s\l' % (e['name'], perm(e['perm']), + perm(e['shared-perm'])) + graph.edge(str(e['parent']), str(e['child']), label=label) + + graph.render(filename) + + +class LibvirtGuest(): + def __init__(self, name): + self.name = name + + def command(self, cmd): + # only supports qmp commands without parameters + m = {'execute': cmd} + ar = ['virsh', 'qemu-monitor-command', self.name, json.dumps(m)] + + reply = json.loads(subprocess.check_output(ar)) + + if 'error' in 
reply:
+            raise QMPResponseError(reply)
+
+        return reply['return']
+
+
+if __name__ == '__main__':
+    obj = sys.argv[1]
+    out = sys.argv[2]
+
+    if os.path.exists(obj):
+        # assume unix socket
+        qmp = QEMUMonitorProtocol(obj)
+        qmp.connect()
+    else:
+        # assume libvirt guest name
+        qmp = LibvirtGuest(obj)
+
+    render_block_graph(qmp, out)
diff --git a/scripts/replay-dump.py b/scripts/replay-dump.py
new file mode 100755
index 000000000..3ba97a6d3
--- /dev/null
+++ b/scripts/replay-dump.py
@@ -0,0 +1,308 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Dump the contents of a recorded execution stream
+#
+# Copyright (c) 2017 Alex Bennée <alex.bennee@linaro.org>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+import argparse
+import struct
+from collections import namedtuple
+
+# This mirrors some of the global replay state which some of the
+# stream loading code refers to. Some decoders may read the next event,
+# so we need to handle that case. Calling reuse_event will ensure the
+# next event is read from the cache rather than advancing the file.
+
+class ReplayState(object):
+    def __init__(self):
+        self.event = -1
+        self.event_count = 0
+        self.already_read = False
+        self.current_checkpoint = 0
+        self.checkpoint = 0
+
+    def set_event(self, ev):
+        self.event = ev
+        self.event_count += 1
+
+    def get_event(self):
+        self.already_read = False
+        return self.event
+
+    def reuse_event(self, ev):
+        self.event = ev
+        self.already_read = True
+
+    def set_checkpoint(self):
+        self.checkpoint = self.event - self.checkpoint_start
+
+    def get_checkpoint(self):
+        return self.checkpoint
+
+replay_state = ReplayState()
+
+# Simple read functions that mirror replay-internal.c
+# The file-stream is big-endian and manually written out a byte at a time.
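+# For example (illustrative): a 64-bit value 0x1122334455667788 is stored in
+# the stream as the byte sequence 11 22 33 44 55 66 77 88, which is why the
+# helpers below all use big-endian ('>') struct formats.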
+
+def read_byte(fin):
+    "Read a single byte"
+    return struct.unpack('>B', fin.read(1))[0]
+
+def read_event(fin):
+    "Read a single byte event, but save some state"
+    if replay_state.already_read:
+        return replay_state.get_event()
+    else:
+        replay_state.set_event(read_byte(fin))
+        return replay_state.event
+
+def read_word(fin):
+    "Read a 16 bit word"
+    return struct.unpack('>H', fin.read(2))[0]
+
+def read_dword(fin):
+    "Read a 32 bit word"
+    return struct.unpack('>I', fin.read(4))[0]
+
+def read_qword(fin):
+    "Read a 64 bit word"
+    return struct.unpack('>Q', fin.read(8))[0]
+
+# Generic decoder structure
+Decoder = namedtuple("Decoder", "eid name fn")
+
+def call_decode(table, index, dumpfile):
+    "Search decode table for next step"
+    decoder = next((d for d in table if d.eid == index), None)
+    if not decoder:
+        print("Could not decode index: %d" % (index))
+        print("Entry is: %s" % (decoder))
+        print("Decode Table is:\n%s" % (table))
+        return False
+    else:
+        return decoder.fn(decoder.eid, decoder.name, dumpfile)
+
+# Print event
+def print_event(eid, name, string=None, event_count=None):
+    "Print event with count"
+    if not event_count:
+        event_count = replay_state.event_count
+
+    if string:
+        print("%d:%s(%d) %s" % (event_count, name, eid, string))
+    else:
+        print("%d:%s(%d)" % (event_count, name, eid))
+
+
+# Decoders for each event type
+
+def decode_unimp(eid, name, _unused_dumpfile):
+    "Unimplemented decoder, will trigger exit"
+    print("%s not handled - will now stop" % (name))
+    return False
+
+# Checkpoint decoder
+def swallow_async_qword(eid, name, dumpfile):
+    "Swallow a qword of data without looking at it"
+    step_id = read_qword(dumpfile)
+    print("  %s(%d) @ %d" % (name, eid, step_id))
+    return True
+
+async_decode_table = [ Decoder(0, "REPLAY_ASYNC_EVENT_BH", swallow_async_qword),
+                       Decoder(1, "REPLAY_ASYNC_INPUT", decode_unimp),
+                       Decoder(2, "REPLAY_ASYNC_INPUT_SYNC", decode_unimp),
+                       Decoder(3, "REPLAY_ASYNC_CHAR_READ", decode_unimp),
+                       Decoder(4, "REPLAY_ASYNC_EVENT_BLOCK", decode_unimp),
+                       Decoder(5, "REPLAY_ASYNC_EVENT_NET", decode_unimp),
+]
+# See replay_read_events/replay_read_event
+def decode_async(eid, name, dumpfile):
+    """Decode an ASYNC event"""
+
+    print_event(eid, name)
+
+    async_event_kind = read_byte(dumpfile)
+    async_event_checkpoint = read_byte(dumpfile)
+
+    if async_event_checkpoint != replay_state.current_checkpoint:
+        print("  mismatch between checkpoint %d and async data %d" % (
+            replay_state.current_checkpoint, async_event_checkpoint))
+        return True
+
+    return call_decode(async_decode_table, async_event_kind, dumpfile)
+
+
+def decode_instruction(eid, name, dumpfile):
+    ins_diff = read_dword(dumpfile)
+    print_event(eid, name, "0x%x" % (ins_diff))
+    return True
+
+def decode_audio_out(eid, name, dumpfile):
+    audio_data = read_dword(dumpfile)
+    print_event(eid, name, "%d" % (audio_data))
+    return True
+
+def decode_checkpoint(eid, name, dumpfile):
+    """Decode a checkpoint.
+
+    Checkpoints contain a series of async events with their own specific data.
+ """ + replay_state.set_checkpoint() + # save event count as we peek ahead + event_number = replay_state.event_count + next_event = read_event(dumpfile) + + # if the next event is EVENT_ASYNC there are a bunch of + # async events to read, otherwise we are done + if next_event != 3: + print_event(eid, name, "no additional data", event_number) + else: + print_event(eid, name, "more data follows", event_number) + + replay_state.reuse_event(next_event) + return True + +def decode_checkpoint_init(eid, name, dumpfile): + print_event(eid, name) + return True + +def decode_interrupt(eid, name, dumpfile): + print_event(eid, name) + return True + +def decode_clock(eid, name, dumpfile): + clock_data = read_qword(dumpfile) + print_event(eid, name, "0x%x" % (clock_data)) + return True + + +# pre-MTTCG merge +v5_event_table = [Decoder(0, "EVENT_INSTRUCTION", decode_instruction), + Decoder(1, "EVENT_INTERRUPT", decode_interrupt), + Decoder(2, "EVENT_EXCEPTION", decode_unimp), + Decoder(3, "EVENT_ASYNC", decode_async), + Decoder(4, "EVENT_SHUTDOWN", decode_unimp), + Decoder(5, "EVENT_CHAR_WRITE", decode_unimp), + Decoder(6, "EVENT_CHAR_READ_ALL", decode_unimp), + Decoder(7, "EVENT_CHAR_READ_ALL_ERROR", decode_unimp), + Decoder(8, "EVENT_CLOCK_HOST", decode_clock), + Decoder(9, "EVENT_CLOCK_VIRTUAL_RT", decode_clock), + Decoder(10, "EVENT_CP_CLOCK_WARP_START", decode_checkpoint), + Decoder(11, "EVENT_CP_CLOCK_WARP_ACCOUNT", decode_checkpoint), + Decoder(12, "EVENT_CP_RESET_REQUESTED", decode_checkpoint), + Decoder(13, "EVENT_CP_SUSPEND_REQUESTED", decode_checkpoint), + Decoder(14, "EVENT_CP_CLOCK_VIRTUAL", decode_checkpoint), + Decoder(15, "EVENT_CP_CLOCK_HOST", decode_checkpoint), + Decoder(16, "EVENT_CP_CLOCK_VIRTUAL_RT", decode_checkpoint), + Decoder(17, "EVENT_CP_INIT", decode_checkpoint_init), + Decoder(18, "EVENT_CP_RESET", decode_checkpoint), +] + +# post-MTTCG merge, AUDIO support added +v6_event_table = [Decoder(0, "EVENT_INSTRUCTION", decode_instruction), + Decoder(1, "EVENT_INTERRUPT", decode_interrupt), + Decoder(2, "EVENT_EXCEPTION", decode_unimp), + Decoder(3, "EVENT_ASYNC", decode_async), + Decoder(4, "EVENT_SHUTDOWN", decode_unimp), + Decoder(5, "EVENT_CHAR_WRITE", decode_unimp), + Decoder(6, "EVENT_CHAR_READ_ALL", decode_unimp), + Decoder(7, "EVENT_CHAR_READ_ALL_ERROR", decode_unimp), + Decoder(8, "EVENT_AUDIO_OUT", decode_audio_out), + Decoder(9, "EVENT_AUDIO_IN", decode_unimp), + Decoder(10, "EVENT_CLOCK_HOST", decode_clock), + Decoder(11, "EVENT_CLOCK_VIRTUAL_RT", decode_clock), + Decoder(12, "EVENT_CP_CLOCK_WARP_START", decode_checkpoint), + Decoder(13, "EVENT_CP_CLOCK_WARP_ACCOUNT", decode_checkpoint), + Decoder(14, "EVENT_CP_RESET_REQUESTED", decode_checkpoint), + Decoder(15, "EVENT_CP_SUSPEND_REQUESTED", decode_checkpoint), + Decoder(16, "EVENT_CP_CLOCK_VIRTUAL", decode_checkpoint), + Decoder(17, "EVENT_CP_CLOCK_HOST", decode_checkpoint), + Decoder(18, "EVENT_CP_CLOCK_VIRTUAL_RT", decode_checkpoint), + Decoder(19, "EVENT_CP_INIT", decode_checkpoint_init), + Decoder(20, "EVENT_CP_RESET", decode_checkpoint), +] + +# Shutdown cause added +v7_event_table = [Decoder(0, "EVENT_INSTRUCTION", decode_instruction), + Decoder(1, "EVENT_INTERRUPT", decode_interrupt), + Decoder(2, "EVENT_EXCEPTION", decode_unimp), + Decoder(3, "EVENT_ASYNC", decode_async), + Decoder(4, "EVENT_SHUTDOWN", decode_unimp), + Decoder(5, "EVENT_SHUTDOWN_HOST_ERR", decode_unimp), + Decoder(6, "EVENT_SHUTDOWN_HOST_QMP", decode_unimp), + Decoder(7, "EVENT_SHUTDOWN_HOST_SIGNAL", decode_unimp), + Decoder(8, 
"EVENT_SHUTDOWN_HOST_UI", decode_unimp), + Decoder(9, "EVENT_SHUTDOWN_GUEST_SHUTDOWN", decode_unimp), + Decoder(10, "EVENT_SHUTDOWN_GUEST_RESET", decode_unimp), + Decoder(11, "EVENT_SHUTDOWN_GUEST_PANIC", decode_unimp), + Decoder(12, "EVENT_SHUTDOWN___MAX", decode_unimp), + Decoder(13, "EVENT_CHAR_WRITE", decode_unimp), + Decoder(14, "EVENT_CHAR_READ_ALL", decode_unimp), + Decoder(15, "EVENT_CHAR_READ_ALL_ERROR", decode_unimp), + Decoder(16, "EVENT_AUDIO_OUT", decode_audio_out), + Decoder(17, "EVENT_AUDIO_IN", decode_unimp), + Decoder(18, "EVENT_CLOCK_HOST", decode_clock), + Decoder(19, "EVENT_CLOCK_VIRTUAL_RT", decode_clock), + Decoder(20, "EVENT_CP_CLOCK_WARP_START", decode_checkpoint), + Decoder(21, "EVENT_CP_CLOCK_WARP_ACCOUNT", decode_checkpoint), + Decoder(22, "EVENT_CP_RESET_REQUESTED", decode_checkpoint), + Decoder(23, "EVENT_CP_SUSPEND_REQUESTED", decode_checkpoint), + Decoder(24, "EVENT_CP_CLOCK_VIRTUAL", decode_checkpoint), + Decoder(25, "EVENT_CP_CLOCK_HOST", decode_checkpoint), + Decoder(26, "EVENT_CP_CLOCK_VIRTUAL_RT", decode_checkpoint), + Decoder(27, "EVENT_CP_INIT", decode_checkpoint_init), + Decoder(28, "EVENT_CP_RESET", decode_checkpoint), +] + +def parse_arguments(): + "Grab arguments for script" + parser = argparse.ArgumentParser() + parser.add_argument("-f", "--file", help='record/replay dump to read from', + required=True) + return parser.parse_args() + +def decode_file(filename): + "Decode a record/replay dump" + dumpfile = open(filename, "rb") + + # read and throwaway the header + version = read_dword(dumpfile) + junk = read_qword(dumpfile) + + print("HEADER: version 0x%x" % (version)) + + if version == 0xe02007: + event_decode_table = v7_event_table + replay_state.checkpoint_start = 12 + elif version == 0xe02006: + event_decode_table = v6_event_table + replay_state.checkpoint_start = 12 + else: + event_decode_table = v5_event_table + replay_state.checkpoint_start = 10 + + try: + decode_ok = True + while decode_ok: + event = read_event(dumpfile) + decode_ok = call_decode(event_decode_table, event, dumpfile) + finally: + dumpfile.close() + +if __name__ == "__main__": + args = parse_arguments() + decode_file(args.file) diff --git a/scripts/shaderinclude.pl b/scripts/shaderinclude.pl new file mode 100644 index 000000000..cd3bb40b1 --- /dev/null +++ b/scripts/shaderinclude.pl @@ -0,0 +1,16 @@ +#!/usr/bin/env perl +use strict; +use warnings; + +my $file = shift; +open FILE, "<", $file or die "open $file: $!"; +my $name = $file; +$name =~ s|.*/||; +$name =~ s/[-.]/_/g; +print "static GLchar ${name}_src[] =\n"; +while (<FILE>) { + chomp; + printf " \"%s\\n\"\n", $_; +} +print " \"\\n\";\n"; +close FILE; diff --git a/scripts/signrom.py b/scripts/signrom.py new file mode 100755 index 000000000..43693dba5 --- /dev/null +++ b/scripts/signrom.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 + +# +# Option ROM signing utility +# +# Authors: +# Jan Kiszka <jan.kiszka@siemens.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or later. +# See the COPYING file in the top-level directory. 
+ +import sys +import struct + +if len(sys.argv) < 3: + print('usage: signrom.py input output') + sys.exit(1) + +fin = open(sys.argv[1], 'rb') +fout = open(sys.argv[2], 'wb') + +magic = fin.read(2) +if magic != b'\x55\xaa': + sys.exit("%s: option ROM does not begin with magic 55 aa" % sys.argv[1]) + +size_byte = ord(fin.read(1)) +fin.seek(0) +data = fin.read() + +size = size_byte * 512 +if len(data) > size: + sys.stderr.write('error: ROM is too large (%d > %d)\n' % (len(data), size)) + sys.exit(1) +elif len(data) < size: + # Add padding if necessary, rounding the whole input to a multiple of + # 512 bytes according to the third byte of the input. + # size-1 because a final byte is added below to store the checksum. + data = data.ljust(size-1, b'\0') +else: + if ord(data[-1:]) != 0: + sys.stderr.write('WARNING: ROM includes nonzero checksum\n') + data = data[:size-1] + +fout.write(data) + +checksum = 0 +for b in data: + checksum = (checksum - b) & 255 + +fout.write(struct.pack('B', checksum)) + +fin.close() +fout.close() diff --git a/scripts/simplebench/bench-backup.py b/scripts/simplebench/bench-backup.py new file mode 100755 index 000000000..5a0675c59 --- /dev/null +++ b/scripts/simplebench/bench-backup.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python3 +# +# Bench backup block-job +# +# Copyright (c) 2020 Virtuozzo International GmbH. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import argparse +import json + +import simplebench +from results_to_text import results_to_text +from bench_block_job import bench_block_copy, drv_file, drv_nbd, drv_qcow2 + + +def bench_func(env, case): + """ Handle one "cell" of benchmarking table. 
""" + cmd_options = env['cmd-options'] if 'cmd-options' in env else {} + return bench_block_copy(env['qemu-binary'], env['cmd'], + cmd_options, + case['source'], case['target']) + + +def bench(args): + test_cases = [] + + # paths with colon not supported, so we just split by ':' + dirs = dict(d.split(':') for d in args.dir) + + nbd_drv = None + if args.nbd: + nbd = args.nbd.split(':') + host = nbd[0] + port = '10809' if len(nbd) == 1 else nbd[1] + nbd_drv = drv_nbd(host, port) + + for t in args.test: + src, dst = t.split(':') + + if src == 'nbd' and dst == 'nbd': + raise ValueError("Can't use 'nbd' label for both src and dst") + + if (src == 'nbd' or dst == 'nbd') and not nbd_drv: + raise ValueError("'nbd' label used but --nbd is not given") + + if src == 'nbd': + source = nbd_drv + elif args.qcow2_sources: + source = drv_qcow2(drv_file(dirs[src] + '/test-source.qcow2')) + else: + source = drv_file(dirs[src] + '/test-source') + + if dst == 'nbd': + test_cases.append({'id': t, 'source': source, 'target': nbd_drv}) + continue + + if args.target_cache == 'both': + target_caches = ['direct', 'cached'] + else: + target_caches = [args.target_cache] + + for c in target_caches: + o_direct = c == 'direct' + fname = dirs[dst] + '/test-target' + if args.compressed: + fname += '.qcow2' + target = drv_file(fname, o_direct=o_direct) + if args.compressed: + target = drv_qcow2(target) + + test_id = t + if args.target_cache == 'both': + test_id += f'({c})' + + test_cases.append({'id': test_id, 'source': source, + 'target': target}) + + binaries = [] # list of (<label>, <path>, [<options>]) + for i, q in enumerate(args.env): + name_path = q.split(':') + if len(name_path) == 1: + label = f'q{i}' + path_opts = name_path[0].split(',') + else: + assert len(name_path) == 2 # paths with colon not supported + label = name_path[0] + path_opts = name_path[1].split(',') + + binaries.append((label, path_opts[0], path_opts[1:])) + + test_envs = [] + + bin_paths = {} + for i, q in enumerate(args.env): + opts = q.split(',') + label_path = opts[0] + opts = opts[1:] + + if ':' in label_path: + # path with colon inside is not supported + label, path = label_path.split(':') + bin_paths[label] = path + elif label_path in bin_paths: + label = label_path + path = bin_paths[label] + else: + path = label_path + label = f'q{i}' + bin_paths[label] = path + + x_perf = {} + is_mirror = False + for opt in opts: + if opt == 'mirror': + is_mirror = True + elif opt == 'copy-range=on': + x_perf['use-copy-range'] = True + elif opt == 'copy-range=off': + x_perf['use-copy-range'] = False + elif opt.startswith('max-workers='): + x_perf['max-workers'] = int(opt.split('=')[1]) + + backup_options = {} + if x_perf: + backup_options['x-perf'] = x_perf + + if args.compressed: + backup_options['compress'] = True + + if is_mirror: + assert not x_perf + test_envs.append({ + 'id': f'mirror({label})', + 'cmd': 'blockdev-mirror', + 'qemu-binary': path + }) + else: + test_envs.append({ + 'id': f'backup({label})\n' + '\n'.join(opts), + 'cmd': 'blockdev-backup', + 'cmd-options': backup_options, + 'qemu-binary': path + }) + + result = simplebench.bench(bench_func, test_envs, test_cases, + count=args.count, initial_run=args.initial_run, + drop_caches=args.drop_caches) + with open('results.json', 'w') as f: + json.dump(result, f, indent=4) + print(results_to_text(result)) + + +class ExtendAction(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + items = getattr(namespace, self.dest) or [] + items.extend(values) + 
setattr(namespace, self.dest, items)
+
+
+if __name__ == '__main__':
+    p = argparse.ArgumentParser('Backup benchmark', epilog='''
+ENV format
+
+    (LABEL:PATH|LABEL|PATH)[,max-workers=N][,use-copy-range=(on|off)][,mirror]
+
+    LABEL           short name for the binary
+    PATH            path to the binary
+    max-workers     set x-perf.max-workers of backup job
+    use-copy-range  set x-perf.use-copy-range of backup job
+    mirror          use mirror job instead of backup''',
+                                formatter_class=argparse.RawTextHelpFormatter)
+    p.add_argument('--env', nargs='+', help='''\
+Qemu binaries with labels and options, see below
+"ENV format" section''',
+                   action=ExtendAction)
+    p.add_argument('--dir', nargs='+', help='''\
+Directories, each containing "test-source" and/or
+"test-target" files, raw images to be used in
+benchmarking. Directory path with label, like
+label:/path/to/directory''',
+                   action=ExtendAction)
+    p.add_argument('--nbd', help='''\
+host:port for remote NBD image (or just host, for
+default port 10809). Use it in tests, label is "nbd"
+(but you cannot create test nbd:nbd).''')
+    p.add_argument('--test', nargs='+', help='''\
+Tests, in form source-dir-label:target-dir-label''',
+                   action=ExtendAction)
+    p.add_argument('--compressed', help='''\
+Use compressed backup. It automatically means
+creating a qcow2 target with lazy_refcounts
+for each test run''', action='store_true')
+    p.add_argument('--qcow2-sources', help='''\
+Use test-source.qcow2 images as sources instead of
+test-source raw images''', action='store_true')
+    p.add_argument('--target-cache', help='''\
+Setup cache for target nodes. Options:
+   direct: default, use O_DIRECT and aio=native
+   cached: use system cache (Qemu default) and aio=threads (Qemu default)
+   both: generate two test cases for each src:dst pair''',
+                   default='direct', choices=('direct', 'cached', 'both'))
+
+    p.add_argument('--count', type=int, default=3, help='''\
+Number of test runs per table cell''')
+
+    # BooleanOptionalAction helps to support --no-initial-run option
+    p.add_argument('--initial-run', action=argparse.BooleanOptionalAction,
+                   help='''\
+Do an additional initial run per cell which doesn't count in the result,
+default true''')
+
+    p.add_argument('--drop-caches', action='store_true', help='''\
+Do "sync; echo 3 > /proc/sys/vm/drop_caches" before each test run''')
+
+    bench(p.parse_args())
diff --git a/scripts/simplebench/bench-example.py b/scripts/simplebench/bench-example.py
new file mode 100644
index 000000000..4864435f3
--- /dev/null
+++ b/scripts/simplebench/bench-example.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+#
+# Benchmark example
+#
+# Copyright (c) 2019 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import simplebench
+from results_to_text import results_to_text
+from bench_block_job import bench_block_copy, drv_file, drv_nbd
+
+
+def bench_func(env, case):
+    """ Handle one "cell" of benchmarking table. """
""" + return bench_block_copy(env['qemu_binary'], env['cmd'], {} + case['source'], case['target']) + + +# You may set the following five variables to correct values, to turn this +# example to real benchmark. +ssd_source = '/path-to-raw-source-image-at-ssd' +ssd_target = '/path-to-raw-target-image-at-ssd' +hdd_target = '/path-to-raw-source-image-at-hdd' +nbd_ip = 'nbd-ip-addr' +nbd_port = 'nbd-port-number' + +# Test-cases are "rows" in benchmark resulting table, 'id' is a caption for +# the row, other fields are handled by bench_func. +test_cases = [ + { + 'id': 'ssd -> ssd', + 'source': drv_file(ssd_source), + 'target': drv_file(ssd_target) + }, + { + 'id': 'ssd -> hdd', + 'source': drv_file(ssd_source), + 'target': drv_file(hdd_target) + }, + { + 'id': 'ssd -> nbd', + 'source': drv_file(ssd_source), + 'target': drv_nbd(nbd_ip, nbd_port) + }, +] + +# Test-envs are "columns" in benchmark resulting table, 'id is a caption for +# the column, other fields are handled by bench_func. +test_envs = [ + { + 'id': 'backup-1', + 'cmd': 'blockdev-backup', + 'qemu_binary': '/path-to-qemu-binary-1' + }, + { + 'id': 'backup-2', + 'cmd': 'blockdev-backup', + 'qemu_binary': '/path-to-qemu-binary-2' + }, + { + 'id': 'mirror', + 'cmd': 'blockdev-mirror', + 'qemu_binary': '/path-to-qemu-binary-1' + } +] + +result = simplebench.bench(bench_func, test_envs, test_cases, count=3) +print(results_to_text(result)) diff --git a/scripts/simplebench/bench_block_job.py b/scripts/simplebench/bench_block_job.py new file mode 100755 index 000000000..a403c35b0 --- /dev/null +++ b/scripts/simplebench/bench_block_job.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +# +# Benchmark block jobs +# +# Copyright (c) 2019 Virtuozzo International GmbH. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + + +import sys +import os +import subprocess +import socket +import json + +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python')) +from qemu.machine import QEMUMachine +from qemu.qmp import QMPConnectError +from qemu.aqmp import ConnectError + + +def bench_block_job(cmd, cmd_args, qemu_args): + """Benchmark block-job + + cmd -- qmp command to run block-job (like blockdev-backup) + cmd_args -- dict of qmp command arguments + qemu_args -- list of Qemu command line arguments, including path to Qemu + binary + + Returns {'seconds': int} on success and {'error': str} on failure, dict may + contain addional 'vm-log' field. Return value is compatible with + simplebench lib. 
+ """ + + vm = QEMUMachine(qemu_args[0], args=qemu_args[1:]) + + try: + vm.launch() + except OSError as e: + return {'error': 'popen failed: ' + str(e)} + except (QMPConnectError, ConnectError, socket.timeout): + return {'error': 'qemu failed: ' + str(vm.get_log())} + + try: + res = vm.qmp(cmd, **cmd_args) + if res != {'return': {}}: + vm.shutdown() + return {'error': '"{}" command failed: {}'.format(cmd, str(res))} + + e = vm.event_wait('JOB_STATUS_CHANGE') + assert e['data']['status'] == 'created' + start_ms = e['timestamp']['seconds'] * 1000000 + \ + e['timestamp']['microseconds'] + + e = vm.events_wait((('BLOCK_JOB_READY', None), + ('BLOCK_JOB_COMPLETED', None), + ('BLOCK_JOB_FAILED', None)), timeout=True) + if e['event'] not in ('BLOCK_JOB_READY', 'BLOCK_JOB_COMPLETED'): + vm.shutdown() + return {'error': 'block-job failed: ' + str(e), + 'vm-log': vm.get_log()} + if 'error' in e['data']: + vm.shutdown() + return {'error': 'block-job failed: ' + e['data']['error'], + 'vm-log': vm.get_log()} + end_ms = e['timestamp']['seconds'] * 1000000 + \ + e['timestamp']['microseconds'] + finally: + vm.shutdown() + + return {'seconds': (end_ms - start_ms) / 1000000.0} + + +def get_image_size(path): + out = subprocess.run(['qemu-img', 'info', '--out=json', path], + stdout=subprocess.PIPE, check=True).stdout + return json.loads(out)['virtual-size'] + + +def get_blockdev_size(obj): + img = obj['filename'] if 'filename' in obj else obj['file']['filename'] + return get_image_size(img) + + +# Bench backup or mirror +def bench_block_copy(qemu_binary, cmd, cmd_options, source, target): + """Helper to run bench_block_job() for mirror or backup""" + assert cmd in ('blockdev-backup', 'blockdev-mirror') + + if target['driver'] == 'qcow2': + try: + os.remove(target['file']['filename']) + except OSError: + pass + + subprocess.run(['qemu-img', 'create', '-f', 'qcow2', + target['file']['filename'], + str(get_blockdev_size(source))], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, check=True) + + source['node-name'] = 'source' + target['node-name'] = 'target' + + cmd_options['job-id'] = 'job0' + cmd_options['device'] = 'source' + cmd_options['target'] = 'target' + cmd_options['sync'] = 'full' + + return bench_block_job(cmd, cmd_options, + [qemu_binary, + '-blockdev', json.dumps(source), + '-blockdev', json.dumps(target)]) + + +def drv_file(filename, o_direct=True): + node = {'driver': 'file', 'filename': filename} + if o_direct: + node['cache'] = {'direct': True} + node['aio'] = 'native' + + return node + + +def drv_nbd(host, port): + return {'driver': 'nbd', + 'server': {'type': 'inet', 'host': host, 'port': port}} + + +def drv_qcow2(file): + return {'driver': 'qcow2', 'file': file} + + +if __name__ == '__main__': + import sys + + if len(sys.argv) < 4: + print('USAGE: {} <qmp block-job command name> ' + '<json string of arguments for the command> ' + '<qemu binary path and arguments>'.format(sys.argv[0])) + exit(1) + + res = bench_block_job(sys.argv[1], json.loads(sys.argv[2]), sys.argv[3:]) + if 'seconds' in res: + print('{:.2f}'.format(res['seconds'])) + else: + print(res) diff --git a/scripts/simplebench/bench_prealloc.py b/scripts/simplebench/bench_prealloc.py new file mode 100755 index 000000000..85f588c59 --- /dev/null +++ b/scripts/simplebench/bench_prealloc.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +# +# Benchmark preallocate filter +# +# Copyright (c) 2020 Virtuozzo International GmbH. 
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+
+import sys
+import os
+import subprocess
+import re
+import json
+
+import simplebench
+from results_to_text import results_to_text
+
+
+def qemu_img_bench(args):
+    p = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                       universal_newlines=True)
+
+    if p.returncode == 0:
+        try:
+            m = re.search(r'Run completed in (\d+.\d+) seconds.', p.stdout)
+            return {'seconds': float(m.group(1))}
+        except Exception:
+            return {'error': f'failed to parse qemu-img output: {p.stdout}'}
+    else:
+        return {'error': f'qemu-img failed: {p.returncode}: {p.stdout}'}
+
+
+def bench_func(env, case):
+    fname = f"{case['dir']}/prealloc-test.qcow2"
+    try:
+        os.remove(fname)
+    except OSError:
+        pass
+
+    subprocess.run([env['qemu-img-binary'], 'create', '-f', 'qcow2', fname,
+                    '16G'], stdout=subprocess.DEVNULL,
+                   stderr=subprocess.DEVNULL, check=True)
+
+    args = [env['qemu-img-binary'], 'bench', '-c', str(case['count']),
+            '-d', '64', '-s', case['block-size'], '-t', 'none', '-n', '-w']
+    if env['prealloc']:
+        args += ['--image-opts',
+                 'driver=qcow2,file.driver=preallocate,file.file.driver=file,'
+                 f'file.file.filename={fname}']
+    else:
+        args += ['-f', 'qcow2', fname]
+
+    return qemu_img_bench(args)
+
+
+def auto_count_bench_func(env, case):
+    case['count'] = 100
+    while True:
+        res = bench_func(env, case)
+        if 'error' in res:
+            return res
+
+        if res['seconds'] >= 1:
+            break
+
+        case['count'] *= 10
+
+    if res['seconds'] < 5:
+        case['count'] = round(case['count'] * 5 / res['seconds'])
+        res = bench_func(env, case)
+        if 'error' in res:
+            return res
+
+    res['iops'] = case['count'] / res['seconds']
+    return res
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        print(f'USAGE: {sys.argv[0]} <qemu-img binary> '
+              'DISK_NAME:DIR_PATH ...')
+        exit(1)
+
+    qemu_img = sys.argv[1]
+
+    envs = [
+        {
+            'id': 'no-prealloc',
+            'qemu-img-binary': qemu_img,
+            'prealloc': False
+        },
+        {
+            'id': 'prealloc',
+            'qemu-img-binary': qemu_img,
+            'prealloc': True
+        }
+    ]
+
+    aligned_cases = []
+    unaligned_cases = []
+
+    for disk in sys.argv[2:]:
+        name, path = disk.split(':')
+        aligned_cases.append({
+            'id': f'{name}, aligned sequential 16k',
+            'block-size': '16k',
+            'dir': path
+        })
+        unaligned_cases.append({
+            'id': f'{name}, unaligned sequential 64k',
+            'block-size': '64k',
+            'dir': path
+        })
+
+    result = simplebench.bench(auto_count_bench_func, envs,
+                               aligned_cases + unaligned_cases, count=5)
+    print(results_to_text(result))
+    with open('results.json', 'w') as f:
+        json.dump(result, f, indent=4)
diff --git a/scripts/simplebench/bench_write_req.py b/scripts/simplebench/bench_write_req.py
new file mode 100755
index 000000000..da601ea2f
--- /dev/null
+++ b/scripts/simplebench/bench_write_req.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+#
+# Test to compare performance of write requests for two qemu-img binary files.
+# +# The idea of the test comes from intention to check the benefit of c8bb23cbdbe +# "qcow2: skip writing zero buffers to empty COW areas". +# +# Copyright (c) 2020 Virtuozzo International GmbH. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + + +import sys +import os +import subprocess +import simplebench +from results_to_text import results_to_text + + +def bench_func(env, case): + """ Handle one "cell" of benchmarking table. """ + return bench_write_req(env['qemu_img'], env['image_name'], + case['block_size'], case['block_offset'], + case['cluster_size']) + + +def qemu_img_pipe(*args): + '''Run qemu-img and return its output''' + subp = subprocess.Popen(list(args), + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + universal_newlines=True) + exitcode = subp.wait() + if exitcode < 0: + sys.stderr.write('qemu-img received signal %i: %s\n' + % (-exitcode, ' '.join(list(args)))) + return subp.communicate()[0] + + +def bench_write_req(qemu_img, image_name, block_size, block_offset, + cluster_size): + """Benchmark write requests + + The function creates a QCOW2 image with the given path/name. Then it runs + the 'qemu-img bench' command and makes series of write requests on the + image clusters. Finally, it returns the total time of the write operations + on the disk. + + qemu_img -- path to qemu_img executable file + image_name -- QCOW2 image name to create + block_size -- size of a block to write to clusters + block_offset -- offset of the block in clusters + cluster_size -- size of the image cluster + + Returns {'seconds': int} on success and {'error': str} on failure. + Return value is compatible with simplebench lib. 
+ """ + + if not os.path.isfile(qemu_img): + print(f'File not found: {qemu_img}') + sys.exit(1) + + image_dir = os.path.dirname(os.path.abspath(image_name)) + if not os.path.isdir(image_dir): + print(f'Path not found: {image_name}') + sys.exit(1) + + image_size = 1024 * 1024 * 1024 + + args_create = [qemu_img, 'create', '-f', 'qcow2', '-o', + f'cluster_size={cluster_size}', + image_name, str(image_size)] + + count = int(image_size / cluster_size) - 1 + step = str(cluster_size) + + args_bench = [qemu_img, 'bench', '-w', '-n', '-t', 'none', '-c', + str(count), '-s', f'{block_size}', '-o', str(block_offset), + '-S', step, '-f', 'qcow2', image_name] + + try: + qemu_img_pipe(*args_create) + except OSError as e: + os.remove(image_name) + return {'error': 'qemu_img create failed: ' + str(e)} + + try: + ret = qemu_img_pipe(*args_bench) + except OSError as e: + os.remove(image_name) + return {'error': 'qemu_img bench failed: ' + str(e)} + + os.remove(image_name) + + if 'seconds' in ret: + ret_list = ret.split() + index = ret_list.index('seconds.') + return {'seconds': float(ret_list[index-1])} + else: + return {'error': 'qemu_img bench failed: ' + ret} + + +if __name__ == '__main__': + + if len(sys.argv) < 4: + program = os.path.basename(sys.argv[0]) + print(f'USAGE: {program} <path to qemu-img binary file> ' + '<path to another qemu-img to compare performance with> ' + '<full or relative name for QCOW2 image to create>') + exit(1) + + # Test-cases are "rows" in benchmark resulting table, 'id' is a caption + # for the row, other fields are handled by bench_func. + test_cases = [ + { + 'id': '<cluster front>', + 'block_size': 4096, + 'block_offset': 0, + 'cluster_size': 1048576 + }, + { + 'id': '<cluster middle>', + 'block_size': 4096, + 'block_offset': 524288, + 'cluster_size': 1048576 + }, + { + 'id': '<cross cluster>', + 'block_size': 1048576, + 'block_offset': 4096, + 'cluster_size': 1048576 + }, + { + 'id': '<cluster 64K>', + 'block_size': 4096, + 'block_offset': 0, + 'cluster_size': 65536 + }, + ] + + # Test-envs are "columns" in benchmark resulting table, 'id is a caption + # for the column, other fields are handled by bench_func. + # Set the paths below to desired values + test_envs = [ + { + 'id': '<qemu-img binary 1>', + 'qemu_img': f'{sys.argv[1]}', + 'image_name': f'{sys.argv[3]}' + }, + { + 'id': '<qemu-img binary 2>', + 'qemu_img': f'{sys.argv[2]}', + 'image_name': f'{sys.argv[3]}' + }, + ] + + result = simplebench.bench(bench_func, test_envs, test_cases, count=3, + initial_run=False) + print(results_to_text(result)) diff --git a/scripts/simplebench/img_bench_templater.py b/scripts/simplebench/img_bench_templater.py new file mode 100755 index 000000000..f8e1540ad --- /dev/null +++ b/scripts/simplebench/img_bench_templater.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +# +# Process img-bench test templates +# +# Copyright (c) 2021 Virtuozzo International GmbH. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see <http://www.gnu.org/licenses/>. +# + + +import sys +import subprocess +import re +import json + +import simplebench +from results_to_text import results_to_text +from table_templater import Templater + + +def bench_func(env, case): + test = templater.gen(env['data'], case['data']) + + p = subprocess.run(test, shell=True, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, universal_newlines=True) + + if p.returncode == 0: + try: + m = re.search(r'Run completed in (\d+\.\d+) seconds.', p.stdout) + return {'seconds': float(m.group(1))} + except Exception: + return {'error': f'failed to parse qemu-img output: {p.stdout}'} + else: + return {'error': f'qemu-img failed: {p.returncode}: {p.stdout}'} + + +if __name__ == '__main__': + if len(sys.argv) > 1: + print(""" +Usage: img_bench_templater.py < path/to/test-template.sh + +This script generates performance tests from a test template (example below), +runs them, and displays the results in a table. The template is read from +stdin. It must be written in bash and end with a `qemu-img bench` invocation +(whose result is parsed to get the test instance’s result). + +Use the following syntax in the template to create the various different test +instances: + + column templating: {var1|var2|...} - the test will use different values in + different columns. You may use several {} constructions in the test, in + which case the product of all choice-sets will be used. + + row templating: [var1|var2|...] - similar syntax to define rows (test-cases) + +Test template example: + +Assume you want to compare two qemu-img binaries, called qemu-img-old and +qemu-img-new in your build directory, in two test-cases: with 4K writes and +with 64K writes. The template may look like this: + +qemu_img=/path/to/qemu/build/qemu-img-{old|new} +$qemu_img create -f qcow2 /ssd/x.qcow2 1G +$qemu_img bench -c 100 -d 8 [-s 4K|-s 64K] -w -t none -n /ssd/x.qcow2 + +When passing this to stdin of img_bench_templater.py, the resulting comparison +table will contain two columns (for the two binaries) and two rows (for the +two test-cases). + +In addition to displaying the results, the script also stores them in JSON +format in the results.json file in the current directory. +""") + sys.exit() + + templater = Templater(sys.stdin.read()) + + envs = [{'id': ' / '.join(x), 'data': x} for x in templater.columns] + cases = [{'id': ' / '.join(x), 'data': x} for x in templater.rows] + + result = simplebench.bench(bench_func, envs, cases, count=5, + initial_run=False) + print(results_to_text(result)) + with open('results.json', 'w') as f: + json.dump(result, f, indent=4) diff --git a/scripts/simplebench/results_to_text.py b/scripts/simplebench/results_to_text.py new file mode 100755 index 000000000..d561e5e2d --- /dev/null +++ b/scripts/simplebench/results_to_text.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +# +# Simple benchmarking framework +# +# Copyright (c) 2019 Virtuozzo International GmbH. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program.
If not, see <http://www.gnu.org/licenses/>. +# + +import math +import tabulate + +# We want leading whitespace for difference row cells (see below) +tabulate.PRESERVE_WHITESPACE = True + + +def format_value(x, stdev): + stdev_pr = stdev / x * 100 + if stdev_pr < 1.5: + # don't care too much + return f'{x:.2g}' + else: + return f'{x:.2g} ± {math.ceil(stdev_pr)}%' + + +def result_to_text(result): + """Return text representation of bench_one() returned dict.""" + if 'average' in result: + s = format_value(result['average'], result['stdev']) + if 'n-failed' in result: + s += '\n({} failed)'.format(result['n-failed']) + return s + else: + return 'FAILED' + + +def results_dimension(results): + dim = None + for case in results['cases']: + for env in results['envs']: + res = results['tab'][case['id']][env['id']] + if dim is None: + dim = res['dimension'] + else: + assert dim == res['dimension'] + + assert dim in ('iops', 'seconds') + + return dim + + +def results_to_text(results): + """Return text representation of bench() returned dict.""" + n_columns = len(results['envs']) + named_columns = n_columns > 2 + dim = results_dimension(results) + tab = [] + + if named_columns: + # Environment columns are named A, B, ... + tab.append([''] + [chr(ord('A') + i) for i in range(n_columns)]) + + tab.append([''] + [c['id'] for c in results['envs']]) + + for case in results['cases']: + row = [case['id']] + case_results = results['tab'][case['id']] + for env in results['envs']: + res = case_results[env['id']] + row.append(result_to_text(res)) + tab.append(row) + + # Add row of difference between columns. For each column starting from + # B we calculate difference with all previous columns. + row = ['', ''] # case name and first column + for i in range(1, n_columns): + cell = '' + env = results['envs'][i] + res = case_results[env['id']] + + if 'average' not in res: + # Failed result + row.append(cell) + continue + + for j in range(0, i): + env_j = results['envs'][j] + res_j = case_results[env_j['id']] + cell += ' ' + + if 'average' not in res_j: + # Failed result + cell += '--' + continue + + col_j = tab[0][j + 1] if named_columns else '' + diff_pr = round((res['average'] - res_j['average']) / + res_j['average'] * 100) + cell += f' {col_j}{diff_pr:+}%' + row.append(cell) + tab.append(row) + + return f'All results are in {dim}\n\n' + tabulate.tabulate(tab) + + +if __name__ == '__main__': + import sys + import json + + if len(sys.argv) < 2: + print(f'USAGE: {sys.argv[0]} results.json') + exit(1) + + with open(sys.argv[1]) as f: + print(results_to_text(json.load(f))) diff --git a/scripts/simplebench/simplebench.py b/scripts/simplebench/simplebench.py new file mode 100644 index 000000000..8efca2af9 --- /dev/null +++ b/scripts/simplebench/simplebench.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python +# +# Simple benchmarking framework +# +# Copyright (c) 2019 Virtuozzo International GmbH. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see <http://www.gnu.org/licenses/>. +# + +import statistics +import subprocess +import time + + +def do_drop_caches(): + subprocess.run('sync; echo 3 > /proc/sys/vm/drop_caches', shell=True, + check=True) + + +def bench_one(test_func, test_env, test_case, count=5, initial_run=True, + slow_limit=100, drop_caches=False): + """Benchmark one test-case + + test_func -- benchmarking function with prototype + test_func(env, case), which takes test_env and test_case + arguments and on success returns a dict with 'seconds' or + 'iops' (or both) fields, specifying the benchmark result. + If both 'iops' and 'seconds' are provided, 'iops' is + considered the main result, and 'seconds' is just + additional info. On failure test_func should return + {'error': str}. The returned dict may contain any other + additional fields. + test_env -- test environment - opaque first argument for test_func + test_case -- test case - opaque second argument for test_func + count -- how many times to call test_func, to calculate average + initial_run -- do an initial run of test_func, which doesn't get into the + result + slow_limit -- stop after a slow run (one that exceeds slow_limit seconds); + the initial run is not measured + drop_caches -- whether to drop caches before each run + + Returns dict with the following fields: + 'runs': list of test_func results + 'dimension': dimension of results, may be 'seconds' or 'iops' + 'average': average value (iops or seconds) per run (exists only if at + least one run succeeded) + 'stdev': standard deviation of results + (exists only if at least one run succeeded) + 'n-failed': number of failed runs (exists only if at least one run + failed) + """ + if initial_run: + print(' #initial run:') + if drop_caches: + do_drop_caches() + print(' ', test_func(test_env, test_case)) + + runs = [] + for i in range(count): + t = time.time() + + print(' #run {}'.format(i+1)) + if drop_caches: + do_drop_caches() + res = test_func(test_env, test_case) + print(' ', res) + runs.append(res) + + if time.time() - t > slow_limit: + print(' - run is too slow, stop here') + break + + count = len(runs) + + result = {'runs': runs} + + succeeded = [r for r in runs if ('seconds' in r or 'iops' in r)] + if succeeded: + if 'iops' in succeeded[0]: + assert all('iops' in r for r in succeeded) + dim = 'iops' + else: + assert all('seconds' in r for r in succeeded) + assert all('iops' not in r for r in succeeded) + dim = 'seconds' + result['dimension'] = dim + result['average'] = statistics.mean(r[dim] for r in succeeded) + if len(succeeded) == 1: + result['stdev'] = 0 + else: + result['stdev'] = statistics.stdev(r[dim] for r in succeeded) + + if len(succeeded) < count: + result['n-failed'] = count - len(succeeded) + + return result + + +def bench(test_func, test_envs, test_cases, *args, **vargs): + """Fill benchmark table + + test_func -- benchmarking function, see bench_one for description + test_envs -- list of test environments, see bench_one + test_cases -- list of test cases, see bench_one + args, vargs -- additional arguments for bench_one + + Returns dict with the following fields: + 'envs': test_envs + 'cases': test_cases + 'tab': filled 2D array, where cell [i][j] is bench_one result for + test_cases[i] for test_envs[j] (i.e., rows are test cases and + columns are test environments) + """ + tab = {} + results = { + 'envs': test_envs, + 'cases': test_cases, + 'tab': tab + } + n = 1 + n_tests = len(test_envs) * len(test_cases) + for env in test_envs: + for case in test_cases: + print('Testing {}/{}: {} :: {}'.format(n, n_tests, + env['id'], case['id'])) + if
case['id'] not in tab: + tab[case['id']] = {} + tab[case['id']][env['id']] = bench_one(test_func, env, case, + *args, **vargs) + n += 1 + + print('Done') + return results diff --git a/scripts/simplebench/table_templater.py b/scripts/simplebench/table_templater.py new file mode 100644 index 000000000..950f3b302 --- /dev/null +++ b/scripts/simplebench/table_templater.py @@ -0,0 +1,62 @@ +# Parser for test templates +# +# Copyright (c) 2021 Virtuozzo International GmbH. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import itertools +from lark import Lark + +grammar = """ +start: ( text | column_switch | row_switch )+ + +column_switch: "{" text ["|" text]+ "}" +row_switch: "[" text ["|" text]+ "]" +text: /[^|{}\[\]]+/ +""" + +parser = Lark(grammar) + +class Templater: + def __init__(self, template): + self.tree = parser.parse(template) + + c_switches = [] + r_switches = [] + for x in self.tree.children: + if x.data == 'column_switch': + c_switches.append([el.children[0].value for el in x.children]) + elif x.data == 'row_switch': + r_switches.append([el.children[0].value for el in x.children]) + + self.columns = list(itertools.product(*c_switches)) + self.rows = list(itertools.product(*r_switches)) + + def gen(self, column, row): + i = 0 + j = 0 + result = [] + + for x in self.tree.children: + if x.data == 'text': + result.append(x.children[0].value) + elif x.data == 'column_switch': + result.append(column[i]) + i += 1 + elif x.data == 'row_switch': + result.append(row[j]) + j += 1 + + return ''.join(result) diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py new file mode 100755 index 000000000..1f6d1ae1f --- /dev/null +++ b/scripts/simpletrace.py @@ -0,0 +1,263 @@ +#!/usr/bin/env python3 +# +# Pretty-printer for simple trace backend binary trace files +# +# Copyright IBM, Corp. 2010 +# +# This work is licensed under the terms of the GNU GPL, version 2. See +# the COPYING file in the top-level directory. 
+# +# For help see docs/devel/tracing.rst + +import struct +import inspect +from tracetool import read_events, Event +from tracetool.backend.simple import is_string + +header_event_id = 0xffffffffffffffff +header_magic = 0xf2b177cb0aa429b4 +dropped_event_id = 0xfffffffffffffffe + +record_type_mapping = 0 +record_type_event = 1 + +log_header_fmt = '=QQQ' +rec_header_fmt = '=QQII' + +def read_header(fobj, hfmt): + '''Read a trace record header''' + hlen = struct.calcsize(hfmt) + hdr = fobj.read(hlen) + if len(hdr) != hlen: + return None + return struct.unpack(hfmt, hdr) + +def get_record(edict, idtoname, rechdr, fobj): + """Deserialize a trace record from a file into a tuple + (name, timestamp, pid, arg1, ..., arg6).""" + if rechdr is None: + return None + if rechdr[0] != dropped_event_id: + event_id = rechdr[0] + name = idtoname[event_id] + rec = (name, rechdr[1], rechdr[3]) + try: + event = edict[name] + except KeyError as e: + import sys + sys.stderr.write('%s event is logged but is not declared ' \ + 'in the trace events file, try using ' \ + 'trace-events-all instead.\n' % str(e)) + sys.exit(1) + + for type, name in event.args: + if is_string(type): + l = fobj.read(4) + (len,) = struct.unpack('=L', l) + s = fobj.read(len) + rec = rec + (s,) + else: + (value,) = struct.unpack('=Q', fobj.read(8)) + rec = rec + (value,) + else: + rec = ("dropped", rechdr[1], rechdr[3]) + (value,) = struct.unpack('=Q', fobj.read(8)) + rec = rec + (value,) + return rec + +def get_mapping(fobj): + (event_id, ) = struct.unpack('=Q', fobj.read(8)) + (len, ) = struct.unpack('=L', fobj.read(4)) + name = fobj.read(len).decode() + + return (event_id, name) + +def read_record(edict, idtoname, fobj): + """Deserialize a trace record from a file into a tuple (event_num, timestamp, pid, arg1, ..., arg6).""" + rechdr = read_header(fobj, rec_header_fmt) + return get_record(edict, idtoname, rechdr, fobj) + +def read_trace_header(fobj): + """Read and verify trace file header""" + header = read_header(fobj, log_header_fmt) + if header is None: + raise ValueError('Not a valid trace file!') + if header[0] != header_event_id: + raise ValueError('Not a valid trace file, header id %d != %d' % + (header[0], header_event_id)) + if header[1] != header_magic: + raise ValueError('Not a valid trace file, header magic %d != %d' % + (header[1], header_magic)) + + log_version = header[2] + if log_version not in [0, 2, 3, 4]: + raise ValueError('Unknown version of tracelog format!') + if log_version != 4: + raise ValueError('Log format %d not supported with this QEMU release!' + % log_version) + +def read_trace_records(edict, idtoname, fobj): + """Deserialize trace records from a file, yielding record tuples (event_num, timestamp, pid, arg1, ..., arg6). + + Note that `idtoname` is modified if the file contains mapping records. + + Args: + edict (str -> Event): events dict, indexed by name + idtoname (int -> str): event names dict, indexed by event ID + fobj (file): input file + + """ + while True: + t = fobj.read(8) + if len(t) == 0: + break + + (rectype, ) = struct.unpack('=Q', t) + if rectype == record_type_mapping: + event_id, name = get_mapping(fobj) + idtoname[event_id] = name + else: + rec = read_record(edict, idtoname, fobj) + + yield rec + +class Analyzer(object): + """A trace file analyzer which processes trace records. + + An analyzer can be passed to run() or process(). The begin() method is + invoked, then each trace record is processed, and finally the end() method + is invoked. 
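+ + A minimal analyzer that just counts all records might look like this + (an illustrative sketch, not part of this module):: + + class CountingAnalyzer(Analyzer): + def begin(self): + self.count = 0 + + def catchall(self, event, rec): + self.count += 1 + + def end(self): + print('%d records' % self.count)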
+ + If a method matching a trace event name exists, it is invoked to process + that trace record. Otherwise the catchall() method is invoked. + + Example: + The following method handles the runstate_set(int new_state) trace event:: + + def runstate_set(self, new_state): + ... + + The method can also take a timestamp argument before the trace event + arguments:: + + def runstate_set(self, timestamp, new_state): + ... + + Timestamps have the uint64_t type and are in nanoseconds. + + The pid can be included in addition to the timestamp and is useful when + dealing with traces from multiple processes:: + + def runstate_set(self, timestamp, pid, new_state): + ... + """ + + def begin(self): + """Called at the start of the trace.""" + pass + + def catchall(self, event, rec): + """Called if no specific method for processing a trace event has been found.""" + pass + + def end(self): + """Called at the end of the trace.""" + pass + +def process(events, log, analyzer, read_header=True): + """Invoke an analyzer on each event in a log.""" + if isinstance(events, str): + events = read_events(open(events, 'r'), events) + if isinstance(log, str): + log = open(log, 'rb') + + if read_header: + read_trace_header(log) + + frameinfo = inspect.getframeinfo(inspect.currentframe()) + dropped_event = Event.build("Dropped_Event(uint64_t num_events_dropped)", + frameinfo.lineno + 1, frameinfo.filename) + edict = {"dropped": dropped_event} + idtoname = {dropped_event_id: "dropped"} + + for event in events: + edict[event.name] = event + + # If there is no header assume event ID mapping matches events list + if not read_header: + for event_id, event in enumerate(events): + idtoname[event_id] = event.name + + def build_fn(analyzer, event): + if isinstance(event, str): + return analyzer.catchall + + fn = getattr(analyzer, event.name, None) + if fn is None: + return analyzer.catchall + + event_argcount = len(event.args) + fn_argcount = len(inspect.getargspec(fn)[0]) - 1 + if fn_argcount == event_argcount + 1: + # Include timestamp as first argument + return lambda _, rec: fn(*(rec[1:2] + rec[3:3 + event_argcount])) + elif fn_argcount == event_argcount + 2: + # Include timestamp and pid + return lambda _, rec: fn(*rec[1:3 + event_argcount]) + else: + # Just arguments, no timestamp or pid + return lambda _, rec: fn(*rec[3:3 + event_argcount]) + + analyzer.begin() + fn_cache = {} + for rec in read_trace_records(edict, idtoname, log): + event_num = rec[0] + event = edict[event_num] + if event_num not in fn_cache: + fn_cache[event_num] = build_fn(analyzer, event) + fn_cache[event_num](event, rec) + analyzer.end() + +def run(analyzer): + """Execute an analyzer on a trace file given on the command-line. + + This function is useful as a driver for simple analysis scripts. 
More + advanced scripts will want to call process() instead.""" + import sys + + read_header = True + if len(sys.argv) == 4 and sys.argv[1] == '--no-header': + read_header = False + del sys.argv[1] + elif len(sys.argv) != 3: + sys.stderr.write('usage: %s [--no-header] <trace-events> ' \ + '<trace-file>\n' % sys.argv[0]) + sys.exit(1) + + events = read_events(open(sys.argv[1], 'r'), sys.argv[1]) + process(events, sys.argv[2], analyzer, read_header=read_header) + +if __name__ == '__main__': + class Formatter(Analyzer): + def __init__(self): + self.last_timestamp = None + + def catchall(self, event, rec): + timestamp = rec[1] + if self.last_timestamp is None: + self.last_timestamp = timestamp + delta_ns = timestamp - self.last_timestamp + self.last_timestamp = timestamp + + fields = [event.name, '%0.3f' % (delta_ns / 1000.0), + 'pid=%d' % rec[2]] + i = 3 + for type, name in event.args: + if is_string(type): + fields.append('%s=%s' % (name, rec[i])) + else: + fields.append('%s=0x%x' % (name, rec[i])) + i += 1 + print(' '.join(fields)) + + run(Formatter()) diff --git a/scripts/switch-timer-api b/scripts/switch-timer-api new file mode 100755 index 000000000..41736d11d --- /dev/null +++ b/scripts/switch-timer-api @@ -0,0 +1,178 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Getopt::Long; +use FindBin; + +my @legacy = qw(qemu_clock_ptr qemu_get_clock_ns qemu_get_clock_ms qemu_register_clock_reset_notifier qemu_unregister_clock_reset_notifier qemu_new_timer qemu_free_timer qemu_del_timer qemu_mod_timer_ns qemu_mod_timer qemu_run_timers qemu_new_timer_ns qemu_new_timer_us qemu_new_timer_ms); +my $legacyre = '\b('.join('|', @legacy).')\b'; +my $option_git; +my $option_dryrun; +my $option_quiet; +my $option_rtc; +my $suffix=".tmp.$$"; +my @files; +my $getfiles = 'git grep -l -E \'\b((host|rt|vm|rtc)_clock\b|qemu_\w*timer)\' | egrep \'\.[ch]$\' | egrep -v \'qemu-timer\.c$|include/qemu/timer\.h$\''; + +sub Syntax +{ + print STDERR <<STOP; +Usage: $FindBin::Script [options] FILE ... + +Translate each FILE to the new QEMU timer API. If no files +are passed, a reasonable guess is taken. 
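+ +Example (an illustrative invocation; the file name is hypothetical): + + $FindBin::Script --dry-run --quiet hw/foo.c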
+ +Options: + -q, --quiet Do not show warnings etc + -d, --dry-run Do a dry run + -g, --git Generate a git commit for each change + -r, --rtc Only fix up rtc usage + -h, --help Print this message + +STOP +return; +} + +sub ParseOptions +{ + if (!GetOptions ( + "dry-run|d" => \$option_dryrun, + "git|g" => \$option_git, + "quiet|q" => \$option_quiet, + "rtc|r" => \$option_rtc, + "help|h" => sub { Syntax(); exit(0); } + )) + { + Syntax(); + die "Bad options"; + } + + if ($#ARGV >=0) + { + @files = @ARGV; + } + else + { + @files = split(/\s+/, `$getfiles`); + } + + foreach my $file (@files) + { + die "Cannot find $file" unless (-f $file && -r $file); + } +} + +sub DoWarn +{ + my $text = shift @_; + my $line = shift @_; + return if ($option_quiet); + chomp ($line); + print STDERR "$text\n"; + print STDERR "$line\n\n"; +} + +sub Process +{ + my $ifn = shift @_; + my $ofn = $ifn.$suffix; + + my $intext; + my $outtext; + my $linenum = 0; + + open my $input, "<", $ifn || die "Cannot open $ifn for read: $!"; + + while (<$input>) + { + my $line = $_; + $intext .= $line; + $linenum++; + + # fix the specific uses + unless ($option_rtc) + { + $line =~ s/\bqemu_new_timer(_[num]s)\s*\((vm_|rt_|host_)clock\b/timer_new$1(XXX_$2clock/g; + $line =~ s/\bqemu_new_timer\s*\((vm_|rt_|host_)clock\b/timer_new(XXX_$1clock/g; + $line =~ s/\bqemu_get_clock(_[num]s)\s*\((vm_|rt_|host_)clock\b/qemu_clock_get$1(XXX_$2clock/g; + } + + # rtc is different + $line =~ s/\bqemu_new_timer(_[num]s)\s*\(rtc_clock\b/timer_new$1(rtc_clock/g; + $line =~ s/\bqemu_new_timer\s*\(rtc_clock\b/timer_new(rtc_clock/g; + $line =~ s/\bqemu_get_clock(_[num]s)\s*\(rtc_clock\b/qemu_clock_get$1(rtc_clock/g; + $line =~ s/\bqemu_register_clock_reset_notifier\s*\(rtc_clock\b/qemu_register_clock_reset_notifier(qemu_clock_ptr(rtc_clock)/g; + + unless ($option_rtc) + { + # fix up comments + $line =~ s/\b(vm_|rt_|host_)clock\b/XXX_$1clock/g if ($line =~ m,^[/ ]+\*,); + + # spurious fprintf error reporting + $line =~ s/: qemu_new_timer_ns failed/: timer_new_ns failed/g; + + # these have just changed name + $line =~ s/\bqemu_mod_timer\b/timer_mod/g; + $line =~ s/\bqemu_mod_timer_(ns|us|ms)\b/timer_mod_$1/g; + $line =~ s/\bqemu_free_timer\b/timer_free/g; + $line =~ s/\bqemu_del_timer\b/timer_del/g; + } + + # fix up rtc_clock + $line =~ s/QEMUClock \*rtc_clock;/QEMUClockType rtc_clock;/g; + $line =~ s/\brtc_clock = (vm_|rt_|host_)clock\b/rtc_clock = XXX_$1clock/g; + + unless ($option_rtc) + { + # replace any more general uses + $line =~ s/\b(vm_|rt_|host_)clock\b/qemu_clock_ptr(XXX_$1clock)/g; + } + + # fix up the place holders + $line =~ s/\bXXX_vm_clock\b/QEMU_CLOCK_VIRTUAL/g; + $line =~ s/\bXXX_rt_clock\b/QEMU_CLOCK_REALTIME/g; + $line =~ s/\bXXX_host_clock\b/QEMU_CLOCK_HOST/g; + + unless ($option_rtc) + { + DoWarn("$ifn:$linenum WARNING: timer $1 not fixed up", $line) if ($line =~ /\b((vm_|rt_|host_)clock)\b/); + DoWarn("$ifn:$linenum WARNING: function $1 not fixed up", $line) if ($line =~ /\b(qemu_new_timer\w+)\b/); + DoWarn("$ifn:$linenum WARNING: legacy function $1 remains", $line) if ($line =~ /$legacyre/o); + } + + $outtext .= $line; + } + + close $input; + + if ($intext ne $outtext) + { + print STDERR "Patching $ifn\n" unless ($option_quiet); + unless ($option_dryrun) + { + open my $output, ">", $ofn || die "Cannot open $ofn for write: $!"; + print $output $outtext; + close $output; + rename ($ofn, $ifn) || die "Cannot rename temp file to $ifn: $!"; + return 1; + } + } + return 0; +} + +sub DoCommit +{ + my $file = shift @_; + open (my $git, "| 
git commit -F - $file") || die "Cannot run git commit on $file: $!"; + print $git "timers api: use new timer api in $file\n\nConvert $file to use new timer API.\nThis is an automated commit made by scripts/switch-timer-api\n"; + close ($git); +} + +ParseOptions; + +foreach my $file (@files) +{ + my $changed = Process ($file); + DoCommit($file) if ($changed && $option_git); +} diff --git a/scripts/tap-driver.pl b/scripts/tap-driver.pl new file mode 100755 index 000000000..b1d3880c5 --- /dev/null +++ b/scripts/tap-driver.pl @@ -0,0 +1,379 @@ +#! /usr/bin/env perl +# Copyright (C) 2011-2013 Free Software Foundation, Inc. +# Copyright (C) 2018 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# ---------------------------------- # +# Imports, static data, and setup. # +# ---------------------------------- # + +use warnings FATAL => 'all'; +use strict; +use Getopt::Long (); +use TAP::Parser; +use Term::ANSIColor qw(:constants); + +my $ME = "tap-driver.pl"; +my $VERSION = "2018-11-30"; + +my $USAGE = <<'END'; +Usage: + tap-driver [--test-name=TEST] [--color={always|never|auto}] + [--verbose] [--show-failures-only] +END + +my $HELP = "$ME: TAP-aware test driver for QEMU testsuite harness." . + "\n" . $USAGE; + +# It's important that NO_PLAN evaluates "false" as a boolean. +use constant NO_PLAN => 0; +use constant EARLY_PLAN => 1; +use constant LATE_PLAN => 2; + +use constant DIAG_STRING => "#"; + +# ------------------- # +# Global variables. # +# ------------------- # + +my $testno = 0; # Number of test results seen so far. +my $bailed_out = 0; # Whether a "Bail out!" directive has been seen. +my $failed = 0; # Final exit code + +# Whether the TAP plan has been seen or not, and if yes, which kind +# it is ("early" is seen before any test result, "late" otherwise). +my $plan_seen = NO_PLAN; + +# ----------------- # +# Option parsing. # +# ----------------- # + +my %cfg = ( + "color" => 0, + "verbose" => 0, + "show-failures-only" => 0, +); + +my $color = "auto"; +my $test_name = undef; + +# Perl's Getopt::Long allows options to take optional arguments after a space. 
+# Prevent --color by itself from consuming other arguments +foreach (@ARGV) { + if ($_ eq "--color" || $_ eq "-color") { + $_ = "--color=$color"; + } +} + +Getopt::Long::GetOptions + ( + 'help' => sub { print $HELP; exit 0; }, + 'version' => sub { print "$ME $VERSION\n"; exit 0; }, + 'test-name=s' => \$test_name, + 'color=s' => \$color, + 'show-failures-only' => sub { $cfg{"show-failures-only"} = 1; }, + 'verbose' => sub { $cfg{"verbose"} = 1; }, + ) or exit 1; + +if ($color =~ /^always$/i) { + $cfg{'color'} = 1; +} elsif ($color =~ /^never$/i) { + $cfg{'color'} = 0; +} elsif ($color =~ /^auto$/i) { + $cfg{'color'} = (-t STDOUT); +} else { + die "Invalid color mode: $color\n"; +} + +# ------------- # +# Prototypes. # +# ------------- # + +sub colored ($$); +sub decorate_result ($); +sub extract_tap_comment ($); +sub handle_tap_bailout ($); +sub handle_tap_plan ($); +sub handle_tap_result ($); +sub is_null_string ($); +sub main (); +sub report ($;$); +sub stringify_result_obj ($); +sub testsuite_error ($); + +# -------------- # +# Subroutines. # +# -------------- # + +# If the given string is undefined or empty, return true, otherwise +# return false. This function is useful to avoid pitfalls like: +# if ($message) { print "$message\n"; } +# which wouldn't print anything if $message is the literal "0". +sub is_null_string ($) +{ + my $str = shift; + return ! (defined $str and length $str); +} + +sub stringify_result_obj ($) +{ + my $result_obj = shift; + if ($result_obj->is_unplanned || $result_obj->number != $testno) + { + return "ERROR"; + } + elsif ($plan_seen == LATE_PLAN) + { + return "ERROR"; + } + elsif (!$result_obj->directive) + { + return $result_obj->is_ok ? "PASS" : "FAIL"; + } + elsif ($result_obj->has_todo) + { + return $result_obj->is_actual_ok ? "XPASS" : "XFAIL"; + } + elsif ($result_obj->has_skip) + { + return $result_obj->is_ok ? "SKIP" : "FAIL"; + } + die "$ME: INTERNAL ERROR"; # NOTREACHED +} + +sub colored ($$) +{ + my ($color_string, $text) = @_; + return $color_string . $text . RESET; +} + +sub decorate_result ($) +{ + my $result = shift; + return $result unless $cfg{"color"}; + my %color_for_result = + ( + "ERROR" => BOLD.MAGENTA, + "PASS" => GREEN, + "XPASS" => BOLD.YELLOW, + "FAIL" => BOLD.RED, + "XFAIL" => YELLOW, + "SKIP" => BLUE, + ); + if (my $color = $color_for_result{$result}) + { + return colored ($color, $result); + } + else + { + return $result; # Don't colorize unknown stuff. + } +} + +sub report ($;$) +{ + my ($msg, $result, $explanation) = (undef, @_); + if ($result =~ /^(?:X?(?:PASS|FAIL)|SKIP|ERROR)/) + { + # Output on console might be colorized. + $msg = decorate_result($result); + if ($result =~ /^(?:PASS|XFAIL|SKIP)/) + { + return if $cfg{"show-failures-only"}; + } + else + { + $failed = 1; + } + } + elsif ($result eq "#") + { + $msg = " "; + } + else + { + die "$ME: INTERNAL ERROR"; # NOTREACHED + } + $msg .= " $explanation" if defined $explanation; + print $msg . 
"\n"; +} + +sub testsuite_error ($) +{ + report "ERROR", "$test_name - $_[0]"; +} + +sub handle_tap_result ($) +{ + $testno++; + my $result_obj = shift; + + my $test_result = stringify_result_obj $result_obj; + my $string = $result_obj->number; + + my $description = $result_obj->description; + $string .= " $test_name" unless is_null_string $test_name; + $string .= " $description" unless is_null_string $description; + + if ($plan_seen == LATE_PLAN) + { + $string .= " # AFTER LATE PLAN"; + } + elsif ($result_obj->is_unplanned) + { + $string .= " # UNPLANNED"; + } + elsif ($result_obj->number != $testno) + { + $string .= " # OUT-OF-ORDER (expecting $testno)"; + } + elsif (my $directive = $result_obj->directive) + { + $string .= " # $directive"; + my $explanation = $result_obj->explanation; + $string .= " $explanation" + unless is_null_string $explanation; + } + + report $test_result, $string; +} + +sub handle_tap_plan ($) +{ + my $plan = shift; + if ($plan_seen) + { + # Error, only one plan per stream is acceptable. + testsuite_error "multiple test plans"; + return; + } + # The TAP plan can come before or after *all* the TAP results; we speak + # respectively of an "early" or a "late" plan. If we see the plan line + # after at least one TAP result has been seen, assume we have a late + # plan; in this case, any further test result seen after the plan will + # be flagged as an error. + $plan_seen = ($testno >= 1 ? LATE_PLAN : EARLY_PLAN); + # If $testno > 0, we have an error ("too many tests run") that will be + # automatically dealt with later, so don't worry about it here. If + # $plan_seen is true, we have an error due to a repeated plan, and that + # has already been dealt with above. Otherwise, we have a valid "plan + # with SKIP" specification, and should report it as a particular kind + # of SKIP result. + if ($plan->directive && $testno == 0) + { + my $explanation = is_null_string ($plan->explanation) ? + undef : "- " . $plan->explanation; + report "SKIP", $explanation; + } +} + +sub handle_tap_bailout ($) +{ + my ($bailout, $msg) = ($_[0], "Bail out!"); + $bailed_out = 1; + $msg .= " " . $bailout->explanation + unless is_null_string $bailout->explanation; + testsuite_error $msg; +} + +sub extract_tap_comment ($) +{ + my $line = shift; + if (index ($line, DIAG_STRING) == 0) + { + # Strip leading `DIAG_STRING' from `$line'. + $line = substr ($line, length (DIAG_STRING)); + # And strip any leading and trailing whitespace left. + $line =~ s/(?:^\s*|\s*$)//g; + # Return what is left (if any). + return $line; + } + return ""; +} + +sub main () +{ + my $iterator = TAP::Parser::Iterator::Stream->new(\*STDIN); + my $parser = TAP::Parser->new ({iterator => $iterator }); + + STDOUT->autoflush(1); + while (defined (my $cur = $parser->next)) + { + # Parsing of TAP input should stop after a "Bail out!" directive. + next if $bailed_out; + + if ($cur->is_plan) + { + handle_tap_plan ($cur); + } + elsif ($cur->is_test) + { + handle_tap_result ($cur); + } + elsif ($cur->is_bailout) + { + handle_tap_bailout ($cur); + } + elsif ($cfg{"verbose"}) + { + my $comment = extract_tap_comment ($cur->raw); + report "#", "$comment" if length $comment; + } + } + # A "Bail out!" directive should cause us to ignore any following TAP + # error. + if (!$bailed_out) + { + if (!$plan_seen) + { + testsuite_error "missing test plan"; + } + elsif ($parser->tests_planned != $parser->tests_run) + { + my ($planned, $run) = ($parser->tests_planned, $parser->tests_run); + my $bad_amount = $run > $planned ? 
"many" : "few"; + testsuite_error (sprintf "too %s tests run (expected %d, got %d)", + $bad_amount, $planned, $run); + } + } +} + +# ----------- # +# Main code. # +# ----------- # + +main; +exit($failed); + +# Local Variables: +# perl-indent-level: 2 +# perl-continued-statement-offset: 2 +# perl-continued-brace-offset: 0 +# perl-brace-offset: 0 +# perl-brace-imaginary-offset: 0 +# perl-label-offset: -2 +# cperl-indent-level: 2 +# cperl-brace-offset: 0 +# cperl-continued-brace-offset: 0 +# cperl-label-offset: -2 +# cperl-extra-newline-before-brace: t +# cperl-merge-trailing-else: nil +# cperl-continued-statement-offset: 2 +# End: diff --git a/scripts/tap-merge.pl b/scripts/tap-merge.pl new file mode 100755 index 000000000..10ccf57bb --- /dev/null +++ b/scripts/tap-merge.pl @@ -0,0 +1,111 @@ +#! /usr/bin/env perl +# Copyright (C) 2018 Red Hat, Inc. +# +# Author: Paolo Bonzini <pbonzini@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +# ---------------------------------- # +# Imports, static data, and setup. # +# ---------------------------------- # + +use warnings FATAL => 'all'; +use strict; +use Getopt::Long (); +use TAP::Parser; + +my $ME = "tap-merge.pl"; +my $VERSION = "2018-11-30"; + +my $HELP = "$ME: merge multiple TAP inputs from stdin."; + +use constant DIAG_STRING => "#"; + +# ----------------- # +# Option parsing. # +# ----------------- # + +Getopt::Long::GetOptions + ( + 'help' => sub { print $HELP; exit 0; }, + 'version' => sub { print "$ME $VERSION\n"; exit 0; }, + ); + +# -------------- # +# Subroutines. # +# -------------- # + +sub main () +{ + my $iterator = TAP::Parser::Iterator::Stream->new(\*STDIN); + my $parser = TAP::Parser->new ({iterator => $iterator }); + my $testno = 0; # Number of test results seen so far. + my $bailed_out = 0; # Whether a "Bail out!" directive has been seen. + + STDOUT->autoflush(1); + while (defined (my $cur = $parser->next)) + { + if ($cur->is_bailout) + { + $bailed_out = 1; + print DIAG_STRING . " " . $cur->as_string . "\n"; + next; + } + elsif ($cur->is_plan) + { + $bailed_out = 0; + next; + } + elsif ($cur->is_test) + { + $bailed_out = 0 if $cur->number == 1; + $testno++; + $cur = TAP::Parser::Result::Test->new({ + ok => $cur->ok, + test_num => $testno, + directive => $cur->directive, + explanation => $cur->explanation, + description => $cur->description + }); + } + elsif ($cur->is_version) + { + next if $testno > 0; + } + print $cur->as_string . "\n" unless $bailed_out; + } + print "1..$testno\n"; +} + +# ----------- # +# Main code. 
# +# ----------- # + +main; + +# Local Variables: +# perl-indent-level: 2 +# perl-continued-statement-offset: 2 +# perl-continued-brace-offset: 0 +# perl-brace-offset: 0 +# perl-brace-imaginary-offset: 0 +# perl-label-offset: -2 +# cperl-indent-level: 2 +# cperl-brace-offset: 0 +# cperl-continued-brace-offset: 0 +# cperl-label-offset: -2 +# cperl-extra-newline-before-brace: t +# cperl-merge-trailing-else: nil +# cperl-continued-statement-offset: 2 +# End: diff --git a/scripts/test-driver.py b/scripts/test-driver.py new file mode 100644 index 000000000..eef74b29a --- /dev/null +++ b/scripts/test-driver.py @@ -0,0 +1,35 @@ +#! /usr/bin/env python3 + +# Wrapper for tests that hides the output if they succeed. +# Used by "make check" +# +# Copyright (C) 2020 Red Hat, Inc. +# +# Author: Paolo Bonzini <pbonzini@redhat.com> + +import subprocess +import sys +import os +import argparse + +parser = argparse.ArgumentParser(description='Test driver for QEMU') +parser.add_argument('-C', metavar='DIR', dest='dir', default='.', + help='change to DIR before doing anything else') +parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', + help='be more verbose') +parser.add_argument('test_args', nargs=argparse.REMAINDER) + +args = parser.parse_args() +os.chdir(args.dir) + +test_args = args.test_args +if test_args[0] == '--': + test_args = test_args[1:] + +if args.verbose: + result = subprocess.run(test_args, stdout=None, stderr=None) +else: + result = subprocess.run(test_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + if result.returncode: + sys.stdout.buffer.write(result.stdout) +sys.exit(result.returncode) diff --git a/scripts/tracetool.py b/scripts/tracetool.py new file mode 100755 index 000000000..ab7653a5c --- /dev/null +++ b/scripts/tracetool.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +Command-line wrapper for the tracetool machinery. +""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2014, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +import sys +import getopt + +from tracetool import error_write, out, out_open +import tracetool.backend +import tracetool.format + + +_SCRIPT = "" + +def error_opt(msg = None): + if msg is not None: + error_write("Error: " + msg + "\n") + + backend_descr = "\n".join([ " %-15s %s" % (n, d) + for n,d in tracetool.backend.get_list() ]) + format_descr = "\n".join([ " %-15s %s" % (n, d) + for n,d in tracetool.format.get_list() ]) + error_write("""\ +Usage: %(script)s --format=<format> --backends=<backends> [<options>] <trace-events> ... <output> + +Backends: +%(backends)s + +Formats: +%(formats)s + +Options: + --help This help message. + --list-backends Print list of available backends. + --check-backends Check if the given backend is valid. + --binary <path> Full path to QEMU binary. + --target-type <type> QEMU emulator target type ('system' or 'user'). + --target-name <name> QEMU emulator target name. 
+ --group <name> Name of the event group + --probe-prefix <prefix> Prefix for dtrace probe names + (default: qemu-<target-type>-<target-name>).\ +""" % { + "script" : _SCRIPT, + "backends" : backend_descr, + "formats" : format_descr, + }) + + if msg is None: + sys.exit(0) + else: + sys.exit(1) + +def main(args): + global _SCRIPT + _SCRIPT = args[0] + + long_opts = ["backends=", "format=", "help", "list-backends", + "check-backends", "group="] + long_opts += ["binary=", "target-type=", "target-name=", "probe-prefix="] + + try: + opts, args = getopt.getopt(args[1:], "", long_opts) + except getopt.GetoptError as err: + error_opt(str(err)) + + check_backends = False + arg_backends = [] + arg_format = "" + arg_group = None + binary = None + target_type = None + target_name = None + probe_prefix = None + for opt, arg in opts: + if opt == "--help": + error_opt() + + elif opt == "--backends": + arg_backends = arg.split(",") + elif opt == "--group": + arg_group = arg + elif opt == "--format": + arg_format = arg + + elif opt == "--list-backends": + public_backends = tracetool.backend.get_list(only_public = True) + out(", ".join([ b for b,_ in public_backends ])) + sys.exit(0) + elif opt == "--check-backends": + check_backends = True + + elif opt == "--binary": + binary = arg + elif opt == '--target-type': + target_type = arg + elif opt == '--target-name': + target_name = arg + elif opt == '--probe-prefix': + probe_prefix = arg + + else: + error_opt("unhandled option: %s" % opt) + + if len(arg_backends) == 0: + error_opt("no backends specified") + + if check_backends: + for backend in arg_backends: + if not tracetool.backend.exists(backend): + sys.exit(1) + sys.exit(0) + + if arg_group is None: + error_opt("group name is required") + + if arg_format == "stap": + if binary is None: + error_opt("--binary is required for SystemTAP tapset generator") + if probe_prefix is None and target_type is None: + error_opt("--target-type is required for SystemTAP tapset generator") + if probe_prefix is None and target_name is None: + error_opt("--target-name is required for SystemTAP tapset generator") + + if probe_prefix is None: + probe_prefix = ".".join(["qemu", target_type, target_name]) + + if len(args) < 2: + error_opt("missing trace-events and output filepaths") + events = [] + for arg in args[:-1]: + with open(arg, "r") as fh: + events.extend(tracetool.read_events(fh, arg)) + + out_open(args[-1]) + + try: + tracetool.generate(events, arg_group, arg_format, arg_backends, + binary=binary, probe_prefix=probe_prefix) + except tracetool.TracetoolError as e: + error_opt(str(e)) + +if __name__ == "__main__": + main(sys.argv) diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py new file mode 100644 index 000000000..5bc94d95c --- /dev/null +++ b/scripts/tracetool/__init__.py @@ -0,0 +1,513 @@ +# -*- coding: utf-8 -*- + +""" +Machinery for generating tracing-related intermediate files. 
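+ +A typical flow, as an illustrative sketch (the file names and the 'common' +group name are hypothetical; formats and backends are looked up dynamically):: + + events = read_events(open('trace-events'), 'trace-events') + out_open('trace-generated.h') + generate(events, 'common', 'h', ['nop'])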
+""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2017, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +import re +import sys +import weakref + +import tracetool.format +import tracetool.backend +import tracetool.transform + + +def error_write(*lines): + """Write a set of error lines.""" + sys.stderr.writelines("\n".join(lines) + "\n") + +def error(*lines): + """Write a set of error lines and exit.""" + error_write(*lines) + sys.exit(1) + + +out_lineno = 1 +out_filename = '<none>' +out_fobj = sys.stdout + +def out_open(filename): + global out_filename, out_fobj + out_filename = filename + out_fobj = open(filename, 'wt') + +def out(*lines, **kwargs): + """Write a set of output lines. + + You can use kwargs as a shorthand for mapping variables when formatting all + the strings in lines. + + The 'out_lineno' kwarg is automatically added to reflect the current output + file line number. The 'out_next_lineno' kwarg is also automatically added + with the next output line number. The 'out_filename' kwarg is automatically + added with the output filename. + """ + global out_lineno + output = [] + for l in lines: + kwargs['out_lineno'] = out_lineno + kwargs['out_next_lineno'] = out_lineno + 1 + kwargs['out_filename'] = out_filename + output.append(l % kwargs) + out_lineno += 1 + + out_fobj.writelines("\n".join(output) + "\n") + +# We only want to allow standard C types or fixed sized +# integer types. We don't want QEMU specific types +# as we can't assume trace backends can resolve all the +# typedefs +ALLOWED_TYPES = [ + "int", + "long", + "short", + "char", + "bool", + "unsigned", + "signed", + "int8_t", + "uint8_t", + "int16_t", + "uint16_t", + "int32_t", + "uint32_t", + "int64_t", + "uint64_t", + "void", + "size_t", + "ssize_t", + "uintptr_t", + "ptrdiff_t", + # Magic substitution is done by tracetool + "TCGv", +] + +def validate_type(name): + bits = name.split(" ") + for bit in bits: + bit = re.sub("\*", "", bit) + if bit == "": + continue + if bit == "const": + continue + if bit not in ALLOWED_TYPES: + raise ValueError("Argument type '%s' is not allowed. " + "Only standard C types and fixed size integer " + "types should be used. struct, union, and " + "other complex pointer types should be " + "declared as 'void *'" % name) + +class Arguments: + """Event arguments description.""" + + def __init__(self, args): + """ + Parameters + ---------- + args : + List of (type, name) tuples or Arguments objects. + """ + self._args = [] + for arg in args: + if isinstance(arg, Arguments): + self._args.extend(arg._args) + else: + self._args.append(arg) + + def copy(self): + """Create a new copy.""" + return Arguments(list(self._args)) + + @staticmethod + def build(arg_str): + """Build and Arguments instance from an argument string. + + Parameters + ---------- + arg_str : str + String describing the event arguments. 
+ """ + res = [] + for arg in arg_str.split(","): + arg = arg.strip() + if not arg: + raise ValueError("Empty argument (did you forget to use 'void'?)") + if arg == 'void': + continue + + if '*' in arg: + arg_type, identifier = arg.rsplit('*', 1) + arg_type += '*' + identifier = identifier.strip() + else: + arg_type, identifier = arg.rsplit(None, 1) + + validate_type(arg_type) + res.append((arg_type, identifier)) + return Arguments(res) + + def __getitem__(self, index): + if isinstance(index, slice): + return Arguments(self._args[index]) + else: + return self._args[index] + + def __iter__(self): + """Iterate over the (type, name) pairs.""" + return iter(self._args) + + def __len__(self): + """Number of arguments.""" + return len(self._args) + + def __str__(self): + """String suitable for declaring function arguments.""" + if len(self._args) == 0: + return "void" + else: + return ", ".join([ " ".join([t, n]) for t,n in self._args ]) + + def __repr__(self): + """Evaluable string representation for this object.""" + return "Arguments(\"%s\")" % str(self) + + def names(self): + """List of argument names.""" + return [ name for _, name in self._args ] + + def types(self): + """List of argument types.""" + return [ type_ for type_, _ in self._args ] + + def casted(self): + """List of argument names casted to their type.""" + return ["(%s)%s" % (type_, name) for type_, name in self._args] + + def transform(self, *trans): + """Return a new Arguments instance with transformed types. + + The types in the resulting Arguments instance are transformed according + to tracetool.transform.transform_type. + """ + res = [] + for type_, name in self._args: + res.append((tracetool.transform.transform_type(type_, *trans), + name)) + return Arguments(res) + + +class Event(object): + """Event description. + + Attributes + ---------- + name : str + The event name. + fmt : str + The event format string. + properties : set(str) + Properties of the event. + args : Arguments + The event arguments. + lineno : int + The line number in the input file. + filename : str + The path to the input file. + + """ + + _CRE = re.compile("((?P<props>[\w\s]+)\s+)?" + "(?P<name>\w+)" + "\((?P<args>[^)]*)\)" + "\s*" + "(?:(?:(?P<fmt_trans>\".+),)?\s*(?P<fmt>\".+))?" + "\s*") + + _VALID_PROPS = set(["disable", "tcg", "tcg-trans", "tcg-exec", "vcpu"]) + + def __init__(self, name, props, fmt, args, lineno, filename, orig=None, + event_trans=None, event_exec=None): + """ + Parameters + ---------- + name : string + Event name. + props : list of str + Property names. + fmt : str, list of str + Event printing format string(s). + args : Arguments + Event arguments. + lineno : int + The line number in the input file. + filename : str + The path to the input file. + orig : Event or None + Original Event before transformation/generation. + event_trans : Event or None + Generated translation-time event ("tcg" property). + event_exec : Event or None + Generated execution-time event ("tcg" property). 
+ + """ + self.name = name + self.properties = props + self.fmt = fmt + self.args = args + self.lineno = int(lineno) + self.filename = str(filename) + self.event_trans = event_trans + self.event_exec = event_exec + + if len(args) > 10: + raise ValueError("Event '%s' has more than maximum permitted " + "argument count" % name) + + if orig is None: + self.original = weakref.ref(self) + else: + self.original = orig + + unknown_props = set(self.properties) - self._VALID_PROPS + if len(unknown_props) > 0: + raise ValueError("Unknown properties: %s" + % ", ".join(unknown_props)) + assert isinstance(self.fmt, str) or len(self.fmt) == 2 + + def copy(self): + """Create a new copy.""" + return Event(self.name, list(self.properties), self.fmt, + self.args.copy(), self.lineno, self.filename, + self, self.event_trans, self.event_exec) + + @staticmethod + def build(line_str, lineno, filename): + """Build an Event instance from a string. + + Parameters + ---------- + line_str : str + Line describing the event. + lineno : int + Line number in input file. + filename : str + Path to input file. + """ + m = Event._CRE.match(line_str) + assert m is not None + groups = m.groupdict('') + + name = groups["name"] + props = groups["props"].split() + fmt = groups["fmt"] + fmt_trans = groups["fmt_trans"] + if fmt.find("%m") != -1 or fmt_trans.find("%m") != -1: + raise ValueError("Event format '%m' is forbidden, pass the error " + "as an explicit trace argument") + if fmt.endswith(r'\n"'): + raise ValueError("Event format must not end with a newline " + "character") + + if len(fmt_trans) > 0: + fmt = [fmt_trans, fmt] + args = Arguments.build(groups["args"]) + + if "tcg-trans" in props: + raise ValueError("Invalid property 'tcg-trans'") + if "tcg-exec" in props: + raise ValueError("Invalid property 'tcg-exec'") + if "tcg" not in props and not isinstance(fmt, str): + raise ValueError("Only events with 'tcg' property can have two format strings") + if "tcg" in props and isinstance(fmt, str): + raise ValueError("Events with 'tcg' property must have two format strings") + + event = Event(name, props, fmt, args, lineno, filename) + + # add implicit arguments when using the 'vcpu' property + import tracetool.vcpu + event = tracetool.vcpu.transform_event(event) + + return event + + def __repr__(self): + """Evaluable string representation for this object.""" + if isinstance(self.fmt, str): + fmt = self.fmt + else: + fmt = "%s, %s" % (self.fmt[0], self.fmt[1]) + return "Event('%s %s(%s) %s')" % (" ".join(self.properties), + self.name, + self.args, + fmt) + # Star matching on PRI is dangerous as one might have multiple + # arguments with that format, hence the non-greedy version of it. 
+ _FMT = re.compile("(%[\d\.]*\w+|%.*?PRI\S+)") + + def formats(self): + """List conversion specifiers in the argument print format string.""" + assert not isinstance(self.fmt, list) + return self._FMT.findall(self.fmt) + + QEMU_TRACE = "trace_%(name)s" + QEMU_TRACE_NOCHECK = "_nocheck__" + QEMU_TRACE + QEMU_TRACE_TCG = QEMU_TRACE + "_tcg" + QEMU_DSTATE = "_TRACE_%(NAME)s_DSTATE" + QEMU_BACKEND_DSTATE = "TRACE_%(NAME)s_BACKEND_DSTATE" + QEMU_EVENT = "_TRACE_%(NAME)s_EVENT" + + def api(self, fmt=None): + if fmt is None: + fmt = Event.QEMU_TRACE + return fmt % {"name": self.name, "NAME": self.name.upper()} + + def transform(self, *trans): + """Return a new Event with transformed Arguments.""" + return Event(self.name, + list(self.properties), + self.fmt, + self.args.transform(*trans), + self.lineno, + self.filename, + self) + + +def read_events(fobj, fname): + """Read event descriptions from a file and build a list of Event objects. + + Parameters + ---------- + fobj : file + Event description file. + fname : str + Name of the event file. + + Returns a list of Event objects + """ + + events = [] + for lineno, line in enumerate(fobj, 1): + if line[-1] != '\n': + raise ValueError("%s does not end with a newline" % fname) + if not line.strip(): + continue + if line.lstrip().startswith('#'): + continue + + try: + event = Event.build(line, lineno, fname) + except ValueError as e: + arg0 = 'Error at %s:%d: %s' % (fname, lineno, e.args[0]) + e.args = (arg0,) + e.args[1:] + raise + + # transform TCG-enabled events + if "tcg" not in event.properties: + events.append(event) + else: + event_trans = event.copy() + event_trans.name += "_trans" + event_trans.properties += ["tcg-trans"] + event_trans.fmt = event.fmt[0] + # ignore TCG arguments + args_trans = [] + for atrans, aorig in zip( + event_trans.transform(tracetool.transform.TCG_2_HOST).args, + event.args): + if atrans == aorig: + args_trans.append(atrans) + event_trans.args = Arguments(args_trans) + + event_exec = event.copy() + event_exec.name += "_exec" + event_exec.properties += ["tcg-exec"] + event_exec.fmt = event.fmt[1] + event_exec.args = event_exec.args.transform(tracetool.transform.TCG_2_HOST) + + new_event = [event_trans, event_exec] + event.event_trans, event.event_exec = new_event + + events.extend(new_event) + + return events + + +class TracetoolError(Exception): + """Exception for calls to generate.""" + pass + + +def try_import(mod_name, attr_name=None, attr_default=None): + """Try to import a module and get an attribute from it. + + Parameters + ---------- + mod_name : str + Module name. + attr_name : str, optional + Name of an attribute in the module. + attr_default : optional + Default value if the attribute does not exist in the module. + + Returns + ------- + A pair indicating whether the module could be imported, and the module + itself (or the requested attribute value) if it could. + """ + try: + module = __import__(mod_name, globals(), locals(), ["__package__"]) + if attr_name is None: + return True, module + return True, getattr(module, str(attr_name), attr_default) + except ImportError: + return False, None + + +def generate(events, group, format, backends, + binary=None, probe_prefix=None): + """Generate the output for the given (format, backends) pair. + + Parameters + ---------- + events : list + list of Event objects to generate for + group : str + Name of the tracing group. + format : str + Output format name. + backends : list + Output backend names. + binary : str or None + See tracetool.backend.dtrace.BINARY.
+ probe_prefix : str or None + See tracetool.backend.dtrace.PROBEPREFIX. + """ + # fix strange python error (UnboundLocalError tracetool) + import tracetool + + format = str(format) + if len(format) == 0: + raise TracetoolError("format not set") + if not tracetool.format.exists(format): + raise TracetoolError("unknown format: %s" % format) + + if len(backends) == 0: + raise TracetoolError("no backends specified") + for backend in backends: + if not tracetool.backend.exists(backend): + raise TracetoolError("unknown backend: %s" % backend) + backend = tracetool.backend.Wrapper(backends, format) + + import tracetool.backend.dtrace + tracetool.backend.dtrace.BINARY = binary + tracetool.backend.dtrace.PROBEPREFIX = probe_prefix + + tracetool.format.generate(events, format, backend, group) diff --git a/scripts/tracetool/backend/__init__.py b/scripts/tracetool/backend/__init__.py new file mode 100644 index 000000000..7bfcc86cc --- /dev/null +++ b/scripts/tracetool/backend/__init__.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- + +""" +Backend management. + + +Creating new backends +--------------------- + +A new backend named 'foo-bar' corresponds to Python module +'tracetool/backend/foo_bar.py'. + +A backend module should provide a docstring, whose first non-empty line will be +considered its short description. + +All backends must generate their contents through the 'tracetool.out' routine. + + +Backend attributes +------------------ + +========= ==================================================================== +Attribute Description +========= ==================================================================== +PUBLIC If exists and is set to 'True', the backend is considered "public". +========= ==================================================================== + + +Backend functions +----------------- + +All the following functions are optional, and no output will be generated if +they do not exist. + +=============================== ============================================== +Function Description +=============================== ============================================== +generate_<format>_begin(events) Generate backend- and format-specific file + header contents. +generate_<format>_end(events) Generate backend- and format-specific file + footer contents. +generate_<format>(event) Generate backend- and format-specific contents + for the given event. +=============================== ============================================== + +""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2014, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +import os + +import tracetool + + +def get_list(only_public = False): + """Get a list of (name, description) pairs.""" + res = [("nop", "Tracing disabled.")] + modnames = [] + for filename in os.listdir(tracetool.backend.__path__[0]): + if filename.endswith('.py') and filename != '__init__.py': + modnames.append(filename.rsplit('.', 1)[0]) + for modname in sorted(modnames): + module = tracetool.try_import("tracetool.backend." 
+ modname) + + # just in case; should never fail unless non-module files are put there + if not module[0]: + continue + module = module[1] + + public = getattr(module, "PUBLIC", False) + if only_public and not public: + continue + + doc = module.__doc__ + if doc is None: + doc = "" + doc = doc.strip().split("\n")[0] + + name = modname.replace("_", "-") + res.append((name, doc)) + return res + + +def exists(name): + """Return whether the given backend exists.""" + if len(name) == 0: + return False + if name == "nop": + return True + name = name.replace("-", "_") + return tracetool.try_import("tracetool.backend." + name)[1] + + +class Wrapper: + def __init__(self, backends, format): + self._backends = [backend.replace("-", "_") for backend in backends] + self._format = format.replace("-", "_") + for backend in self._backends: + assert exists(backend) + assert tracetool.format.exists(self._format) + + def _run_function(self, name, *args, **kwargs): + for backend in self._backends: + func = tracetool.try_import("tracetool.backend." + backend, + name % self._format, None)[1] + if func is not None: + func(*args, **kwargs) + + def generate_begin(self, events, group): + self._run_function("generate_%s_begin", events, group) + + def generate(self, event, group): + self._run_function("generate_%s", event, group) + + def generate_backend_dstate(self, event, group): + self._run_function("generate_%s_backend_dstate", event, group) + + def generate_end(self, events, group): + self._run_function("generate_%s_end", events, group) diff --git a/scripts/tracetool/backend/dtrace.py b/scripts/tracetool/backend/dtrace.py new file mode 100644 index 000000000..e17edc9b9 --- /dev/null +++ b/scripts/tracetool/backend/dtrace.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- + +""" +DTrace/SystemTAP backend. +""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2017, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out + + +PUBLIC = True + + +PROBEPREFIX = None + +def probeprefix(): + if PROBEPREFIX is None: + raise ValueError("you must set PROBEPREFIX") + return PROBEPREFIX + + +BINARY = None + +def binary(): + if BINARY is None: + raise ValueError("you must set BINARY") + return BINARY + + +def generate_h_begin(events, group): + if group == "root": + header = "trace-dtrace-root.h" + else: + header = "trace-dtrace-%s.h" % group + + # Workaround for ust backend, which also includes <sys/sdt.h> and may + # require SDT_USE_VARIADIC to be defined. If dtrace includes <sys/sdt.h> + # first without defining SDT_USE_VARIADIC then ust breaks because the + # STAP_PROBEV() macro is not defined. + out('#ifndef SDT_USE_VARIADIC') + out('#define SDT_USE_VARIADIC 1') + out('#endif') + + out('#include "%s"' % header, + '') + + out('#undef SDT_USE_VARIADIC') + + # SystemTap defines <provider>_<name>_ENABLED() but other DTrace + # implementations might not. 
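    # For instance, a hypothetical event named foo would produce:
    #
    #   #ifndef QEMU_FOO_ENABLED
    #   #define QEMU_FOO_ENABLED() true
    #   #endif
    #
    # so QEMU_<NAME>_ENABLED() can be used unconditionally further below.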
+ for e in events: + out('#ifndef QEMU_%(uppername)s_ENABLED', + '#define QEMU_%(uppername)s_ENABLED() true', + '#endif', + uppername=e.name.upper()) + +def generate_h(event, group): + out(' QEMU_%(uppername)s(%(argnames)s);', + uppername=event.name.upper(), + argnames=", ".join(event.args.names())) + + +def generate_h_backend_dstate(event, group): + out(' QEMU_%(uppername)s_ENABLED() || \\', + uppername=event.name.upper()) diff --git a/scripts/tracetool/backend/ftrace.py b/scripts/tracetool/backend/ftrace.py new file mode 100644 index 000000000..5fa30ccc0 --- /dev/null +++ b/scripts/tracetool/backend/ftrace.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- + +""" +Ftrace built-in backend. +""" + +__author__ = "Eiichi Tsukata <eiichi.tsukata.xh@hitachi.com>" +__copyright__ = "Copyright (C) 2013 Hitachi, Ltd." +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out + + +PUBLIC = True + + +def generate_h_begin(events, group): + out('#include "trace/ftrace.h"', + '') + + +def generate_h(event, group): + argnames = ", ".join(event.args.names()) + if len(event.args) > 0: + argnames = ", " + argnames + + out(' {', + ' char ftrace_buf[MAX_TRACE_STRLEN];', + ' int unused __attribute__ ((unused));', + ' int trlen;', + ' if (trace_event_get_state(%(event_id)s)) {', + '#line %(event_lineno)d "%(event_filename)s"', + ' trlen = snprintf(ftrace_buf, MAX_TRACE_STRLEN,', + ' "%(name)s " %(fmt)s "\\n" %(argnames)s);', + '#line %(out_next_lineno)d "%(out_filename)s"', + ' trlen = MIN(trlen, MAX_TRACE_STRLEN - 1);', + ' unused = write(trace_marker_fd, ftrace_buf, trlen);', + ' }', + ' }', + name=event.name, + args=event.args, + event_id="TRACE_" + event.name.upper(), + event_lineno=event.lineno, + event_filename=event.filename, + fmt=event.fmt.rstrip("\n"), + argnames=argnames) + + +def generate_h_backend_dstate(event, group): + out(' trace_event_get_state_dynamic_by_id(%(event_id)s) || \\', + event_id="TRACE_" + event.name.upper()) diff --git a/scripts/tracetool/backend/log.py b/scripts/tracetool/backend/log.py new file mode 100644 index 000000000..17ba1cd90 --- /dev/null +++ b/scripts/tracetool/backend/log.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- + +""" +Stderr built-in backend. 
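
As a sketch (the event is hypothetical): for foo(int x) "x %d", the
generated tracer checks trace_event_get_state(TRACE_FOO) together with
qemu_loglevel_mask(LOG_TRACE), then emits either "foo x 7" or, with
timestamps enabled, "<thread-id>@<sec>.<usec>:foo x 7" via qemu_log().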
+""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2017, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out + + +PUBLIC = True + + +def generate_h_begin(events, group): + out('#include "qemu/log-for-trace.h"', + '#include "qemu/error-report.h"', + '') + + +def generate_h(event, group): + argnames = ", ".join(event.args.names()) + if len(event.args) > 0: + argnames = ", " + argnames + + if "vcpu" in event.properties: + # already checked on the generic format code + cond = "true" + else: + cond = "trace_event_get_state(%s)" % ("TRACE_" + event.name.upper()) + + out(' if (%(cond)s && qemu_loglevel_mask(LOG_TRACE)) {', + ' if (message_with_timestamp) {', + ' struct timeval _now;', + ' gettimeofday(&_now, NULL);', + '#line %(event_lineno)d "%(event_filename)s"', + ' qemu_log("%%d@%%zu.%%06zu:%(name)s " %(fmt)s "\\n",', + ' qemu_get_thread_id(),', + ' (size_t)_now.tv_sec, (size_t)_now.tv_usec', + ' %(argnames)s);', + '#line %(out_next_lineno)d "%(out_filename)s"', + ' } else {', + '#line %(event_lineno)d "%(event_filename)s"', + ' qemu_log("%(name)s " %(fmt)s "\\n"%(argnames)s);', + '#line %(out_next_lineno)d "%(out_filename)s"', + ' }', + ' }', + cond=cond, + event_lineno=event.lineno, + event_filename=event.filename, + name=event.name, + fmt=event.fmt.rstrip("\n"), + argnames=argnames) + + +def generate_h_backend_dstate(event, group): + out(' trace_event_get_state_dynamic_by_id(%(event_id)s) || \\', + event_id="TRACE_" + event.name.upper()) diff --git a/scripts/tracetool/backend/simple.py b/scripts/tracetool/backend/simple.py new file mode 100644 index 000000000..a74d61fcd --- /dev/null +++ b/scripts/tracetool/backend/simple.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- + +""" +Simple built-in backend. +""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2017, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out + + +PUBLIC = True + + +def is_string(arg): + strtype = ('const char*', 'char*', 'const char *', 'char *') + arg_strip = arg.lstrip() + if arg_strip.startswith(strtype) and arg_strip.count('*') == 1: + return True + else: + return False + + +def generate_h_begin(events, group): + for event in events: + out('void _simple_%(api)s(%(args)s);', + api=event.api(), + args=event.args) + out('') + + +def generate_h(event, group): + out(' _simple_%(api)s(%(args)s);', + api=event.api(), + args=", ".join(event.args.names())) + + +def generate_h_backend_dstate(event, group): + out(' trace_event_get_state_dynamic_by_id(%(event_id)s) || \\', + event_id="TRACE_" + event.name.upper()) + + +def generate_c_begin(events, group): + out('#include "qemu/osdep.h"', + '#include "trace/control.h"', + '#include "trace/simple.h"', + '') + + +def generate_c(event, group): + out('void _simple_%(api)s(%(args)s)', + '{', + ' TraceBufferRecord rec;', + api=event.api(), + args=event.args) + sizes = [] + for type_, name in event.args: + if is_string(type_): + out(' size_t arg%(name)s_len = %(name)s ? 
MIN(strlen(%(name)s), MAX_TRACE_STRLEN) : 0;', + name=name) + strsizeinfo = "4 + arg%s_len" % name + sizes.append(strsizeinfo) + else: + sizes.append("8") + sizestr = " + ".join(sizes) + if len(event.args) == 0: + sizestr = '0' + + event_id = 'TRACE_' + event.name.upper() + if "vcpu" in event.properties: + # already checked on the generic format code + cond = "true" + else: + cond = "trace_event_get_state(%s)" % event_id + + out('', + ' if (!%(cond)s) {', + ' return;', + ' }', + '', + ' if (trace_record_start(&rec, %(event_obj)s.id, %(size_str)s)) {', + ' return; /* Trace Buffer Full, Event Dropped ! */', + ' }', + cond=cond, + event_obj=event.api(event.QEMU_EVENT), + size_str=sizestr) + + if len(event.args) > 0: + for type_, name in event.args: + # string + if is_string(type_): + out(' trace_record_write_str(&rec, %(name)s, arg%(name)s_len);', + name=name) + # pointer var (not string) + elif type_.endswith('*'): + out(' trace_record_write_u64(&rec, (uintptr_t)(uint64_t *)%(name)s);', + name=name) + # primitive data type + else: + out(' trace_record_write_u64(&rec, (uint64_t)%(name)s);', + name=name) + + out(' trace_record_finish(&rec);', + '}', + '') diff --git a/scripts/tracetool/backend/syslog.py b/scripts/tracetool/backend/syslog.py new file mode 100644 index 000000000..5a3a00fe3 --- /dev/null +++ b/scripts/tracetool/backend/syslog.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- + +""" +Syslog built-in backend. +""" + +__author__ = "Paul Durrant <paul.durrant@citrix.com>" +__copyright__ = "Copyright 2016, Citrix Systems Inc." +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out + + +PUBLIC = True + + +def generate_h_begin(events, group): + out('#include <syslog.h>', + '') + + +def generate_h(event, group): + argnames = ", ".join(event.args.names()) + if len(event.args) > 0: + argnames = ", " + argnames + + if "vcpu" in event.properties: + # already checked on the generic format code + cond = "true" + else: + cond = "trace_event_get_state(%s)" % ("TRACE_" + event.name.upper()) + + out(' if (%(cond)s) {', + '#line %(event_lineno)d "%(event_filename)s"', + ' syslog(LOG_INFO, "%(name)s " %(fmt)s %(argnames)s);', + '#line %(out_next_lineno)d "%(out_filename)s"', + ' }', + cond=cond, + event_lineno=event.lineno, + event_filename=event.filename, + name=event.name, + fmt=event.fmt.rstrip("\n"), + argnames=argnames) + + +def generate_h_backend_dstate(event, group): + out(' trace_event_get_state_dynamic_by_id(%(event_id)s) || \\', + event_id="TRACE_" + event.name.upper()) diff --git a/scripts/tracetool/backend/ust.py b/scripts/tracetool/backend/ust.py new file mode 100644 index 000000000..c857516f2 --- /dev/null +++ b/scripts/tracetool/backend/ust.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- + +""" +LTTng User Space Tracing backend. 
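
For each event, the generated code reduces to a single
tracepoint(qemu, <name>, <args>) call, and the backend dstate check is
tracepoint_enabled(qemu, <name>), with a fallback for LTTng UST releases
older than 2.7 that lack tracepoint_enabled() (see below).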
+""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2017, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out + + +PUBLIC = True + + +def generate_h_begin(events, group): + header = 'trace-ust-' + group + '.h' + out('#include <lttng/tracepoint.h>', + '#include "%s"' % header, + '', + '/* tracepoint_enabled() was introduced in LTTng UST 2.7 */', + '#ifndef tracepoint_enabled', + '#define tracepoint_enabled(a, b) true', + '#endif', + '') + + +def generate_h(event, group): + argnames = ", ".join(event.args.names()) + if len(event.args) > 0: + argnames = ", " + argnames + + out(' tracepoint(qemu, %(name)s%(tp_args)s);', + name=event.name, + tp_args=argnames) + + +def generate_h_backend_dstate(event, group): + out(' tracepoint_enabled(qemu, %(name)s) || \\', + name=event.name) diff --git a/scripts/tracetool/format/__init__.py b/scripts/tracetool/format/__init__.py new file mode 100644 index 000000000..2dc46f3dd --- /dev/null +++ b/scripts/tracetool/format/__init__.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- + +""" +Format management. + + +Creating new formats +-------------------- + +A new format named 'foo-bar' corresponds to Python module +'tracetool/format/foo_bar.py'. + +A format module should provide a docstring, whose first non-empty line will be +considered its short description. + +All formats must generate their contents through the 'tracetool.out' routine. + + +Format functions +---------------- + +======== ================================================================== +Function Description +======== ================================================================== +generate Called to generate a format-specific file. +======== ================================================================== + +""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2014, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +import os + +import tracetool + + +def get_list(): + """Get a list of (name, description) pairs.""" + res = [] + modnames = [] + for filename in os.listdir(tracetool.format.__path__[0]): + if filename.endswith('.py') and filename != '__init__.py': + modnames.append(filename.rsplit('.', 1)[0]) + for modname in sorted(modnames): + module = tracetool.try_import("tracetool.format." + modname) + + # just in case; should never fail unless non-module files are put there + if not module[0]: + continue + module = module[1] + + doc = module.__doc__ + if doc is None: + doc = "" + doc = doc.strip().split("\n")[0] + + name = modname.replace("_", "-") + res.append((name, doc)) + return res + + +def exists(name): + """Return whether the given format exists.""" + if len(name) == 0: + return False + name = name.replace("-", "_") + return tracetool.try_import("tracetool.format." + name)[1] + + +def generate(events, format, backend, group): + if not exists(format): + raise ValueError("unknown format: %s" % format) + format = format.replace("-", "_") + func = tracetool.try_import("tracetool.format." 
+ format, + "generate")[1] + if func is None: + raise AttributeError("format has no 'generate': %s" % format) + func(events, backend, group) diff --git a/scripts/tracetool/format/c.py b/scripts/tracetool/format/c.py new file mode 100644 index 000000000..c390c1844 --- /dev/null +++ b/scripts/tracetool/format/c.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- + +""" +trace/generated-tracers.c +""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2014, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out + + +def generate(events, backend, group): + active_events = [e for e in events + if "disable" not in e.properties] + + header = "trace-" + group + ".h" + + out('/* This file is autogenerated by tracetool, do not edit. */', + '', + '#include "qemu/osdep.h"', + '#include "qemu/module.h"', + '#include "%s"' % header, + '') + + for e in events: + out('uint16_t %s;' % e.api(e.QEMU_DSTATE)) + + for e in events: + if "vcpu" in e.properties: + vcpu_id = 0 + else: + vcpu_id = "TRACE_VCPU_EVENT_NONE" + out('TraceEvent %(event)s = {', + ' .id = 0,', + ' .vcpu_id = %(vcpu_id)s,', + ' .name = \"%(name)s\",', + ' .sstate = %(sstate)s,', + ' .dstate = &%(dstate)s ', + '};', + event = e.api(e.QEMU_EVENT), + vcpu_id = vcpu_id, + name = e.name, + sstate = "TRACE_%s_ENABLED" % e.name.upper(), + dstate = e.api(e.QEMU_DSTATE)) + + out('TraceEvent *%(group)s_trace_events[] = {', + group = group.lower()) + + for e in events: + out(' &%(event)s,', event = e.api(e.QEMU_EVENT)) + + out(' NULL,', + '};', + '') + + out('static void trace_%(group)s_register_events(void)', + '{', + ' trace_event_register_group(%(group)s_trace_events);', + '}', + 'trace_init(trace_%(group)s_register_events)', + group = group.lower()) + + backend.generate_begin(active_events, group) + for event in active_events: + backend.generate(event, group) + backend.generate_end(active_events, group) diff --git a/scripts/tracetool/format/d.py b/scripts/tracetool/format/d.py new file mode 100644 index 000000000..ebfb71420 --- /dev/null +++ b/scripts/tracetool/format/d.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- + +""" +trace/generated-tracers.dtrace (DTrace only). +""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2014, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out +from sys import platform + + +# Reserved keywords from +# https://wikis.oracle.com/display/DTrace/Types,+Operators+and+Expressions +RESERVED_WORDS = ( + 'auto', 'goto', 'sizeof', 'break', 'if', 'static', 'case', 'import', + 'string', 'char', 'inline', 'stringof', 'const', 'int', 'struct', + 'continue', 'long', 'switch', 'counter', 'offsetof', 'this', + 'default', 'probe', 'translator', 'do', 'provider', 'typedef', + 'double', 'register', 'union', 'else', 'restrict', 'unsigned', + 'enum', 'return', 'void', 'extern', 'self', 'volatile', 'float', + 'short', 'while', 'for', 'signed', 'xlate', +) + + +def generate(events, backend, group): + events = [e for e in events + if "disable" not in e.properties] + + # SystemTap's dtrace(1) warns about empty "provider qemu {}" but is happy + # with an empty file. Avoid the warning. + # But dtrace on macOS can't deal with empty files. 
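    # As a sketch, a hypothetical event foo(int fd, const char *name)
    # would be emitted as:
    #
    #   provider qemu {
    #       probe foo(int fd,const char *name);
    #   };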
+ if not events and platform != "darwin": + return + + out('/* This file is autogenerated by tracetool, do not edit. */' + '', + 'provider qemu {') + + for e in events: + args = [] + for type_, name in e.args: + if platform == "darwin": + # macOS dtrace accepts only C99 _Bool + if type_ == 'bool': + type_ = '_Bool' + if type_ == 'bool *': + type_ = '_Bool *' + # It converts int8_t * in probe points to char * in header + # files and introduces [-Wpointer-sign] warning. + # Avoid it by changing probe type to signed char * beforehand. + if type_ == 'int8_t *': + type_ = 'signed char *' + + # SystemTap dtrace(1) emits a warning when long long is used + type_ = type_.replace('unsigned long long', 'uint64_t') + type_ = type_.replace('signed long long', 'int64_t') + type_ = type_.replace('long long', 'int64_t') + + if name in RESERVED_WORDS: + name += '_' + args.append(type_ + ' ' + name) + + # Define prototype for probe arguments + out('', + 'probe %(name)s(%(args)s);', + name=e.name, + args=','.join(args)) + + out('', + '};') diff --git a/scripts/tracetool/format/h.py b/scripts/tracetool/format/h.py new file mode 100644 index 000000000..e94f0be7d --- /dev/null +++ b/scripts/tracetool/format/h.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- + +""" +trace/generated-tracers.h +""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2017, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out + + +def generate(events, backend, group): + if group == "root": + header = "trace/control-vcpu.h" + else: + header = "trace/control.h" + + out('/* This file is autogenerated by tracetool, do not edit. 
*/', + '', + '#ifndef TRACE_%s_GENERATED_TRACERS_H' % group.upper(), + '#define TRACE_%s_GENERATED_TRACERS_H' % group.upper(), + '', + '#include "%s"' % header, + '') + + for e in events: + out('extern TraceEvent %(event)s;', + event = e.api(e.QEMU_EVENT)) + + for e in events: + out('extern uint16_t %s;' % e.api(e.QEMU_DSTATE)) + + # static state + for e in events: + if 'disable' in e.properties: + enabled = 0 + else: + enabled = 1 + if "tcg-exec" in e.properties: + # a single define for the two "sub-events" + out('#define TRACE_%(name)s_ENABLED %(enabled)d', + name=e.original.name.upper(), + enabled=enabled) + out('#define TRACE_%s_ENABLED %d' % (e.name.upper(), enabled)) + + backend.generate_begin(events, group) + + for e in events: + # tracer-specific dstate + out('', + '#define %(api)s() ( \\', + api=e.api(e.QEMU_BACKEND_DSTATE)) + + if "disable" not in e.properties: + backend.generate_backend_dstate(e, group) + + out(' false)') + + # tracer without checks + out('', + 'static inline void %(api)s(%(args)s)', + '{', + api=e.api(e.QEMU_TRACE_NOCHECK), + args=e.args) + + if "disable" not in e.properties: + backend.generate(e, group) + + out('}') + + # tracer wrapper with checks (per-vCPU tracing) + if "vcpu" in e.properties: + trace_cpu = next(iter(e.args))[1] + cond = "trace_event_get_vcpu_state(%(cpu)s,"\ + " TRACE_%(id)s)"\ + % dict( + cpu=trace_cpu, + id=e.name.upper()) + else: + cond = "true" + + out('', + 'static inline void %(api)s(%(args)s)', + '{', + ' if (%(cond)s) {', + ' %(api_nocheck)s(%(names)s);', + ' }', + '}', + api=e.api(), + api_nocheck=e.api(e.QEMU_TRACE_NOCHECK), + args=e.args, + names=", ".join(e.args.names()), + cond=cond) + + backend.generate_end(events, group) + + out('#endif /* TRACE_%s_GENERATED_TRACERS_H */' % group.upper()) diff --git a/scripts/tracetool/format/log_stap.py b/scripts/tracetool/format/log_stap.py new file mode 100644 index 000000000..0b6549d53 --- /dev/null +++ b/scripts/tracetool/format/log_stap.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- + +""" +Generate .stp file that printfs log messages (DTrace with SystemTAP only). +""" + +__author__ = "Daniel P. Berrange <berrange@redhat.com>" +__copyright__ = "Copyright (C) 2014-2019, Red Hat, Inc." 
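
# A sketch of what c_fmt_to_stap() below produces (both inputs are
# hypothetical trace-events format strings):
#   '"addr 0x%" PRIx64'  becomes  addr 0x%x   (PRI macros collapse)
#   '"len %zu"'          becomes  len %u      (size qualifiers stripped)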
+__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Daniel Berrange" +__email__ = "berrange@redhat.com" + +import re + +from tracetool import out +from tracetool.backend.dtrace import binary, probeprefix +from tracetool.backend.simple import is_string +from tracetool.format.stap import stap_escape + +def global_var_name(name): + return probeprefix().replace(".", "_") + "_" + name + +STATE_SKIP = 0 +STATE_LITERAL = 1 +STATE_MACRO = 2 + +def c_macro_to_format(macro): + if macro.startswith("PRI"): + return macro[3] + + raise Exception("Unhandled macro '%s'" % macro) + +def c_fmt_to_stap(fmt): + state = 0 + bits = [] + literal = "" + macro = "" + escape = 0; + for i in range(len(fmt)): + if fmt[i] == '\\': + if escape: + escape = 0 + else: + escape = 1 + if state != STATE_LITERAL: + raise Exception("Unexpected escape outside string literal") + literal = literal + fmt[i] + elif fmt[i] == '"' and not escape: + if state == STATE_LITERAL: + state = STATE_SKIP + bits.append(literal) + literal = "" + else: + if state == STATE_MACRO: + bits.append(c_macro_to_format(macro)) + macro = "" + state = STATE_LITERAL + elif fmt[i] == ' ' or fmt[i] == '\t': + if state == STATE_MACRO: + bits.append(c_macro_to_format(macro)) + macro = "" + state = STATE_SKIP + elif state == STATE_LITERAL: + literal = literal + fmt[i] + else: + escape = 0 + if state == STATE_SKIP: + state = STATE_MACRO + + if state == STATE_LITERAL: + literal = literal + fmt[i] + else: + macro = macro + fmt[i] + + if state == STATE_MACRO: + bits.append(c_macro_to_format(macro)) + elif state == STATE_LITERAL: + bits.append(literal) + + # All variables in systemtap are 64-bit in size + # The "%l" integer size qualifier is thus redundant + # and "%ll" is not valid at all. Similarly the size_t + # based "%z" size qualifier is not valid. We just + # strip all size qualifiers for sanity. + fmt = re.sub("%(\d*)(l+|z)(x|u|d)", "%\\1\\3", "".join(bits)) + return fmt + +def generate(events, backend, group): + out('/* This file is autogenerated by tracetool, do not edit. */', + '') + + for event_id, e in enumerate(events): + if 'disable' in e.properties: + continue + + out('probe %(probeprefix)s.log.%(name)s = %(probeprefix)s.%(name)s ?', + '{', + probeprefix=probeprefix(), + name=e.name) + + # Get references to userspace strings + for type_, name in e.args: + name = stap_escape(name) + if is_string(type_): + out(' try {', + ' arg%(name)s_str = %(name)s ? ' + + 'user_string_n(%(name)s, 512) : "<null>"', + ' } catch {}', + name=name) + + # Determine systemtap's view of variable names + fields = ["pid()", "gettimeofday_ns()"] + for type_, name in e.args: + name = stap_escape(name) + if is_string(type_): + fields.append("arg" + name + "_str") + else: + fields.append(name) + + # Emit the entire record in a single SystemTap printf() + arg_str = ', '.join(arg for arg in fields) + fmt_str = "%d@%d " + e.name + " " + c_fmt_to_stap(e.fmt) + "\\n" + out(' printf("%(fmt_str)s", %(arg_str)s)', + fmt_str=fmt_str, arg_str=arg_str) + + out('}') + + out() diff --git a/scripts/tracetool/format/simpletrace_stap.py b/scripts/tracetool/format/simpletrace_stap.py new file mode 100644 index 000000000..4f4633b4e --- /dev/null +++ b/scripts/tracetool/format/simpletrace_stap.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- + +""" +Generate .stp file that outputs simpletrace binary traces (DTrace with SystemTAP only). +""" + +__author__ = "Stefan Hajnoczi <redhat.com>" +__copyright__ = "Copyright (C) 2014, Red Hat, Inc." 
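
# A sketch of the record layout this generator emits, assuming the
# simpletrace binary format: an 8-byte record-type marker (1 for an event
# record), then an 8-byte event id, an 8-byte timestamp, a 4-byte record
# length and a 4-byte pid (together the 24 bytes of sizeof(TraceRecord)
# accounted for below), followed by the arguments, 8 bytes each, with
# strings written as a 4-byte length plus the bytes.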
+__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out +from tracetool.backend.dtrace import probeprefix +from tracetool.backend.simple import is_string +from tracetool.format.stap import stap_escape + +def global_var_name(name): + return probeprefix().replace(".", "_") + "_" + name + +def generate(events, backend, group): + out('/* This file is autogenerated by tracetool, do not edit. */', + '') + + for event_id, e in enumerate(events): + if 'disable' in e.properties: + continue + + out('probe %(probeprefix)s.simpletrace.%(name)s = %(probeprefix)s.%(name)s ?', + '{', + probeprefix=probeprefix(), + name=e.name) + + # Calculate record size + sizes = ['24'] # sizeof(TraceRecord) + for type_, name in e.args: + name = stap_escape(name) + if is_string(type_): + out(' try {', + ' arg%(name)s_str = %(name)s ? user_string_n(%(name)s, 512) : "<null>"', + ' } catch {}', + ' arg%(name)s_len = strlen(arg%(name)s_str)', + name=name) + sizes.append('4 + arg%s_len' % name) + else: + sizes.append('8') + sizestr = ' + '.join(sizes) + + # Generate format string and value pairs for record header and arguments + fields = [('8b', str(event_id)), + ('8b', 'gettimeofday_ns()'), + ('4b', sizestr), + ('4b', 'pid()')] + for type_, name in e.args: + name = stap_escape(name) + if is_string(type_): + fields.extend([('4b', 'arg%s_len' % name), + ('.*s', 'arg%s_len, arg%s_str' % (name, name))]) + else: + fields.append(('8b', name)) + + # Emit the entire record in a single SystemTap printf() + fmt_str = '%'.join(fmt for fmt, _ in fields) + arg_str = ', '.join(arg for _, arg in fields) + out(' printf("%%8b%%%(fmt_str)s", 1, %(arg_str)s)', + fmt_str=fmt_str, arg_str=arg_str) + + out('}') + + out() diff --git a/scripts/tracetool/format/stap.py b/scripts/tracetool/format/stap.py new file mode 100644 index 000000000..a218b0445 --- /dev/null +++ b/scripts/tracetool/format/stap.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- + +""" +Generate .stp file (DTrace with SystemTAP only). +""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2014, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out +from tracetool.backend.dtrace import binary, probeprefix + + +# Technically 'self' is not used by systemtap yet, but +# they recommended we keep it in the reserved list anyway +RESERVED_WORDS = ( + 'break', 'catch', 'continue', 'delete', 'else', 'for', + 'foreach', 'function', 'global', 'if', 'in', 'limit', + 'long', 'next', 'probe', 'return', 'self', 'string', + 'try', 'while' + ) + + +def stap_escape(identifier): + # Append underscore to reserved keywords + if identifier in RESERVED_WORDS: + return identifier + '_' + return identifier + + +def generate(events, backend, group): + events = [e for e in events + if "disable" not in e.properties] + + out('/* This file is autogenerated by tracetool, do not edit. 
*/', + '') + + for e in events: + # Define prototype for probe arguments + out('probe %(probeprefix)s.%(name)s = process("%(binary)s").mark("%(name)s")', + '{', + probeprefix=probeprefix(), + name=e.name, + binary=binary()) + + i = 1 + if len(e.args) > 0: + for name in e.args.names(): + name = stap_escape(name) + out(' %s = $arg%d;' % (name, i)) + i += 1 + + out('}') + + out() diff --git a/scripts/tracetool/format/tcg_h.py b/scripts/tracetool/format/tcg_h.py new file mode 100644 index 000000000..4d84440af --- /dev/null +++ b/scripts/tracetool/format/tcg_h.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- + +""" +Generate .h file for TCG code generation. +""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2017, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out, Arguments +import tracetool.vcpu + + +def vcpu_transform_args(args): + assert len(args) == 1 + return Arguments([ + args, + # NOTE: this name must be kept in sync with the one in "tcg_h" + # NOTE: Current helper code uses TCGv_env (CPUArchState*) + ("TCGv_env", "__tcg_" + args.names()[0]), + ]) + + +def generate(events, backend, group): + if group == "root": + header = "trace/trace-root.h" + else: + header = "trace.h" + + out('/* This file is autogenerated by tracetool, do not edit. */', + '/* You must include this file after the inclusion of helper.h */', + '', + '#ifndef TRACE_%s_GENERATED_TCG_TRACERS_H' % group.upper(), + '#define TRACE_%s_GENERATED_TCG_TRACERS_H' % group.upper(), + '', + '#include "exec/helper-proto.h"', + '#include "%s"' % header, + '', + ) + + for e in events: + # just keep one of them + if "tcg-exec" not in e.properties: + continue + + out('static inline void %(name_tcg)s(%(args)s)', + '{', + name_tcg=e.original.api(e.QEMU_TRACE_TCG), + args=tracetool.vcpu.transform_args("tcg_h", e.original)) + + if "disable" not in e.properties: + args_trans = e.original.event_trans.args + args_exec = tracetool.vcpu.transform_args( + "tcg_helper_c", e.original.event_exec, "wrapper") + if "vcpu" in e.properties: + trace_cpu = e.args.names()[0] + cond = "trace_event_get_vcpu_state(%(cpu)s,"\ + " TRACE_%(id)s)"\ + % dict( + cpu=trace_cpu, + id=e.original.event_exec.name.upper()) + else: + cond = "true" + + out(' %(name_trans)s(%(argnames_trans)s);', + ' if (%(cond)s) {', + ' gen_helper_%(name_exec)s(%(argnames_exec)s);', + ' }', + name_trans=e.original.event_trans.api(e.QEMU_TRACE), + name_exec=e.original.event_exec.api(e.QEMU_TRACE), + argnames_trans=", ".join(args_trans.names()), + argnames_exec=", ".join(args_exec.names()), + cond=cond) + + out('}') + + out('', + '#endif /* TRACE_%s_GENERATED_TCG_TRACERS_H */' % group.upper()) diff --git a/scripts/tracetool/format/tcg_helper_c.py b/scripts/tracetool/format/tcg_helper_c.py new file mode 100644 index 000000000..72576e67d --- /dev/null +++ b/scripts/tracetool/format/tcg_helper_c.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- + +""" +Generate trace/generated-helpers.c. 
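
The generated file defines one helper_trace_<event>_proxy() function per
'tcg-exec' event; each proxy simply forwards to the corresponding
_nocheck__trace_<event>() tracer, since the dynamic-state check has
already been emitted at TCG code-generation time.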
+""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2017, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import Arguments, out +from tracetool.transform import * +import tracetool.vcpu + + +def vcpu_transform_args(args, mode): + assert len(args) == 1 + # NOTE: this name must be kept in sync with the one in "tcg_h" + args = Arguments([(args.types()[0], "__tcg_" + args.names()[0])]) + if mode == "code": + return Arguments([ + # Does cast from helper requirements to tracing types + ("CPUState *", "env_cpu(%s)" % args.names()[0]), + ]) + else: + args = Arguments([ + # NOTE: Current helper code uses TCGv_env (CPUArchState*) + ("CPUArchState *", args.names()[0]), + ]) + if mode == "header": + return args + elif mode == "wrapper": + return args.transform(HOST_2_TCG) + else: + assert False + + +def generate(events, backend, group): + if group == "root": + header = "trace/trace-root.h" + else: + header = "trace.h" + + events = [e for e in events + if "disable" not in e.properties] + + out('/* This file is autogenerated by tracetool, do not edit. */', + '', + '#include "qemu/osdep.h"', + '#include "cpu.h"', + '#include "exec/helper-proto.h"', + '#include "%s"' % header, + '', + ) + + for e in events: + if "tcg-exec" not in e.properties: + continue + + e_args_api = tracetool.vcpu.transform_args( + "tcg_helper_c", e.original, "header").transform( + HOST_2_TCG_COMPAT, TCG_2_TCG_HELPER_DEF) + e_args_call = tracetool.vcpu.transform_args( + "tcg_helper_c", e, "code") + + out('void %(name_tcg)s(%(args_api)s)', + '{', + # NOTE: the check was already performed at TCG-generation time + ' %(name)s(%(args_call)s);', + '}', + name_tcg="helper_%s_proxy" % e.api(), + name=e.api(e.QEMU_TRACE_NOCHECK), + args_api=e_args_api, + args_call=", ".join(e_args_call.casted()), + ) diff --git a/scripts/tracetool/format/tcg_helper_h.py b/scripts/tracetool/format/tcg_helper_h.py new file mode 100644 index 000000000..08554fbc8 --- /dev/null +++ b/scripts/tracetool/format/tcg_helper_h.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- + +""" +Generate trace/generated-helpers.h. +""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2016, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out +from tracetool.transform import * +import tracetool.vcpu + + +def generate(events, backend, group): + events = [e for e in events + if "disable" not in e.properties] + + out('/* This file is autogenerated by tracetool, do not edit. 
*/', + '', + ) + + for e in events: + if "tcg-exec" not in e.properties: + continue + + # TCG helper proxy declaration + fmt = "DEF_HELPER_FLAGS_%(argc)d(%(name)s, %(flags)svoid%(types)s)" + e_args = tracetool.vcpu.transform_args("tcg_helper_c", e.original, "header") + args = e_args.transform(HOST_2_TCG_COMPAT, HOST_2_TCG, + TCG_2_TCG_HELPER_DECL) + types = ", ".join(args.types()) + if types != "": + types = ", " + types + + flags = "TCG_CALL_NO_RWG, " + + out(fmt, + flags=flags, + argc=len(args), + name=e.api() + "_proxy", + types=types, + ) diff --git a/scripts/tracetool/format/tcg_helper_wrapper_h.py b/scripts/tracetool/format/tcg_helper_wrapper_h.py new file mode 100644 index 000000000..0c5a9797d --- /dev/null +++ b/scripts/tracetool/format/tcg_helper_wrapper_h.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- + +""" +Generate trace/generated-helpers-wrappers.h. +""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2016, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out +from tracetool.transform import * +import tracetool.vcpu + + +def generate(events, backend, group): + events = [e for e in events + if "disable" not in e.properties] + + out('/* This file is autogenerated by tracetool, do not edit. */', + '', + '#define tcg_temp_new_nop(v) (v)', + '#define tcg_temp_free_nop(v)', + '', + ) + + for e in events: + if "tcg-exec" not in e.properties: + continue + + # tracetool.generate always transforms types to host + e_args = tracetool.vcpu.transform_args("tcg_helper_c", e.original, "wrapper") + + # mixed-type to TCG helper bridge + args_tcg_compat = e_args.transform(HOST_2_TCG_COMPAT) + + code_new = [ + "%(tcg_type)s __%(name)s = %(tcg_func)s(%(name)s);" % + {"tcg_type": transform_type(type_, HOST_2_TCG), + "tcg_func": transform_type(type_, HOST_2_TCG_TMP_NEW), + "name": name} + for (type_, name) in args_tcg_compat + ] + + code_free = [ + "%(tcg_func)s(__%(name)s);" % + {"tcg_func": transform_type(type_, HOST_2_TCG_TMP_FREE), + "name": name} + for (type_, name) in args_tcg_compat + ] + + gen_name = "gen_helper_" + e.api() + + out('static inline void %(name)s(%(args)s)', + '{', + ' %(code_new)s', + ' %(proxy_name)s(%(tmp_names)s);', + ' %(code_free)s', + '}', + name=gen_name, + args=e_args, + proxy_name=gen_name + "_proxy", + code_new="\n ".join(code_new), + code_free="\n ".join(code_free), + tmp_names=", ".join(["__%s" % name for _, name in e_args]), + ) diff --git a/scripts/tracetool/format/ust_events_c.py b/scripts/tracetool/format/ust_events_c.py new file mode 100644 index 000000000..deced9533 --- /dev/null +++ b/scripts/tracetool/format/ust_events_c.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- + +""" +trace/generated-ust.c +""" + +__author__ = "Mohamad Gebai <mohamad.gebai@polymtl.ca>" +__copyright__ = "Copyright 2012, Mohamad Gebai <mohamad.gebai@polymtl.ca>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out + + +def generate(events, backend, group): + events = [e for e in events + if "disabled" not in e.properties] + + out('/* This file is autogenerated by tracetool, do not edit. 
*/', + '', + '#include "qemu/osdep.h"', + '', + '#define TRACEPOINT_DEFINE', + '#define TRACEPOINT_CREATE_PROBES', + '', + '/* If gcc version 4.7 or older is used, LTTng ust gives a warning when compiling with', + ' -Wredundant-decls.', + ' */', + '#pragma GCC diagnostic ignored "-Wredundant-decls"', + '', + '#include "trace-ust-all.h"') diff --git a/scripts/tracetool/format/ust_events_h.py b/scripts/tracetool/format/ust_events_h.py new file mode 100644 index 000000000..6ce559f6c --- /dev/null +++ b/scripts/tracetool/format/ust_events_h.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- + +""" +trace/generated-ust-provider.h +""" + +__author__ = "Mohamad Gebai <mohamad.gebai@polymtl.ca>" +__copyright__ = "Copyright 2012, Mohamad Gebai <mohamad.gebai@polymtl.ca>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out + + +def generate(events, backend, group): + events = [e for e in events + if "disabled" not in e.properties] + + if group == "all": + include = "trace-ust-all.h" + else: + include = "trace-ust.h" + + out('/* This file is autogenerated by tracetool, do not edit. */', + '', + '#undef TRACEPOINT_PROVIDER', + '#define TRACEPOINT_PROVIDER qemu', + '', + '#undef TRACEPOINT_INCLUDE_FILE', + '#define TRACEPOINT_INCLUDE_FILE ./%s' % include, + '', + '#if !defined (TRACE_%s_GENERATED_UST_H) || \\' % group.upper(), + ' defined(TRACEPOINT_HEADER_MULTI_READ)', + '#define TRACE_%s_GENERATED_UST_H' % group.upper(), + '', + '#include <lttng/tracepoint.h>', + '', + '/*', + ' * LTTng ust 2.0 does not allow you to use TP_ARGS(void) for tracepoints', + ' * requiring no arguments. We define these macros introduced in more recent' + ' * versions of LTTng ust as a workaround', + ' */', + '#ifndef _TP_EXPROTO1', + '#define _TP_EXPROTO1(a) void', + '#endif', + '#ifndef _TP_EXDATA_PROTO1', + '#define _TP_EXDATA_PROTO1(a) void *__tp_data', + '#endif', + '#ifndef _TP_EXDATA_VAR1', + '#define _TP_EXDATA_VAR1(a) __tp_data', + '#endif', + '#ifndef _TP_EXVAR1', + '#define _TP_EXVAR1(a)', + '#endif', + '') + + for e in events: + if len(e.args) > 0: + out('TRACEPOINT_EVENT(', + ' qemu,', + ' %(name)s,', + ' TP_ARGS(%(args)s),', + ' TP_FIELDS(', + name=e.name, + args=", ".join(", ".join(i) for i in e.args)) + + types = e.args.types() + names = e.args.names() + fmts = e.formats() + for t,n,f in zip(types, names, fmts): + if ('char *' in t) or ('char*' in t): + out(' ctf_string(' + n + ', ' + n + ')') + elif ("%p" in f) or ("x" in f) or ("PRIx" in f): + out(' ctf_integer_hex('+ t + ', ' + n + ', ' + n + ')') + elif ("ptr" in t) or ("*" in t): + out(' ctf_integer_hex('+ t + ', ' + n + ', ' + n + ')') + elif ('int' in t) or ('long' in t) or ('unsigned' in t) \ + or ('size_t' in t) or ('bool' in t): + out(' ctf_integer(' + t + ', ' + n + ', ' + n + ')') + elif ('double' in t) or ('float' in t): + out(' ctf_float(' + t + ', ' + n + ', ' + n + ')') + elif ('void *' in t) or ('void*' in t): + out(' ctf_integer_hex(unsigned long, ' + n + ', ' + n + ')') + + out(' )', + ')', + '') + + else: + out('TRACEPOINT_EVENT(', + ' qemu,', + ' %(name)s,', + ' TP_ARGS(void),', + ' TP_FIELDS()', + ')', + '', + name=e.name) + + out('#endif /* TRACE_%s_GENERATED_UST_H */' % group.upper(), + '', + '/* This part must be outside ifdef protection */', + '#include <lttng/tracepoint-event.h>') diff --git a/scripts/tracetool/transform.py b/scripts/tracetool/transform.py new file mode 100644 index 000000000..ea8b27799 --- /dev/null 
+++ b/scripts/tracetool/transform.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- + +""" +Type-transformation rules. +""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2012-2016, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +def _transform_type(type_, trans): + if isinstance(trans, str): + return trans + elif isinstance(trans, dict): + if type_ in trans: + return _transform_type(type_, trans[type_]) + elif None in trans: + return _transform_type(type_, trans[None]) + else: + return type_ + elif callable(trans): + return trans(type_) + else: + raise ValueError("Invalid type transformation rule: %s" % trans) + + +def transform_type(type_, *trans): + """Return a new type transformed according to the given rules. + + Applies each of the transformation rules in trans in order. + + If an element of trans is a string, return it. + + If an element of trans is a function, call it with type_ as its only + argument. + + If an element of trans is a dict, search type_ in its keys. If type_ is + a key, use the value as a transformation rule for type_. Otherwise, if + None is a key use the value as a transformation rule for type_. + + Otherwise, return type_. + + Parameters + ---------- + type_ : str + Type to transform. + trans : list of function or dict + Type transformation rules. + """ + if len(trans) == 0: + raise ValueError + res = type_ + for t in trans: + res = _transform_type(res, t) + return res + + +################################################## +# tcg -> host + +def _tcg_2_host(type_): + if type_ == "TCGv": + # force a fixed-size type (target-independent) + return "uint64_t" + else: + return type_ + +TCG_2_HOST = { + "TCGv_i32": "uint32_t", + "TCGv_i64": "uint64_t", + "TCGv_ptr": "void *", + None: _tcg_2_host, + } + + +################################################## +# host -> host compatible with tcg sizes + +HOST_2_TCG_COMPAT = { + "uint8_t": "uint32_t", + "uint16_t": "uint32_t", + } + + +################################################## +# host/tcg -> tcg + +def _host_2_tcg(type_): + if type_.startswith("TCGv"): + return type_ + raise ValueError("Don't know how to translate '%s' into a TCG type\n" % type_) + +HOST_2_TCG = { + "uint32_t": "TCGv_i32", + "uint64_t": "TCGv_i64", + "void *" : "TCGv_ptr", + "CPUArchState *": "TCGv_env", + None: _host_2_tcg, + } + + +################################################## +# tcg -> tcg helper definition + +def _tcg_2_helper_def(type_): + if type_ == "TCGv": + return "target_ulong" + else: + return type_ + +TCG_2_TCG_HELPER_DEF = { + "TCGv_i32": "uint32_t", + "TCGv_i64": "uint64_t", + "TCGv_ptr": "void *", + None: _tcg_2_helper_def, + } + + +################################################## +# tcg -> tcg helper declaration + +def _tcg_2_tcg_helper_decl_error(type_): + raise ValueError("Don't know how to translate type '%s' into a TCG helper declaration type\n" % type_) + +TCG_2_TCG_HELPER_DECL = { + "TCGv" : "tl", + "TCGv_ptr": "ptr", + "TCGv_i32": "i32", + "TCGv_i64": "i64", + "TCGv_env": "env", + None: _tcg_2_tcg_helper_decl_error, + } + + +################################################## +# host/tcg -> tcg temporal constant allocation + +def _host_2_tcg_tmp_new(type_): + if type_.startswith("TCGv"): + return "tcg_temp_new_nop" + raise ValueError("Don't know how to translate type '%s' into a TCG temporal allocation" % type_) + +HOST_2_TCG_TMP_NEW = { + "uint32_t": 
"tcg_const_i32", + "uint64_t": "tcg_const_i64", + "void *" : "tcg_const_ptr", + None: _host_2_tcg_tmp_new, + } + + +################################################## +# host/tcg -> tcg temporal constant deallocation + +def _host_2_tcg_tmp_free(type_): + if type_.startswith("TCGv"): + return "tcg_temp_free_nop" + raise ValueError("Don't know how to translate type '%s' into a TCG temporal deallocation" % type_) + +HOST_2_TCG_TMP_FREE = { + "uint32_t": "tcg_temp_free_i32", + "uint64_t": "tcg_temp_free_i64", + "void *" : "tcg_temp_free_ptr", + None: _host_2_tcg_tmp_free, + } diff --git a/scripts/tracetool/vcpu.py b/scripts/tracetool/vcpu.py new file mode 100644 index 000000000..868b4cb04 --- /dev/null +++ b/scripts/tracetool/vcpu.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- + +""" +Generic management for the 'vcpu' property. + +""" + +__author__ = "Lluís Vilanova <vilanova@ac.upc.edu>" +__copyright__ = "Copyright 2016, Lluís Vilanova <vilanova@ac.upc.edu>" +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import Arguments, try_import + + +def transform_event(event): + """Transform event to comply with the 'vcpu' property (if present).""" + if "vcpu" in event.properties: + # events with 'tcg-trans' and 'tcg-exec' are auto-generated from + # already-patched events + assert "tcg-trans" not in event.properties + assert "tcg-exec" not in event.properties + + event.args = Arguments([("void *", "__cpu"), event.args]) + if "tcg" in event.properties: + fmt = "\"cpu=%p \"" + event.fmt = [fmt + event.fmt[0], + fmt + event.fmt[1]] + else: + fmt = "\"cpu=%p \"" + event.fmt = fmt + event.fmt + return event + + +def transform_args(format, event, *args, **kwargs): + """Transforms the arguments to suit the specified format. + + The format module must implement function 'vcpu_args', which receives the + implicit arguments added by the 'vcpu' property, and must return suitable + arguments for the given format. + + The function is only called for events with the 'vcpu' property. + + Parameters + ========== + format : str + Format module name. + event : Event + args, kwargs + Passed to 'vcpu_transform_args'. + + Returns + ======= + Arguments + The transformed arguments, including the non-implicit ones. + + """ + if "vcpu" in event.properties: + ok, func = try_import("tracetool.format." + format, + "vcpu_transform_args") + assert ok + assert func + return Arguments([func(event.args[:1], *args, **kwargs), + event.args[1:]]) + else: + return event.args diff --git a/scripts/u2f-setup-gen.py b/scripts/u2f-setup-gen.py new file mode 100755 index 000000000..2122598fe --- /dev/null +++ b/scripts/u2f-setup-gen.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +# +# Libu2f-emu setup directory generator for USB U2F key emulation. +# +# Copyright (c) 2020 César Belley <cesar.belley@lse.epita.fr> +# Written by César Belley <cesar.belley@lse.epita.fr> +# +# This work is licensed under the terms of the GNU GPL, version 2 +# or, at your option, any later version. See the COPYING file in +# the top-level directory. 
+ +import sys +import os +from random import randint +from typing import Tuple + +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives.asymmetric import ec +from cryptography.hazmat.primitives.serialization import Encoding, \ + NoEncryption, PrivateFormat, PublicFormat +from OpenSSL import crypto + + +def write_setup_dir(dirpath: str, privkey_pem: bytes, cert_pem: bytes, + entropy: bytes, counter: int) -> None: + """ + Write the setup directory. + + Args: + dirpath: The directory path. + key_pem: The private key PEM. + cert_pem: The certificate PEM. + entropy: The 48 bytes of entropy. + counter: The counter value. + """ + # Directory + os.mkdir(dirpath) + + # Private key + with open(f'{dirpath}/private-key.pem', 'bw') as f: + f.write(privkey_pem) + + # Certificate + with open(f'{dirpath}/certificate.pem', 'bw') as f: + f.write(cert_pem) + + # Entropy + with open(f'{dirpath}/entropy', 'wb') as f: + f.write(entropy) + + # Counter + with open(f'{dirpath}/counter', 'w') as f: + f.write(f'{str(counter)}\n') + + +def generate_ec_key_pair() -> Tuple[str, str]: + """ + Generate an ec key pair. + + Returns: + The private and public key PEM. + """ + # Key generation + privkey = ec.generate_private_key(ec.SECP256R1, default_backend()) + pubkey = privkey.public_key() + + # PEM serialization + privkey_pem = privkey.private_bytes(encoding=Encoding.PEM, + format=PrivateFormat.TraditionalOpenSSL, + encryption_algorithm=NoEncryption()) + pubkey_pem = pubkey.public_bytes(encoding=Encoding.PEM, + format=PublicFormat.SubjectPublicKeyInfo) + return privkey_pem, pubkey_pem + + +def generate_certificate(privkey_pem: str, pubkey_pem: str) -> str: + """ + Generate a x509 certificate from a key pair. + + Args: + privkey_pem: The private key PEM. + pubkey_pem: The public key PEM. + + Returns: + The certificate PEM. + """ + # Convert key pair + privkey = crypto.load_privatekey(crypto.FILETYPE_PEM, privkey_pem) + pubkey = crypto.load_publickey(crypto.FILETYPE_PEM, pubkey_pem) + + # New x509v3 certificate + cert = crypto.X509() + cert.set_version(0x2) + + # Serial number + cert.set_serial_number(randint(1, 2 ** 64)) + + # Before / After + cert.gmtime_adj_notBefore(0) + cert.gmtime_adj_notAfter(4 * (365 * 24 * 60 * 60)) + + # Public key + cert.set_pubkey(pubkey) + + # Subject name and issueer + cert.get_subject().CN = "U2F emulated" + cert.set_issuer(cert.get_subject()) + + # Extensions + cert.add_extensions([ + crypto.X509Extension(b"subjectKeyIdentifier", + False, b"hash", subject=cert), + ]) + cert.add_extensions([ + crypto.X509Extension(b"authorityKeyIdentifier", + False, b"keyid:always", issuer=cert), + ]) + cert.add_extensions([ + crypto.X509Extension(b"basicConstraints", True, b"CA:TRUE") + ]) + + # Signature + cert.sign(privkey, 'sha256') + + return crypto.dump_certificate(crypto.FILETYPE_PEM, cert) + + +def generate_setup_dir(dirpath: str) -> None: + """ + Generates the setup directory. + + Args: + dirpath: The directory path. 
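
    The resulting directory holds private-key.pem, certificate.pem, a
    48-byte entropy file and a counter file initialised to 0, as written
    by write_setup_dir() above.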
+ """ + # Key pair + privkey_pem, pubkey_pem = generate_ec_key_pair() + + # Certificate + certificate_pem = generate_certificate(privkey_pem, pubkey_pem) + + # Entropy + entropy = os.urandom(48) + + # Counter + counter = 0 + + # Write + write_setup_dir(dirpath, privkey_pem, certificate_pem, entropy, counter) + + +def main() -> None: + """ + Main function + """ + # Dir path + if len(sys.argv) != 2: + sys.stderr.write(f'Usage: {sys.argv[0]} <setup_dir>\n') + exit(2) + dirpath = sys.argv[1] + + # Dir non existence + if os.path.exists(dirpath): + sys.stderr.write(f'Directory: {dirpath} already exists.\n') + exit(1) + + generate_setup_dir(dirpath) + + +if __name__ == '__main__': + main() diff --git a/scripts/undefsym.py b/scripts/undefsym.py new file mode 100644 index 000000000..4b6a72d95 --- /dev/null +++ b/scripts/undefsym.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 + +# Before a shared module's DSO is produced, a static library is built for it +# and passed to this script. The script generates -Wl,-u options to force +# the inclusion of symbol from libqemuutil.a if the shared modules need them, +# This is necessary because the modules may use functions not needed by the +# executable itself, which would cause the function to not be linked in. +# Then the DSO loading would fail because of the missing symbol. + + +import sys +import subprocess + +def filter_lines_set(stdout, from_staticlib): + linesSet = set() + for line in stdout.splitlines(): + tokens = line.split(b' ') + if len(tokens) >= 2: + if from_staticlib and tokens[1] == b'U': + continue + if not from_staticlib and tokens[1] != b'U': + continue + new_line = b'-Wl,-u,' + tokens[0] + if not new_line in linesSet: + linesSet.add(new_line) + return linesSet + +def main(args): + if len(args) <= 3: + sys.exit(0) + + nm = args[1] + staticlib = args[2] + pc = subprocess.run([nm, "-P", "-g", staticlib], stdout=subprocess.PIPE) + if pc.returncode != 0: + sys.exit(1) + staticlib_syms = filter_lines_set(pc.stdout, True) + + shared_modules = args[3:] + pc = subprocess.run([nm, "-P", "-g"] + shared_modules, stdout=subprocess.PIPE) + if pc.returncode != 0: + sys.exit(1) + modules_undef_syms = filter_lines_set(pc.stdout, False) + lines = sorted(staticlib_syms.intersection(modules_undef_syms)) + sys.stdout.buffer.write(b'\n'.join(lines)) + +if __name__ == "__main__": + main(sys.argv) diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh new file mode 100755 index 000000000..fea4d6eb6 --- /dev/null +++ b/scripts/update-linux-headers.sh @@ -0,0 +1,243 @@ +#!/bin/sh -e +# +# Update Linux kernel headers QEMU requires from a specified kernel tree. +# +# Copyright (C) 2011 Siemens AG +# +# Authors: +# Jan Kiszka <jan.kiszka@siemens.com> +# +# This work is licensed under the terms of the GNU GPL version 2. +# See the COPYING file in the top-level directory. + +tmpdir=$(mktemp -d) +linux="$1" +output="$2" + +if [ -z "$linux" ] || ! 
+
+tmpdir=$(mktemp -d)
+linux="$1"
+output="$2"
+
+if [ -z "$linux" ] || ! [ -d "$linux" ]; then
+    cat << EOF
+usage: update-linux-headers.sh LINUX_PATH [OUTPUT_PATH]
+
+LINUX_PATH      Linux kernel directory to obtain the headers from
+OUTPUT_PATH     output directory, usually the qemu source tree (default: $PWD)
+EOF
+    exit 1
+fi
+
+if [ -z "$output" ]; then
+    output="$PWD"
+fi
+
+cp_portable() {
+    f=$1
+    to=$2
+    if
+        grep '#include' "$f" | grep -v -e 'linux/virtio' \
+                                       -e 'linux/types' \
+                                       -e 'linux/ioctl' \
+                                       -e 'stdint' \
+                                       -e 'linux/if_ether' \
+                                       -e 'input-event-codes' \
+                                       -e 'sys/' \
+                                       -e 'pvrdma_verbs' \
+                                       -e 'drm.h' \
+                                       -e 'limits' \
+                                       -e 'linux/const' \
+                                       -e 'linux/kernel' \
+                                       -e 'linux/sysinfo' \
+                                       -e 'asm-generic/kvm_para' \
+                                       > /dev/null
+    then
+        echo "Unexpected #include in input file $f."
+        exit 2
+    fi
+
+    header=$(basename "$f");
+    sed -e 's/__aligned_u64/__u64 __attribute__((aligned(8)))/g' \
+        -e 's/__u\([0-9][0-9]*\)/uint\1_t/g' \
+        -e 's/u\([0-9][0-9]*\)/uint\1_t/g' \
+        -e 's/__s\([0-9][0-9]*\)/int\1_t/g' \
+        -e 's/__le\([0-9][0-9]*\)/uint\1_t/g' \
+        -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \
+        -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \
+        -e 's/<linux\/\([^>]*\)>/"standard-headers\/linux\/\1"/' \
+        -e 's/__bitwise//' \
+        -e 's/__attribute__((packed))/QEMU_PACKED/' \
+        -e 's/__inline__/inline/' \
+        -e 's/__BITS_PER_LONG/HOST_LONG_BITS/' \
+        -e '/\"drm.h\"/d' \
+        -e '/sys\/ioctl.h/d' \
+        -e '/linux\/ioctl.h/d' \
+        -e 's/SW_MAX/SW_MAX_/' \
+        -e 's/atomic_t/int/' \
+        -e 's/__kernel_long_t/long/' \
+        -e 's/__kernel_ulong_t/unsigned long/' \
+        -e 's/struct ethhdr/struct eth_header/' \
+        -e '/\#define _LINUX_ETHTOOL_H/a \\n\#include "net/eth.h"' \
+        "$f" > "$to/$header";
+}
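+
+# e.g. cp_portable turns "__u32" into "uint32_t", "__le16" into "uint16_t",
+# and '#include <linux/virtio_ids.h>' into
+# '#include "standard-headers/linux/virtio_ids.h"'.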
+
+# This will pick up non-directories too (e.g. "Kconfig") but we will
+# ignore them in the next loop.
+ARCHLIST=$(cd "$linux/arch" && echo *)
+
+for arch in $ARCHLIST; do
+    # Discard anything which isn't a KVM-supporting architecture
+    if ! [ -e "$linux/arch/$arch/include/asm/kvm.h" ] &&
+        ! [ -e "$linux/arch/$arch/include/uapi/asm/kvm.h" ] ; then
+        continue
+    fi
+
+    if [ "$arch" = x86 ]; then
+        arch_var=SRCARCH
+    else
+        arch_var=ARCH
+    fi
+
+    make -C "$linux" INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install
+
+    rm -rf "$output/linux-headers/asm-$arch"
+    mkdir -p "$output/linux-headers/asm-$arch"
+    for header in kvm.h unistd.h bitsperlong.h mman.h; do
+        cp "$tmpdir/include/asm/$header" "$output/linux-headers/asm-$arch"
+    done
+
+    if [ $arch = mips ]; then
+        cp "$tmpdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/"
+        cp "$tmpdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/"
+        cp "$tmpdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/"
+        cp "$tmpdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/"
+    fi
+    if [ $arch = powerpc ]; then
+        cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/"
+        cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/"
+    fi
+
+    rm -rf "$output/include/standard-headers/asm-$arch"
+    mkdir -p "$output/include/standard-headers/asm-$arch"
+    if [ $arch = s390 ]; then
+        cp_portable "$tmpdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/"
+        cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/"
+        cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/"
+    fi
+    if [ $arch = arm ]; then
+        cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/"
+        cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/"
+        cp "$tmpdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/"
+    fi
+    if [ $arch = arm64 ]; then
+        cp "$tmpdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/"
+    fi
+    if [ $arch = x86 ]; then
+        cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/"
+        cp "$tmpdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/"
+        cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/"
+        cp_portable "$tmpdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch"
+        # Remove everything except the macros from bootparam.h avoiding the
+        # unnecessary import of several video/ist/etc headers
+        sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \
+            "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h"
+        cp_portable "$tmpdir/bootparam.h" \
+                    "$output/include/standard-headers/asm-$arch"
+    fi
+done
+
+rm -rf "$output/linux-headers/linux"
+mkdir -p "$output/linux-headers/linux"
+for header in kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \
+              psci.h psp-sev.h userfaultfd.h mman.h; do
+    cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux"
+done
+
+rm -rf "$output/linux-headers/asm-generic"
+mkdir -p "$output/linux-headers/asm-generic"
+for header in unistd.h bitsperlong.h mman-common.h mman.h hugetlb_encode.h; do
+    cp "$tmpdir/include/asm-generic/$header" "$output/linux-headers/asm-generic"
+done
+
+if [ -L "$linux/source" ]; then
+    cp "$linux/source/COPYING" "$output/linux-headers"
+else
+    cp "$linux/COPYING" "$output/linux-headers"
+fi
+
+# Recent kernel sources split the copyright/license info into multiple
+# files, which we need to copy. This set of licenses is the set that
+# are referred to by SPDX lines in the headers we currently copy.
+# We don't copy the Documentation/process/license-rules.rst which
+# is also referred to by COPYING, since it's explanatory rather than
+# a license.
+if [ -d "$linux/LICENSES" ]; then
+    mkdir -p "$output/linux-headers/LICENSES/preferred" \
+             "$output/linux-headers/LICENSES/exceptions"
+    for l in preferred/GPL-2.0 preferred/BSD-2-Clause preferred/BSD-3-Clause \
+             exceptions/Linux-syscall-note; do
+        cp "$linux/LICENSES/$l" "$output/linux-headers/LICENSES/$l"
+    done
+fi
+
+cat <<EOF >$output/linux-headers/linux/virtio_config.h
+#include "standard-headers/linux/virtio_config.h"
+EOF
+cat <<EOF >$output/linux-headers/linux/virtio_ring.h
+#include "standard-headers/linux/virtio_ring.h"
+EOF
+cat <<EOF >$output/linux-headers/linux/vhost_types.h
+#include "standard-headers/linux/vhost_types.h"
+EOF
+
+rm -rf "$output/include/standard-headers/linux"
+mkdir -p "$output/include/standard-headers/linux"
+for i in "$tmpdir"/include/linux/*virtio*.h \
+         "$tmpdir/include/linux/qemu_fw_cfg.h" \
+         "$tmpdir/include/linux/fuse.h" \
+         "$tmpdir/include/linux/input.h" \
+         "$tmpdir/include/linux/input-event-codes.h" \
+         "$tmpdir/include/linux/udmabuf.h" \
+         "$tmpdir/include/linux/pci_regs.h" \
+         "$tmpdir/include/linux/ethtool.h" \
+         "$tmpdir/include/linux/const.h" \
+         "$tmpdir/include/linux/kernel.h" \
+         "$tmpdir/include/linux/vhost_types.h" \
+         "$tmpdir/include/linux/sysinfo.h"; do
+    cp_portable "$i" "$output/include/standard-headers/linux"
+done
+mkdir -p "$output/include/standard-headers/drm"
+cp_portable "$tmpdir/include/drm/drm_fourcc.h" \
+            "$output/include/standard-headers/drm"
+
+rm -rf "$output/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma"
+mkdir -p "$output/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma"
+
+# Remove the unused functions from pvrdma_verbs.h avoiding the unnecessary
+# import of several infiniband/networking/other headers
+tmp_pvrdma_verbs="$tmpdir/pvrdma_verbs.h"
+# Parse the entire file instead of single lines to match
+# function declarations expanding over multiple lines
+# and strip the declarations starting with pvrdma prefix.
+sed -e '1h;2,$H;$!d;g' -e 's/[^};]*pvrdma[^(| ]*([^)]*);//g' \
+    "$linux/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h" > \
+    "$tmp_pvrdma_verbs";
+
+for i in "$linux/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h" \
+         "$tmp_pvrdma_verbs"; do \
+    cp_portable "$i" \
+                "$output/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/"
+done
+
+rm -rf "$output/include/standard-headers/rdma/"
+mkdir -p "$output/include/standard-headers/rdma/"
+for i in "$tmpdir/include/rdma/vmw_pvrdma-abi.h"; do
+    cp_portable "$i" \
+                "$output/include/standard-headers/rdma/"
+done
+
+cat <<EOF >$output/include/standard-headers/linux/types.h
+/* For QEMU all types are already defined via osdep.h, so this
+ * header does not need to do anything.
+ */
+EOF
+cat <<EOF >$output/include/standard-headers/linux/if_ether.h
+#define ETH_ALEN 6
+EOF
+
+rm -rf "$tmpdir"
diff --git a/scripts/update-mips-syscall-args.sh b/scripts/update-mips-syscall-args.sh
new file mode 100755
index 000000000..5a529b699
--- /dev/null
+++ b/scripts/update-mips-syscall-args.sh
@@ -0,0 +1,58 @@
+#!/bin/sh
+
+URL=https://raw.githubusercontent.com/strace/strace/master/src
+FILES="sysent.h sysent_shorthand_defs.h linux/mips/syscallent-compat.h \
+       linux/mips/syscallent-o32.h linux/32/syscallent-common-32.h \
+       linux/generic/syscallent-common.h"
+
+output="$1"
+if [ "$output" = "" ] ; then
+    output="$PWD"
+fi
+
+INC=linux-user/mips/syscall-args-o32.c.inc
+
+TMP=$(mktemp -d)
+cd "$TMP"
+
+for file in $FILES; do
+    curl --create-dirs "$URL/$file" -o "$TMP/$file"
+done
+
+# Empty stubs for headers the downloaded syscallent files pull in.
+> linux/generic/subcallent.h
+> linux/32/subcallent.h
+
+cat > gen_mips_o32.c <<EOF
+#include <stdio.h>
+
+#define LINUX_MIPSO32
+#define MAX_ARGS 7
+
+#include "sysent.h"
+#include "sysent_shorthand_defs.h"
+
+#define SEN(syscall_name) 0,0
+const struct_sysent sysent0[] = {
+#include "syscallent-o32.h"
+};
+
+int main(void)
+{
+    int i;
+
+    for (i = 4000; i < sizeof(sysent0) / sizeof(struct_sysent); i++) {
+        if (sysent0[i].sys_name == NULL) {
+            printf("    [% 4d] = MIPS_SYSCALL_NUMBER_UNUSED,\n", i - 4000);
+        } else {
+            printf("    [% 4d] = %d, /* %s */\n", i - 4000,
+                   sysent0[i].nargs, sysent0[i].sys_name);
+        }
+    }
+
+    return 0;
+}
+EOF
+
+cc -o gen_mips_o32 -I linux/mips -I linux/generic gen_mips_o32.c && ./gen_mips_o32 > "$output/$INC"
+
+rm -fr "$TMP"
diff --git a/scripts/update-syscalltbl.sh b/scripts/update-syscalltbl.sh
new file mode 100755
index 000000000..2d23e5680
--- /dev/null
+++ b/scripts/update-syscalltbl.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+
+TBL_LIST="\
+arch/alpha/kernel/syscalls/syscall.tbl,linux-user/alpha/syscall.tbl \
+arch/arm/tools/syscall.tbl,linux-user/arm/syscall.tbl \
+arch/m68k/kernel/syscalls/syscall.tbl,linux-user/m68k/syscall.tbl \
+arch/microblaze/kernel/syscalls/syscall.tbl,linux-user/microblaze/syscall.tbl \
+arch/mips/kernel/syscalls/syscall_n32.tbl,linux-user/mips64/syscall_n32.tbl \
+arch/mips/kernel/syscalls/syscall_n64.tbl,linux-user/mips64/syscall_n64.tbl \
+arch/mips/kernel/syscalls/syscall_o32.tbl,linux-user/mips/syscall_o32.tbl \
+arch/parisc/kernel/syscalls/syscall.tbl,linux-user/hppa/syscall.tbl \
+arch/powerpc/kernel/syscalls/syscall.tbl,linux-user/ppc/syscall.tbl \
+arch/s390/kernel/syscalls/syscall.tbl,linux-user/s390x/syscall.tbl \
+arch/sh/kernel/syscalls/syscall.tbl,linux-user/sh4/syscall.tbl \
+arch/sparc/kernel/syscalls/syscall.tbl,linux-user/sparc64/syscall.tbl \
+arch/sparc/kernel/syscalls/syscall.tbl,linux-user/sparc/syscall.tbl \
+arch/x86/entry/syscalls/syscall_32.tbl,linux-user/i386/syscall_32.tbl \
+arch/x86/entry/syscalls/syscall_64.tbl,linux-user/x86_64/syscall_64.tbl \
+arch/xtensa/kernel/syscalls/syscall.tbl,linux-user/xtensa/syscall.tbl\
+"
+
+linux="$1"
+output="$2"
+
+if [ -z "$linux" ] || ! [ -d "$linux" ]; then
+    cat << EOF
+usage: update-syscalltbl.sh LINUX_PATH [OUTPUT_PATH]
+
+LINUX_PATH      Linux kernel directory to obtain the syscall.tbl from
+OUTPUT_PATH     output directory, usually the qemu source tree (default: $PWD)
+EOF
+    exit 1
+fi
+
+if [ -z "$output" ]; then
+    output="$PWD"
+fi
+
+for entry in $TBL_LIST; do
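+    # Each entry is "SRC,DST": temporarily switch IFS to "," so that
+    # "set" splits it into the positional parameters $1 and $2.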
+    OFS="$IFS"
+    IFS=,
+    set $entry
+    src=$1
+    dst=$2
+    IFS="$OFS"
+    if ! cp "$linux/$src" "$output/$dst" ; then
+        echo "Cannot copy $linux/$src to $output/$dst" 1>&2
+        exit 1
+    fi
+done
+
diff --git a/scripts/userfaultfd-wrlat.py b/scripts/userfaultfd-wrlat.py
new file mode 100755
index 000000000..0684be4e0
--- /dev/null
+++ b/scripts/userfaultfd-wrlat.py
@@ -0,0 +1,122 @@
+#!/usr/bin/python3
+#
+# userfaultfd-wrlat  Summarize userfaultfd write fault latencies.
+#                    Events are continuously accumulated for the
+#                    run, while the latency distribution histogram is
+#                    dumped every 'interval' seconds.
+#
+#                    For Linux, uses BCC, eBPF.
+#
+# USAGE: userfaultfd-wrlat [interval [count]]
+#
+# Copyright Virtuozzo GmbH, 2020
+#
+# Authors:
+#   Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later. See the COPYING file in the top-level directory.
+
+from __future__ import print_function
+from bcc import BPF
+from ctypes import c_ushort, c_int, c_ulonglong
+from time import sleep
+from sys import argv
+
+def usage():
+    print("USAGE: %s [interval [count]]" % argv[0])
+    exit()
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <linux/mm.h>
+
+BPF_HASH(ev_start, u32, u64);
+BPF_HISTOGRAM(ev_delta_hist, u64);
+
+/* Trace UFFD page fault start event. */
+static void do_event_start()
+{
+    /* Using "(u32)" to drop group ID which is upper 32 bits */
+    u32 tid = (u32) bpf_get_current_pid_tgid();
+    u64 ts = bpf_ktime_get_ns();
+
+    ev_start.update(&tid, &ts);
+}
+
+/* Trace UFFD page fault end event. */
+static void do_event_end()
+{
+    /* Using "(u32)" to drop group ID which is upper 32 bits */
+    u32 tid = (u32) bpf_get_current_pid_tgid();
+    u64 ts = bpf_ktime_get_ns();
+    u64 *tsp;
+
+    tsp = ev_start.lookup(&tid);
+    if (tsp) {
+        u64 delta = ts - (*tsp);
+        /* Transform time delta to milliseconds */
+        ev_delta_hist.increment(bpf_log2l(delta / 1000000));
+        ev_start.delete(&tid);
+    }
+}
+
+/* KPROBE for handle_userfault(). */
+int probe_handle_userfault(struct pt_regs *ctx, struct vm_fault *vmf,
+        unsigned long reason)
+{
+    /* Trace only UFFD write faults. */
+    if (reason & VM_UFFD_WP) {
+        do_event_start();
+    }
+    return 0;
+}
+
+/* KRETPROBE for handle_userfault(). */
+int retprobe_handle_userfault(struct pt_regs *ctx)
+{
+    do_event_end();
+    return 0;
+}
+"""
+
+# arguments
+interval = 10
+count = -1
+if len(argv) > 1:
+    try:
+        interval = int(argv[1])
+        if interval == 0:
+            raise
+        if len(argv) > 2:
+            count = int(argv[2])
+    except:    # also catches -h, --help
+        usage()
+
+# load BPF program
+b = BPF(text=bpf_text)
+# attach KPROBEs
+b.attach_kprobe(event="handle_userfault", fn_name="probe_handle_userfault")
+b.attach_kretprobe(event="handle_userfault", fn_name="retprobe_handle_userfault")
+
+# header
+print("Tracing UFFD-WP write fault latency... Hit Ctrl-C to end.")
+
+# output
+loop = 0
+do_exit = 0
+while True:
+    if count > 0:
+        loop += 1
+        if loop > count:
+            exit()
+    try:
+        sleep(interval)
+    except KeyboardInterrupt:
+        do_exit = 1
+
+    print()
+    b["ev_delta_hist"].print_log2_hist("msecs")
+    if do_exit:
+        exit()
diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py
new file mode 100755
index 000000000..539ead62b
--- /dev/null
+++ b/scripts/vmstate-static-checker.py
@@ -0,0 +1,432 @@
+#!/usr/bin/env python3
+#
+# Compares vmstate information stored in JSON format, obtained from
+# the -dump-vmstate QEMU command.
+#
+# Copyright 2014 Amit Shah <amit.shah@redhat.com>
+# Copyright 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, see <http://www.gnu.org/licenses/>.
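+#
+# Typical use (file names are illustrative): dump the vmstate from each
+# QEMU version with "-dump-vmstate <file>", then compare the two dumps:
+#   ./scripts/vmstate-static-checker.py -s src.json -d dest.json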
+
+import argparse
+import json
+import sys
+
+# Count the number of errors found
+taint = 0
+
+def bump_taint():
+    global taint
+
+    # Ensure we don't wrap around or reset to 0 -- the shell only has
+    # an 8-bit return value.
+    if taint < 255:
+        taint = taint + 1
+
+
+def check_fields_match(name, s_field, d_field):
+    if s_field == d_field:
+        return True
+
+    # Some fields changed names between qemu versions. This list
+    # is used to whitelist such changes in each section / description.
+    changed_names = {
+        'apic': ['timer', 'timer_expiry'],
+        'e1000': ['dev', 'parent_obj'],
+        'ehci': ['dev', 'pcidev'],
+        'I440FX': ['dev', 'parent_obj'],
+        'ich9_ahci': ['card', 'parent_obj'],
+        'ich9-ahci': ['ahci', 'ich9_ahci'],
+        'ioh3420': ['PCIDevice', 'PCIEDevice'],
+        'ioh-3240-express-root-port': ['port.br.dev',
+                                       'parent_obj.parent_obj.parent_obj',
+                                       'port.br.dev.exp.aer_log',
+                                'parent_obj.parent_obj.parent_obj.exp.aer_log'],
+        'cirrus_vga': ['hw_cursor_x', 'vga.hw_cursor_x',
+                       'hw_cursor_y', 'vga.hw_cursor_y'],
+        'lsiscsi': ['dev', 'parent_obj'],
+        'mch': ['d', 'parent_obj'],
+        'pci_bridge': ['bridge.dev', 'parent_obj', 'bridge.dev.shpc', 'shpc'],
+        'pcnet': ['pci_dev', 'parent_obj'],
+        'PIIX3': ['pci_irq_levels', 'pci_irq_levels_vmstate'],
+        'piix4_pm': ['dev', 'parent_obj', 'pci0_status',
+                     'acpi_pci_hotplug.acpi_pcihp_pci_status[0x0]',
+                     'pm1a.sts', 'ar.pm1.evt.sts', 'pm1a.en', 'ar.pm1.evt.en',
+                     'pm1_cnt.cnt', 'ar.pm1.cnt.cnt',
+                     'tmr.timer', 'ar.tmr.timer',
+                     'tmr.overflow_time', 'ar.tmr.overflow_time',
+                     'gpe', 'ar.gpe'],
+        'rtl8139': ['dev', 'parent_obj'],
+        'qxl': ['num_surfaces', 'ssd.num_surfaces'],
+        'usb-ccid': ['abProtocolDataStructure', 'abProtocolDataStructure.data'],
+        'usb-host': ['dev', 'parent_obj'],
+        'usb-mouse': ['usb-ptr-queue', 'HIDPointerEventQueue'],
+        'usb-tablet': ['usb-ptr-queue', 'HIDPointerEventQueue'],
+        'vmware_vga': ['card', 'parent_obj'],
+        'vmware_vga_internal': ['depth', 'new_depth'],
+        'xhci': ['pci_dev', 'parent_obj'],
+        'x3130-upstream': ['PCIDevice', 'PCIEDevice'],
+        'xio3130-express-downstream-port': ['port.br.dev',
+                                            'parent_obj.parent_obj.parent_obj',
+                                            'port.br.dev.exp.aer_log',
+                                'parent_obj.parent_obj.parent_obj.exp.aer_log'],
+        'xio3130-downstream': ['PCIDevice', 'PCIEDevice'],
+        'xio3130-express-upstream-port': ['br.dev', 'parent_obj.parent_obj',
+                                          'br.dev.exp.aer_log',
+                                          'parent_obj.parent_obj.exp.aer_log'],
+        'spapr_pci': ['dma_liobn[0]', 'mig_liobn',
+                      'mem_win_addr', 'mig_mem_win_addr',
+                      'mem_win_size', 'mig_mem_win_size',
+                      'io_win_addr', 'mig_io_win_addr',
+                      'io_win_size', 'mig_io_win_size'],
+    }
+
+    if name not in changed_names:
+        return False
+
+    if s_field in changed_names[name] and d_field in changed_names[name]:
+        return True
+
+    return False
+
+def get_changed_sec_name(sec):
+    # Section names can change -- see commit 292b1634 for an example.
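+    # The table is consulted in both directions below, so a section may
+    # appear under either its old or its new name on either side.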
+    changes = {
+        "ICH9 LPC": "ICH9-LPC",
+        "e1000-82540em": "e1000",
+    }
+
+    for item in changes:
+        if item == sec:
+            return changes[item]
+        if changes[item] == sec:
+            return item
+    return ""
+
+def exists_in_substruct(fields, item):
+    # Some QEMU versions moved a few fields inside a substruct. This
+    # kept the on-wire format the same. This function checks if
+    # something got shifted inside a substruct. For example, the
+    # change in commit 1f42d22233b4f3d1a2933ff30e8d6a6d9ee2d08f
+
+    if "Description" not in fields:
+        return False
+
+    if "Fields" not in fields["Description"]:
+        return False
+
+    substruct_fields = fields["Description"]["Fields"]
+
+    if substruct_fields == []:
+        return False
+
+    return check_fields_match(fields["Description"]["name"],
+                              substruct_fields[0]["field"], item)
+
+
+def check_fields(src_fields, dest_fields, desc, sec):
+    # This function checks for all the fields in a section. If some
+    # fields got embedded into a substruct, this function will also
+    # attempt to check inside the substruct.
+
+    d_iter = iter(dest_fields)
+    s_iter = iter(src_fields)
+
+    # Using these lists as stacks to store previous value of s_iter
+    # and d_iter, so that when time comes to exit out of a substruct,
+    # we can go back one level up and continue from where we left off.
+
+    s_iter_list = []
+    d_iter_list = []
+
+    advance_src = True
+    advance_dest = True
+    unused_count = 0
+
+    while True:
+        if advance_src:
+            try:
+                s_item = next(s_iter)
+            except StopIteration:
+                if s_iter_list == []:
+                    break
+
+                s_iter = s_iter_list.pop()
+                continue
+        else:
+            if unused_count == 0:
+                # We want to avoid advancing just once -- when entering a
+                # dest substruct, or when exiting one.
+                advance_src = True
+
+        if advance_dest:
+            try:
+                d_item = next(d_iter)
+            except StopIteration:
+                if d_iter_list == []:
+                    # We were not in a substruct
+                    print("Section \"" + sec + "\",", end=' ')
+                    print("Description " + "\"" + desc + "\":", end=' ')
+                    print("expected field \"" + s_item["field"] + "\",", end=' ')
+                    print("while dest has no further fields")
+                    bump_taint()
+                    break
+
+                d_iter = d_iter_list.pop()
+                advance_src = False
+                continue
+        else:
+            if unused_count == 0:
+                advance_dest = True
+
+        if unused_count != 0:
+            if not advance_dest:
+                unused_count = unused_count - s_item["size"]
+                if unused_count == 0:
+                    advance_dest = True
+                    continue
+                if unused_count < 0:
+                    print("Section \"" + sec + "\",", end=' ')
+                    print("Description \"" + desc + "\":", end=' ')
+                    print("unused size mismatch near \"", end=' ')
+                    print(s_item["field"] + "\"")
+                    bump_taint()
+                    break
+                continue
+
+            if not advance_src:
+                unused_count = unused_count - d_item["size"]
+                if unused_count == 0:
+                    advance_src = True
+                    continue
+                if unused_count < 0:
+                    print("Section \"" + sec + "\",", end=' ')
+                    print("Description \"" + desc + "\":", end=' ')
+                    print("unused size mismatch near \"", end=' ')
+                    print(d_item["field"] + "\"")
+                    bump_taint()
+                    break
+                continue
+
+        if not check_fields_match(desc, s_item["field"], d_item["field"]):
+            # Some fields were put in substructs, keeping the
+            # on-wire format the same, but breaking static tools
+            # like this one.
+
+            # First, check if dest has a new substruct.
+            if exists_in_substruct(d_item, s_item["field"]):
+                # list iterators don't have a prev() function, so we
+                # have to store our current location, descend into the
+                # substruct, and ensure we come out as if nothing
+                # happened when the substruct is over.
+                #
+                # Essentially we're opening the substructs that got
+                # added which didn't change the wire format.
+                d_iter_list.append(d_iter)
+                substruct_fields = d_item["Description"]["Fields"]
+                d_iter = iter(substruct_fields)
+                advance_src = False
+                continue
+
+            # Next, check if src has substruct that dest removed
+            # (can happen in backward migration: 2.0 -> 1.5)
+            if exists_in_substruct(s_item, d_item["field"]):
+                s_iter_list.append(s_iter)
+                substruct_fields = s_item["Description"]["Fields"]
+                s_iter = iter(substruct_fields)
+                advance_dest = False
+                continue
+
+            if s_item["field"] == "unused" or d_item["field"] == "unused":
+                if s_item["size"] == d_item["size"]:
+                    continue
+
+                if d_item["field"] == "unused":
+                    advance_dest = False
+                    unused_count = d_item["size"] - s_item["size"]
+                    continue
+
+                if s_item["field"] == "unused":
+                    advance_src = False
+                    unused_count = s_item["size"] - d_item["size"]
+                    continue
+
+            print("Section \"" + sec + "\",", end=' ')
+            print("Description \"" + desc + "\":", end=' ')
+            print("expected field \"" + s_item["field"] + "\",", end=' ')
+            print("got \"" + d_item["field"] + "\"; skipping rest")
+            bump_taint()
+            break
+
+        check_version(s_item, d_item, sec, desc)
+
+        if "Description" not in s_item:
+            # Check size of this field only if it's not a VMSTRUCT entry
+            check_size(s_item, d_item, sec, desc, s_item["field"])
+
+        check_description_in_list(s_item, d_item, sec, desc)
+
+
+def check_subsections(src_sub, dest_sub, desc, sec):
+    for s_item in src_sub:
+        found = False
+        for d_item in dest_sub:
+            if s_item["name"] != d_item["name"]:
+                continue
+
+            found = True
+            check_descriptions(s_item, d_item, sec)
+
+        if not found:
+            print("Section \"" + sec + "\", Description \"" + desc + "\":", end=' ')
+            print("Subsection \"" + s_item["name"] + "\" not found")
+            bump_taint()
+
+
+def check_description_in_list(s_item, d_item, sec, desc):
+    if "Description" not in s_item:
+        return
+
+    if "Description" not in d_item:
+        print("Section \"" + sec + "\", Description \"" + desc + "\",", end=' ')
+        print("Field \"" + s_item["field"] + "\": missing description")
+        bump_taint()
+        return
+
+    check_descriptions(s_item["Description"], d_item["Description"], sec)
+
+
+def check_descriptions(src_desc, dest_desc, sec):
+    check_version(src_desc, dest_desc, sec, src_desc["name"])
+
+    if not check_fields_match(sec, src_desc["name"], dest_desc["name"]):
+        print("Section \"" + sec + "\":", end=' ')
+        print("Description \"" + src_desc["name"] + "\"", end=' ')
+        print("missing, got \"" + dest_desc["name"] + "\" instead; skipping")
+        bump_taint()
+        return
+
+    for f in src_desc:
+        if f not in dest_desc:
+            print("Section \"" + sec + "\"", end=' ')
+            print("Description \"" + src_desc["name"] + "\":", end=' ')
+            print("Entry \"" + f + "\" missing")
+            bump_taint()
+            continue
+
+        if f == 'Fields':
+            check_fields(src_desc[f], dest_desc[f], src_desc["name"], sec)
+
+        if f == 'Subsections':
+            check_subsections(src_desc[f], dest_desc[f], src_desc["name"], sec)
+
+
+def check_version(s, d, sec, desc=None):
+    if s["version_id"] > d["version_id"]:
+        print("Section \"" + sec + "\"", end=' ')
+        if desc:
+            print("Description \"" + desc + "\":", end=' ')
+        print("version error:", s["version_id"], ">", d["version_id"])
+        bump_taint()
+
+    if "minimum_version_id" not in d:
+        return
+
+    if s["version_id"] < d["minimum_version_id"]:
+        print("Section \"" + sec + "\"", end=' ')
+        if desc:
+            print("Description \"" + desc + "\":", end=' ')
+        print("minimum version error:", s["version_id"], "<", end=' ')
+        print(d["minimum_version_id"])
+        bump_taint()
+
+
+def check_size(s, d, sec, desc=None, field=None):
+    if s["size"] != d["size"]:
+        print("Section \"" + sec + "\"", end=' ')
+        if desc:
+            print("Description \"" + desc + "\"", end=' ')
+        if field:
+            print("Field \"" + field + "\"", end=' ')
+        print("size mismatch:", s["size"], ",", d["size"])
+        bump_taint()
+
+
+def check_machine_type(s, d):
+    if s["Name"] != d["Name"]:
+        print("Warning: checking incompatible machine types:", end=' ')
+        print("\"" + s["Name"] + "\", \"" + d["Name"] + "\"")
+    return
+
+
+def main():
+    help_text = "Parse JSON-formatted vmstate dumps from QEMU in files SRC " \
+                "and DEST. Checks whether migration from SRC to DEST QEMU " \
+                "versions would break based on the VMSTATE information " \
+                "contained within the JSON outputs. The JSON output is " \
+                "created from a QEMU invocation with the -dump-vmstate " \
+                "parameter and a filename argument to it. Other parameters " \
+                "to QEMU do not matter, except the -M (machine type) " \
+                "parameter."
+
+    parser = argparse.ArgumentParser(description=help_text)
+    parser.add_argument('-s', '--src', type=argparse.FileType('r'),
+                        required=True,
+                        help='json dump from src qemu')
+    parser.add_argument('-d', '--dest', type=argparse.FileType('r'),
+                        required=True,
+                        help='json dump from dest qemu')
+    parser.add_argument('--reverse', required=False, default=False,
+                        action='store_true',
+                        help='reverse the direction')
+    args = parser.parse_args()
+
+    src_data = json.load(args.src)
+    dest_data = json.load(args.dest)
+    args.src.close()
+    args.dest.close()
+
+    if args.reverse:
+        src_data, dest_data = dest_data, src_data
+
+    for sec in src_data:
+        dest_sec = sec
+        if dest_sec not in dest_data:
+            # Either the section name got changed, or the section
+            # doesn't exist in dest.
+            dest_sec = get_changed_sec_name(sec)
+            if dest_sec not in dest_data:
+                print("Section \"" + sec + "\" does not exist in dest")
+                bump_taint()
+                continue
+
+        s = src_data[sec]
+        d = dest_data[dest_sec]
+
+        if sec == "vmschkmachine":
+            check_machine_type(s, d)
+            continue
+
+        check_version(s, d, sec)
+
+        for entry in s:
+            if entry not in d:
+                print("Section \"" + sec + "\": Entry \"" + entry + "\"", end=' ')
+                print("missing")
+                bump_taint()
+                continue
+
+            if entry == "Description":
+                check_descriptions(s[entry], d[entry], sec)
+
+    return taint
+
+
+if __name__ == '__main__':
+    sys.exit(main())