#!/usr/bin/env python3
#---------------------------------------------------------------------
# (*) Installation:
#
# pip3 install clang
#
# cd /usr/lib/x86_64-linux-gnu/
# sudo ln -s libclang-X.Y.so.1 libclang-14.so (X.Y the version number)
#
# (*) Run:
#
# python3 parserClang.py [-q] [-v] [-t] [-i includepathsfile] [-o name] <filepath>...
#
# where filepath can be a repository/folder or a file (c/cpp/h/hpp)
#
#   -q  quiet: do not print progress messages
#   -v  verbose: print every declaration with its call sites, then the
#       clang diagnostics of each parsed file
#   -t  text output: comma-separated list of the declared function names
#       (default output is JSON)
#   -i  text file with one include directory per line; each line is
#       appended to the input path and given to clang with -isystem
#   -o  accepted, but the result is always written to stdout
#
# Gaulthier Gain
# License: BSD
#---------------------------------------------------------------------

import getopt
import os
import sys
import json
import clang.cindex
import clang
import platform
from clang.cindex import CursorKind
from collections import Counter

verbose = False # Set to True by -v

# Occurrence counters shared by all parsed files, keyed by function name
global_funcs = Counter() # function declarations
global_calls = Counter() # call expressions

silent_flag = False # Set to True by -q

# Check if a path is a directory or a file and analyse it accordingly.
# Exits with status 1 when the path is neither.
def check_input_path(path, includePaths):
    if os.path.isdir(path):
        iterate_root_folder(path, includePaths)
    elif os.path.isfile(path):
        check_type_file(path, includePaths)
    else:
        print("Unable to analyse this file")
        sys.exit(1)

# Build the '-isystem' options from a file listing one include path per
# line. Each path is appended to rootdir as-is (no separator is added).
# Returns the options as a single space-separated string.
def get_include_paths(rootdir, includepathsFile):
    paths = []
    with open(includepathsFile, 'r') as file:
        for includePath in file.readlines():
            path = '-isystem ' + rootdir + includePath.replace('\n', '')
            paths.append(path)

    return ' '.join(paths)

# Check type/extension of a given file and parse it with the matching
# options: C++ for .cpp/.hpp, C for .c/.h. Other files are ignored.
def check_type_file(filepath, includePaths):
    cplusplusOptions = '-x c++ --std=c++11'
    cOptions = ''

    if includePaths is not None:
        cplusplusOptions = cplusplusOptions + ' ' + includePaths
        cOptions = cOptions + ' ' + includePaths
    if silent_flag is False:
        print("Gathering symbols of " + filepath)
    if filepath.endswith((".cpp", ".hpp")):
        parse_file(filepath, cplusplusOptions)
    elif filepath.endswith((".c", ".h")):
        parse_file(filepath, cOptions)

# Iterate through a root folder and analyse every file found below it
def iterate_root_folder(rootdir, includePaths):
    for subdir, dirs, files in os.walk(rootdir):
        for file in files:
            check_type_file(os.path.join(subdir, file), includePaths)

# Print info about symbols (verbose mode): each declaration followed by
# the location of every call that resolves to it
def display_info_function(funcs, calls):
    for f in funcs:
        print(fully_qualified(f), f.location)
        for c in calls:
            if is_function_call(f, c):
                print('-', c.location)
        print()

# Parse a given file to generate an AST and count its declarations and
# calls. 'arguments' is a space-separated string of compiler options.
def parse_file(filepath, arguments):
    idx = clang.cindex.Index.create()
    args = arguments.split()
    tu = idx.parse(filepath, args=args)
    funcs, calls = find_funcs_and_calls(tu)
    if verbose:
        display_info_function(funcs, calls)
        print(list(tu.diagnostics))

# Retrieve a fully qualified function name (with namespaces)
def fully_qualified(c):
    if c is None:
        return ''
    elif c.kind == CursorKind.TRANSLATION_UNIT:
        return ''
    else:
        res = fully_qualified(c.semantic_parent)
        if res != '':
            return res + '::' + c.spelling
        return c.spelling

# Determine whether a call-expression cursor refers to a particular
# function declaration
def is_function_call(funcdecl, c):
    defn = c.get_definition()
    return (defn is not None) and (defn == funcdecl)

# Filter name to take only the function name (remove "(args)")
def filter_func_name(displayname):
    # split() returns the whole string when there is no '('
    return displayname.split('(')[0]

# Retrieve lists of function declarations and call expressions in a
# translation unit, and update the global counters on the way
def find_funcs_and_calls(tu):
    calls = []
    funcs = []
    for c in tu.cursor.walk_preorder():
        if c.kind == CursorKind.CALL_EXPR:
            calls.append(c)
            # filter name to take only the name if necessary
            global_calls[filter_func_name(c.displayname)] += 1
        elif c.kind == CursorKind.FUNCTION_DECL:
            funcs.append(c)
            # filter name to take only the name if necessary
            global_funcs[filter_func_name(c.displayname)] += 1
    return funcs, calls

# Write data to json file ('.json' is appended to the given name)
def write_to_json(output_filename, data):
    with open(output_filename + '.json', 'w') as fp:
        json.dump(data, fp, indent=4, sort_keys=True)

# Read data from json file ('.json' is appended to the given name)
def read_from_json(filename):
    # The file name comes from the parameter; the previous code referred
    # to an undefined 'output_filename' and raised NameError.
    with open(filename + '.json', 'r') as fp:
        data = json.load(fp)
    return data

# Read the list of syscalls (text file, one name per line)
def read_syscalls_list(filename):
    syscalls = set()
    with open(filename) as f:
        for line in f:
            name = line.strip()
            if name:
                syscalls.add(name)
    return syscalls

# Check which syscalls are called: returns the called function names
# that appear in the given collection of syscall names
def compare_syscalls(syscalls):
    if silent_flag is False:
        print("Gathered syscalls from function calls:")

    return [key for key in global_calls.keys() if key in syscalls]

# Main function
def main():
    global silent_flag, verbose

    # "i:" must be part of the spec, otherwise getopt rejects -i
    optlist, args = getopt.getopt(sys.argv[1:], "i:o:qvt")
    includepathsFile = None
    # NOTE(review): -o is parsed but nothing reads this value; the result
    # always goes to stdout. Kept so existing command lines still work.
    output_file_name = None
    textFormat = False
    for opt, value in optlist:
        if opt == "-i":
            includepathsFile = value
        elif opt == "-o":
            output_file_name = value
        elif opt == "-q":
            silent_flag = True
        elif opt == "-v":
            verbose = True
        elif opt == "-t":
            textFormat = True

    input_file_names = args
    if len(input_file_names) == 0:
        if silent_flag is False:
            print("No input files supplied")
        sys.exit(1)

    for input_file_name in input_file_names:
        includePaths = None
        if includepathsFile is not None:
            # Include paths are relative to the input being analysed, so
            # they are computed once per input.
            includePaths = get_include_paths(input_file_name, includepathsFile)
        check_input_path(input_file_name, includePaths)

    if silent_flag is False:
        print("---------------------------------------------------------")

    if textFormat:
        # Nothing at all is printed when no declaration was found
        if global_funcs:
            print(','.join(global_funcs))
    else:
        # Dump function declarations and calls to json
        output_dikt = {
            'functions': [{'name':key, 'value':value} for key,value in
                          global_funcs.items()],
            'calls': [{'name':key, 'value':value} for key,value in
                      global_calls.items()]
        }
        if includepathsFile is not None:
            # Read syscalls from txt file
            all_syscalls = read_syscalls_list('syscall_list.txt')
            called_syscalls = compare_syscalls(all_syscalls)
            output_dikt['syscalls'] = called_syscalls

        json.dump(output_dikt, sys.stdout)


if __name__ == "__main__":
    if platform.system() == "Darwin":
        # The argument must be on the call itself; on its own line it is a
        # separate expression and the library path is never set.
        clang.cindex.Config.set_library_file(
            "/Applications/Xcode.app/Contents/Frameworks/libclang.dylib")
    main()
+// +// It returns a slice containing the found source file names and an error if any. Otherwise, it +// returns nil. +func findSourcesFiles(workspace string) ([]string, error) { + + var filenames []string + + err := filepath.Walk(workspace, + func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + ext := filepath.Ext(info.Name()) + if ext == ".c" || ext == ".cpp" { + filenames = append(filenames, path) + } + return nil + }) + if err != nil { + return nil, err + } + return filenames, nil +} + +// TODO REPLACE +// ExecuteCommand a single command without displaying the output. +// +// It returns a string which represents stdout and an error if any, otherwise +// it returns nil. +func ExecuteCommand(command string, arguments []string) (string, error) { + out, err := exec.Command(command, arguments...).CombinedOutput() + if err != nil { + return "", err + } + return string(out), nil +} + +// addSourceFileSymbols adds all the symbols present in 'output' to the static data field in +// 'data'. +func addSourceFileSymbols(output string, data *u.Data) { + outputTab := strings.Split(output, ",") + + // Get the list of system calls + systemCalls := initSystemCalls() + + for _, s := range outputTab { + if _, isSyscall := systemCalls[s]; isSyscall { + data.StaticData.SystemCalls[s] = systemCalls[s] + } else { + data.StaticData.Symbols[s] = "" + } + } +} + +// extractPrototype executes the parserClang.py script on each source file to extracts all possible +// symbols of each of these files. +// +// It returns an error if any, otherwise it returns nil. 
+func extractPrototype(sourcesFiltered []string, data *u.Data) error { + + for _, f := range sourcesFiltered { + script := filepath.Join(os.Getenv("GOPATH"), "src", "tools", "srcs", "dependtool", + "parserClang.py") + output, err := ExecuteCommand("python3", []string{script, "-q", "-t", f}) + if err != nil { + u.PrintWarning("Incomplete analysis with file " + f) + continue + } + addSourceFileSymbols(output, data) + } + return nil +} + +// gatherSourceFileSymbols gathers symbols of source files from a given application folder. +// +// It returns an error if any, otherwise it returns nil. +func gatherSourceFileSymbols(data *u.Data, programPath string) error { + + tmp := strings.Split(programPath, "/") + folderPath := strings.Join(tmp[:len(tmp)-1], "/") + + files, err := findSourcesFiles(folderPath) + if err != nil { + u.PrintErr(err) + } + + if err := extractPrototype(files, data); err != nil { + u.PrintErr(err) + } + return nil +} + // -------------------------------------Run------------------------------------- // staticAnalyser runs the static analysis to get shared libraries, // system calls and library calls of a given application. 
-// -func staticAnalyser(elfFile *elf.File, isDynamic, isLinux bool, args u.Arguments, data *u.Data, programPath string) { +func staticAnalyser(elfFile *elf.File, isDynamic, isLinux bool, args u.Arguments, data *u.Data, + programPath string) { programName := *args.StringArg[programArg] fullDeps := *args.BoolArg[fullDepsArg] @@ -240,9 +337,16 @@ func staticAnalyser(elfFile *elf.File, isDynamic, isLinux bool, args u.Arguments } } + // Detect symbols from source files + u.PrintHeader2("(*) Gathering symbols from source files") + if err := gatherSourceFileSymbols(data, programPath); err != nil { + u.PrintWarning(err) + } + // Detect symbols from shared libraries if fullStaticAnalysis && isLinux { - u.PrintHeader2("(*) Gathering symbols and system calls of shared libraries from binary file") + u.PrintHeader2("(*) Gathering symbols and system calls of shared libraries from binary" + + "file") for key, path := range staticData.SharedLibs { if len(path) > 0 { fmt.Printf("\t-> Analysing %s - %s\n", key, path[0])