dependtool first improvement

This commit is contained in:
Rob1103 2022-11-05 16:53:12 +01:00
parent 7f26ca5d3d
commit 6d88b02718
3 changed files with 351 additions and 7 deletions

View file

@ -203,10 +203,7 @@ func matchLibs(unikraftLibs string, data *u.Data) ([]string, map[string]string,
} }
dataMap := putJsonSymbolsTogether(data) dataMap := putJsonSymbolsTogether(data)
//matchedLibs = append(matchedLibs, POSIXPROCESS)
//matchedLibs = append(matchedLibs, POSIXUSER)
//matchedLibs = append(matchedLibs, POSIXSYSINFO)
//matchedLibs = append(matchedLibs, POSIXLIBDL)
// Perform the symbol matching // Perform the symbol matching
matchedLibs = matchSymbols(matchedLibs, dataMap, mapSymbols) matchedLibs = matchSymbols(matchedLibs, dataMap, mapSymbols)

243
srcs/dependtool/parserClang.py Executable file
View file

@ -0,0 +1,243 @@
#!/usr/bin/env python3
#---------------------------------------------------------------------
# (*) Installation:
#
# pip3 install clang
#
# cd /usr/lib/x86_64-linux-gnu/
# sudo ln -s libclang-X.Y.so.1 libclang-14.so (X.Y the version number)
#
# (*) Run:
#
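# python3 parserClang.py [-q] [-v] [-t] [-o <output>] <filepath>...
#
# where each filepath can be a repository/folder or a file (c/cpp/h/hpp);
# -q silences the progress messages, -v prints details about each function
# and -t prints the function names as a comma-separated list instead of JSON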
#
#
# Gaulthier Gain <gaulthier.gain@uliege.be>
# License: BSD
#---------------------------------------------------------------------
import getopt
import os
import sys
import json
import clang.cindex
import clang
import platform
from clang.cindex import CursorKind
from collections import Counter
# State shared by every function of the script.
silent_flag = False  # when True, progress messages are not printed
verbose = False  # Change it to verbose mode
global_calls = Counter()  # call-expression name -> number of occurrences
global_funcs = Counter()  # function-declaration name -> number of occurrences
def check_input_path(path, includePaths):
    """Analyse ``path``, which may be a directory or a single file.

    A directory is walked with iterate_root_folder(); a regular file is
    handed to check_type_file(). Anything else is reported on stdout and
    terminates the script with exit status 1.

    ``includePaths`` is forwarded unchanged to the function doing the work.
    """
    if os.path.isdir(path):
        iterate_root_folder(path, includePaths)
    elif os.path.isfile(path):
        check_type_file(path, includePaths)
    else:
        print("Unable to analyse this file")
        # sys.exit() rather than the bare exit() helper: the latter is
        # injected by the site module and is not guaranteed to exist.
        sys.exit(1)
def get_include_paths(rootdir, includepathsFile):
    """Build the ``-isystem`` compiler flags listed in ``includepathsFile``.

    Every non-blank line of the file is a path; it is stripped of
    surrounding whitespace, appended verbatim to ``rootdir`` (no separator
    is inserted) and prefixed with ``-isystem``.

    Returns the flags joined by single spaces, or an empty string when the
    file lists no path.
    """
    flags = []
    with open(includepathsFile, 'r') as file:
        for line in file:
            includePath = line.strip()
            if not includePath:
                # A blank line would otherwise register rootdir itself as a
                # system include directory.
                continue
            flags.append('-isystem ' + rootdir + includePath)
    return ' '.join(flags)
def check_type_file(filepath, includePaths):
    """Parse ``filepath`` with the options matching its extension.

    ``.cpp``/``.hpp`` files are parsed as C++11, ``.c``/``.h`` files with
    the default options; any other extension is ignored. When
    ``includePaths`` is not None it is appended to the options, separated
    by a space. Unless the silent flag is set, the file name is announced
    before the extension is checked.
    """
    extra = '' if includePaths is None else ' ' + includePaths
    if silent_flag is False:
        print("Gathering symbols of " + filepath)
    if filepath.endswith((".cpp", ".hpp")):
        parse_file(filepath, '-x c++ --std=c++11' + extra)
        return
    if filepath.endswith((".c", ".h")):
        parse_file(filepath, extra)
def iterate_root_folder(rootdir, includePaths):
    """Run check_type_file() on every file found below ``rootdir``.

    The tree is traversed with os.walk(); ``includePaths`` is forwarded
    unchanged for each file.
    """
    for current_dir, _subdirs, names in os.walk(rootdir):
        prefix = current_dir + os.sep
        for name in names:
            check_type_file(prefix + name, includePaths)
def display_info_function(funcs, calls):
    """Print information about the gathered symbols (verbose mode).

    For each declaration in ``funcs`` this prints its fully qualified name
    and location, then one ``- <location>`` line per call in ``calls``
    that refers to it, then an empty line.
    """
    for decl in funcs:
        print(fully_qualified(decl), decl.location)
        matching = (call for call in calls if is_function_call(decl, call))
        for call in matching:
            print('-', call.location)
        print()
def parse_file(filepath, arguments):
    """Parse ``filepath`` with libclang and gather its symbols.

    ``arguments`` is a string of compiler options; it is split on
    whitespace before being handed to the parser. The function
    declarations and calls of the resulting translation unit are collected
    by find_funcs_and_calls(). In verbose mode they are also displayed,
    followed by the parser diagnostics.
    """
    index = clang.cindex.Index.create()
    translation_unit = index.parse(filepath, args=arguments.split())
    funcs, calls = find_funcs_and_calls(translation_unit)
    if not verbose:
        return
    display_info_function(funcs, calls)
    print(list(translation_unit.diagnostics))
def fully_qualified(c):
    """Return the fully qualified name of cursor ``c`` (with namespaces).

    The spellings of ``c`` and of its semantic parents are joined with
    ``::``, outermost first. The chain stops at the translation unit or at
    a missing parent; leading empty spellings do not produce a separator.
    An empty string is returned when ``c`` is None or is the translation
    unit itself.
    """
    # Collect the spellings from the cursor upwards, innermost first.
    spellings = []
    node = c
    while node is not None:
        if node.kind == CursorKind.TRANSLATION_UNIT:
            break
        spellings.append(node.spelling)
        node = node.semantic_parent

    qualified = ''
    for spelling in reversed(spellings):
        if qualified == '':
            qualified = spelling
        else:
            qualified = qualified + '::' + spelling
    return qualified
def is_function_call(funcdecl, c):
    """Tell whether call-expression cursor ``c`` refers to ``funcdecl``.

    The definition of ``c`` is looked up; the result is False when there is
    none, otherwise the outcome of comparing it with ``funcdecl``.
    """
    target = c.get_definition()
    if target is None:
        return False
    return target == funcdecl
def filter_func_name(displayname):
    """Return ``displayname`` without its ``(args)`` part.

    Everything from the first opening parenthesis onwards is dropped; a
    name without parenthesis is returned unchanged.
    """
    return displayname.partition('(')[0]
def find_funcs_and_calls(tu):
    """Collect the function declarations and call expressions of ``tu``.

    Every cursor of the translation unit is visited in pre-order. Call
    expressions and function declarations are stored in two lists, and the
    name of each one (without its argument list) is counted in
    ``global_calls`` or ``global_funcs`` respectively.

    Returns the tuple ``(funcs, calls)``.
    """
    funcs, calls = [], []
    for cursor in tu.cursor.walk_preorder():
        kind = cursor.kind
        if kind == CursorKind.CALL_EXPR:
            bucket, counter = calls, global_calls
        elif kind == CursorKind.FUNCTION_DECL:
            bucket, counter = funcs, global_funcs
        else:
            continue
        bucket.append(cursor)
        # Keep only the name, without the "(args)" part.
        counter[filter_func_name(cursor.displayname)] += 1
    return funcs, calls
def write_to_json(output_filename, data):
    """Serialise ``data`` as JSON into ``<output_filename>.json``.

    The ``.json`` extension is appended to ``output_filename``; the output
    is indented by four spaces and its keys are sorted.
    """
    target = output_filename + '.json'
    with open(target, 'w') as stream:
        json.dump(data, stream, sort_keys=True, indent=4)
def read_from_json(filename):
    """Load and return the JSON content of ``<filename>.json``.

    The ``.json`` extension is appended to ``filename``, mirroring
    write_to_json().
    """
    # The parameter is ``filename``; the body used to read an undefined
    # ``output_filename`` and therefore always failed with NameError.
    with open(filename + '.json', 'r') as fp:
        return json.load(fp)
def read_syscalls_list(filename):
    """Read the list of syscalls stored in the text file ``filename``.

    Each line of the file is one entry; surrounding whitespace is stripped.
    Returns the entries as a set.
    """
    with open(filename) as stream:
        return {entry.strip() for entry in stream}
def compare_syscalls(syscalls):
    """Return the called function names that are system calls.

    ``syscalls`` is the collection of known syscall names. The names
    counted in ``global_calls`` are filtered against it, keeping only the
    ones it contains. A heading is printed first unless the silent flag is
    set.
    """
    if silent_flag is False:
        print("Gathered syscalls from function calls:")
    # Keep the calls that ARE syscalls; the previous ``not in`` test
    # returned every call except the syscalls.
    return [name for name in global_calls if name in syscalls]
def main():
    """Parse the command line, analyse every input path and print the result.

    Options:
        -i <file>  file listing include paths, one per line; each path is
                   prefixed with the analysed input path
        -o <file>  accepted, but the result is always written to stdout
        -q         silent mode: no progress messages
        -v         verbose mode: details about each function
        -t         print the declared function names as one comma-separated
                   line instead of JSON

    The remaining arguments are the files/folders to analyse; the script
    exits with status 1 when there is none.
    """
    global silent_flag, verbose

    # "i:" must be part of the option string, otherwise -i is rejected.
    optlist, args = getopt.getopt(sys.argv[1:], "i:o:qvt")
    includepathsFile = None
    textFormat = False
    for opt, value in optlist:
        if opt == "-i":
            includepathsFile = value
        elif opt == "-o":
            # Accepted for compatibility; the output still goes to stdout.
            pass
        elif opt == "-q":
            silent_flag = True
        elif opt == "-v":
            verbose = True
        elif opt == "-t":
            textFormat = True

    input_file_names = args
    if not input_file_names:
        if silent_flag is False:
            print("No input files supplied")
        sys.exit(1)

    for input_file_name in input_file_names:
        includePaths = None
        if includepathsFile is not None:
            # The include paths are prefixed with the analysed path, so
            # they have to be rebuilt for every input.
            includePaths = get_include_paths(input_file_name, includepathsFile)
        check_input_path(input_file_name, includePaths)

    if silent_flag is False:
        print("---------------------------------------------------------")

    if textFormat:
        # Nothing at all is printed when no function was found.
        if global_funcs:
            print(','.join(global_funcs))
        return

    # Dump function declarations and calls to json
    output_dikt = {
        'functions': [{'name': name, 'value': count}
                      for name, count in global_funcs.items()],
        'calls': [{'name': name, 'value': count}
                  for name, count in global_calls.items()],
    }
    if includepathsFile is not None:
        # Read syscalls from txt file
        all_syscalls = read_syscalls_list('syscall_list.txt')
        output_dikt['syscalls'] = compare_syscalls(all_syscalls)
    json.dump(output_dikt, sys.stdout)
if __name__ == "__main__":
    if platform.system() == "Darwin":
        # Point the bindings at the libclang shipped with Xcode when it is
        # installed; otherwise keep the default library lookup. The path
        # must be passed as the argument of the call on the same statement.
        libclang_path = "/Applications/Xcode.app/Contents/Frameworks/libclang.dylib"
        if os.path.isfile(libclang_path):
            clang.cindex.Config.set_library_file(libclang_path)
    main()

View file

@ -11,6 +11,9 @@ import (
"debug/elf" "debug/elf"
"fmt" "fmt"
"os" "os"
"os/exec"
"path/filepath"
"strings"
u "tools/srcs/common" u "tools/srcs/common"
) )
@ -191,12 +194,106 @@ func executeDependAptCache(programName string, data *u.StaticData,
return nil return nil
} }
// findSourcesFiles puts together all C/C++ source files found in a given application folder.
//
// It returns a slice containing the found source file names and an error if any. Otherwise, it
// returns nil.
func findSourcesFiles(workspace string) ([]string, error) {
var filenames []string
err := filepath.Walk(workspace,
func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
ext := filepath.Ext(info.Name())
if ext == ".c" || ext == ".cpp" {
filenames = append(filenames, path)
}
return nil
})
if err != nil {
return nil, err
}
return filenames, nil
}
// TODO REPLACE
// ExecuteCommand runs a single command without displaying its output.
//
// It returns the combined standard output and standard error of the command
// as a string. If the command fails, it returns an empty string and the
// error.
func ExecuteCommand(command string, arguments []string) (string, error) {

	cmd := exec.Command(command, arguments...)

	combined, err := cmd.CombinedOutput()
	if err == nil {
		return string(combined), nil
	}
	return "", err
}
// addSourceFileSymbols adds all the symbols present in 'output' to the static
// data field in 'data'.
//
// 'output' is a comma-separated list of symbol names. Each name is trimmed
// and empty names are ignored. A name known as a system call is stored in
// the system calls map, any other name in the symbols map.
func addSourceFileSymbols(output string, data *u.Data) {

	// Get the list of system calls
	systemCalls := initSystemCalls()

	for _, s := range strings.Split(output, ",") {
		// The output ends with a newline, which would otherwise stay glued
		// to the last symbol; an empty output yields a single empty token.
		s = strings.TrimSpace(s)
		if s == "" {
			continue
		}

		if id, isSyscall := systemCalls[s]; isSyscall {
			data.StaticData.SystemCalls[s] = id
		} else {
			data.StaticData.Symbols[s] = ""
		}
	}
}
// extractPrototype executes the parserClang.py script on each source file to
// extract all possible symbols of each of these files and adds them to
// 'data'.
//
// A file whose analysis fails is reported with a warning and skipped, so the
// function currently always returns nil.
func extractPrototype(sourcesFiltered []string, data *u.Data) error {

	// The script location does not depend on the analysed file.
	script := filepath.Join(os.Getenv("GOPATH"), "src", "tools", "srcs", "dependtool",
		"parserClang.py")

	for _, f := range sourcesFiltered {
		output, err := ExecuteCommand("python3", []string{script, "-q", "-t", f})
		if err != nil {
			// Keep the cause in the warning so the failure can be diagnosed.
			u.PrintWarning("Incomplete analysis with file " + f + ": " + err.Error())
			continue
		}
		addSourceFileSymbols(output, data)
	}
	return nil
}
// gatherSourceFileSymbols gathers symbols of source files from the folder
// that contains the program located at 'programPath'.
//
// It returns the error met while looking for the source files or while
// extracting their symbols, otherwise it returns nil. Reporting the error is
// left to the caller.
func gatherSourceFileSymbols(data *u.Data, programPath string) error {

	// filepath.Dir also copes with a bare file name ("prog" -> ".") and with
	// a program located in the root folder ("/prog" -> "/").
	folderPath := filepath.Dir(programPath)

	files, err := findSourcesFiles(folderPath)
	if err != nil {
		return err
	}

	return extractPrototype(files, data)
}
// -------------------------------------Run------------------------------------- // -------------------------------------Run-------------------------------------
// staticAnalyser runs the static analysis to get shared libraries, // staticAnalyser runs the static analysis to get shared libraries,
// system calls and library calls of a given application. // system calls and library calls of a given application.
// func staticAnalyser(elfFile *elf.File, isDynamic, isLinux bool, args u.Arguments, data *u.Data,
func staticAnalyser(elfFile *elf.File, isDynamic, isLinux bool, args u.Arguments, data *u.Data, programPath string) { programPath string) {
programName := *args.StringArg[programArg] programName := *args.StringArg[programArg]
fullDeps := *args.BoolArg[fullDepsArg] fullDeps := *args.BoolArg[fullDepsArg]
@ -240,9 +337,16 @@ func staticAnalyser(elfFile *elf.File, isDynamic, isLinux bool, args u.Arguments
} }
} }
// Detect symbols from source files
u.PrintHeader2("(*) Gathering symbols from source files")
if err := gatherSourceFileSymbols(data, programPath); err != nil {
u.PrintWarning(err)
}
// Detect symbols from shared libraries // Detect symbols from shared libraries
if fullStaticAnalysis && isLinux { if fullStaticAnalysis && isLinux {
u.PrintHeader2("(*) Gathering symbols and system calls of shared libraries from binary file") u.PrintHeader2("(*) Gathering symbols and system calls of shared libraries from binary" +
"file")
for key, path := range staticData.SharedLibs { for key, path := range staticData.SharedLibs {
if len(path) > 0 { if len(path) > 0 {
fmt.Printf("\t-> Analysing %s - %s\n", key, path[0]) fmt.Printf("\t-> Analysing %s - %s\n", key, path[0])