dependtool first improvement
This commit is contained in:
parent
7f26ca5d3d
commit
6d88b02718
3 changed files with 351 additions and 7 deletions
|
@ -203,10 +203,7 @@ func matchLibs(unikraftLibs string, data *u.Data) ([]string, map[string]string,
|
||||||
}
|
}
|
||||||
|
|
||||||
dataMap := putJsonSymbolsTogether(data)
|
dataMap := putJsonSymbolsTogether(data)
|
||||||
//matchedLibs = append(matchedLibs, POSIXPROCESS)
|
|
||||||
//matchedLibs = append(matchedLibs, POSIXUSER)
|
|
||||||
//matchedLibs = append(matchedLibs, POSIXSYSINFO)
|
|
||||||
//matchedLibs = append(matchedLibs, POSIXLIBDL)
|
|
||||||
// Perform the symbol matching
|
// Perform the symbol matching
|
||||||
matchedLibs = matchSymbols(matchedLibs, dataMap, mapSymbols)
|
matchedLibs = matchSymbols(matchedLibs, dataMap, mapSymbols)
|
||||||
|
|
||||||
|
|
243
srcs/dependtool/parserClang.py
Executable file
243
srcs/dependtool/parserClang.py
Executable file
|
@ -0,0 +1,243 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
#---------------------------------------------------------------------
|
||||||
|
# (*) Installation:
|
||||||
|
#
|
||||||
|
# pip3 install clang
|
||||||
|
#
|
||||||
|
# cd /usr/lib/x86_64-linux-gnu/
|
||||||
|
# sudo ln -s libclang-X.Y.so.1 libclang-14.so (X.Y the version number)
|
||||||
|
#
|
||||||
|
# (*) Run:
|
||||||
|
#
|
||||||
|
# python3 parserClang.py <filepath> [includepathsfile]
|
||||||
|
#
|
||||||
|
# where filepath can be a repository/folder or a file (c/cpp/h/hpp)
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Gaulthier Gain <gaulthier.gain@uliege.be>
|
||||||
|
# License: BSD
|
||||||
|
#---------------------------------------------------------------------
|
||||||
|
|
||||||
|
import getopt
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import clang.cindex
|
||||||
|
import clang
|
||||||
|
import platform
|
||||||
|
from clang.cindex import CursorKind
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
# Output-control switches; main() turns them on from the -v and -q options.
verbose = False  # Change it to verbose mode
silent_flag = False

# Occurrence counters keyed by function name, filled while walking each
# translation unit in find_funcs_and_calls().
global_calls = Counter()
global_funcs = Counter()
|
||||||
|
|
||||||
|
# Check if a path is a directory or a file
|
||||||
|
def check_input_path(path, includePaths):
    """Dispatch *path* to the folder walker or to the single-file handler.

    A directory is handed to iterate_root_folder() and a regular file to
    check_type_file(). Anything else prints an error message and terminates
    the process with exit status 1.
    """
    if os.path.isdir(path):
        iterate_root_folder(path, includePaths)
        return
    if os.path.isfile(path):
        check_type_file(path, includePaths)
        return

    print("Unable to analyse this file")
    exit(1)
|
||||||
|
|
||||||
|
def get_include_paths(rootdir, includepathsFile):
    """Build the ``-isystem`` compiler options listed in *includepathsFile*.

    Every non-blank line of the file is a path that is appended verbatim to
    *rootdir* and prefixed with ``-isystem ``.

    Returns the options joined by single spaces ('' when the file lists no
    path).
    """
    with open(includepathsFile, 'r') as file:
        # Surrounding whitespace is stripped and blank lines are skipped:
        # a blank line would otherwise yield a bogus '-isystem <rootdir>'.
        entries = (line.strip() for line in file)
        paths = ['-isystem ' + rootdir + entry for entry in entries if entry]

    return ' '.join(paths)
|
||||||
|
|
||||||
|
# Check type/extension of a given file
|
||||||
|
def check_type_file(filepath, includePaths):
    """Parse *filepath* with C or C++ options depending on its extension.

    ``.cpp``/``.hpp`` files are parsed as C++11, ``.c``/``.h`` files with no
    language option; files with any other extension are not parsed. When
    *includePaths* is not None it is appended to the option string. Unless
    the silent flag is set, a progress line is printed for every file.
    """
    cxx_flags = '-x c++ --std=c++11'
    c_flags = ''
    if includePaths is not None:
        cxx_flags = cxx_flags + ' ' + includePaths
        c_flags = c_flags + ' ' + includePaths

    if silent_flag is False:
        print("Gathering symbols of " + filepath)

    if filepath.endswith((".cpp", ".hpp")):
        parse_file(filepath, cxx_flags)
    elif filepath.endswith((".c", ".h")):
        parse_file(filepath, c_flags)
|
||||||
|
|
||||||
|
# Iterate through a root folder
|
||||||
|
def iterate_root_folder(rootdir, includePaths):
    """Walk *rootdir* recursively and pass every file to check_type_file()."""
    for current_dir, _subdirs, filenames in os.walk(rootdir):
        for name in filenames:
            check_type_file(current_dir + os.sep + name, includePaths)
|
||||||
|
|
||||||
|
# Print info about symbols (verbose mode)
|
||||||
|
def display_info_function(funcs, calls):
    """Print each function declaration followed by the calls resolving to it.

    For every cursor in *funcs* the qualified name and location are printed,
    then one '- <location>' line per cursor in *calls* whose definition is
    that declaration, then an empty line.
    """
    for decl in funcs:
        print(fully_qualified(decl), decl.location)
        call_sites = (call for call in calls if is_function_call(decl, call))
        for call in call_sites:
            print('-', call.location)
        print()
|
||||||
|
|
||||||
|
# Parse a given file to generate a AST
|
||||||
|
def parse_file(filepath, arguments):
    """Parse *filepath* with libclang and record its functions and calls.

    *arguments* is a whitespace-separated compiler option string. The global
    counters are updated by find_funcs_and_calls(); in verbose mode the
    per-function call details and the parser diagnostics are printed too.
    """
    index = clang.cindex.Index.create()
    tu = index.parse(filepath, args=arguments.split())
    funcs, calls = find_funcs_and_calls(tu)
    if not verbose:
        return

    display_info_function(funcs, calls)
    # NOTE(review): assumed to belong to the verbose branch - confirm.
    print(list(tu.diagnostics))
|
||||||
|
|
||||||
|
|
||||||
|
# Retrieve a fully qualified function name (with namespaces)
|
||||||
|
def fully_qualified(c):
    """Return the '::'-joined qualified name of cursor *c*.

    None and the translation-unit cursor both yield '', so top-level
    symbols carry no prefix.
    """
    if c is None or c.kind == CursorKind.TRANSLATION_UNIT:
        return ''

    prefix = fully_qualified(c.semantic_parent)
    if prefix == '':
        return c.spelling
    return prefix + '::' + c.spelling
|
||||||
|
|
||||||
|
# Determine whether a call-expression cursor refers to a particular
|
||||||
|
# function declaration
|
||||||
|
def is_function_call(funcdecl, c):
    """Tell whether the definition cursor *c* resolves to is *funcdecl*.

    Returns False when *c* has no definition.
    """
    target = c.get_definition()
    if target is None:
        return False
    return target == funcdecl
|
||||||
|
|
||||||
|
# Filter name to take only the function name (remove "(args)")
|
||||||
|
def filter_func_name(displayname):
    """Return *displayname* truncated at its first '(' (unchanged if none)."""
    name, _paren, _args = displayname.partition('(')
    return name
|
||||||
|
|
||||||
|
# Retrieve lists of function declarations and call expressions in a
|
||||||
|
#translation unit
|
||||||
|
def find_funcs_and_calls(tu):
    """Collect function declarations and call expressions of *tu*.

    Walks the translation unit in pre-order. Every CALL_EXPR cursor is
    appended to the calls list and counted in ``global_calls``; every
    FUNCTION_DECL cursor is appended to the funcs list and counted in
    ``global_funcs``. Counter keys are display names without "(args)".

    Returns a ``(funcs, calls)`` tuple of cursor lists.
    """
    calls = []
    funcs = []
    for c in tu.cursor.walk_preorder():
        if c.kind == CursorKind.CALL_EXPR:
            calls.append(c)
            # filter name to take only the name if necessary
            global_calls[filter_func_name(c.displayname)] += 1
        elif c.kind == CursorKind.FUNCTION_DECL:
            funcs.append(c)
            # filter name to take only the name if necessary
            global_funcs[filter_func_name(c.displayname)] += 1
    return funcs, calls
|
||||||
|
|
||||||
|
# Write data to json file
|
||||||
|
def write_to_json(output_filename, data):
    """Serialise *data* as JSON into '<output_filename>.json'.

    The document is indented by four spaces and its keys are sorted.
    """
    target = output_filename + '.json'
    with open(target, 'w') as fp:
        fp.write(json.dumps(data, indent=4, sort_keys=True))
|
||||||
|
|
||||||
|
# Open data to json file
|
||||||
|
def read_from_json(filename):
    """Load and return the JSON document stored in '<filename>.json'.

    *filename* is given without the '.json' extension, mirroring
    write_to_json().
    """
    # The original body referenced an undefined 'output_filename' here and
    # therefore raised NameError on every call.
    with open(filename + '.json', 'r') as fp:
        return json.load(fp)
|
||||||
|
|
||||||
|
# Read the list of syscalls (text file)
|
||||||
|
def read_syscalls_list(filename):
    """Return the set of whitespace-stripped lines found in *filename*."""
    with open(filename) as f:
        return {entry.strip() for entry in f}
|
||||||
|
|
||||||
|
# Check which syscall is called
|
||||||
|
def compare_syscalls(syscalls):
    """Return the called function names that are listed in *syscalls*.

    The names come from the keys of ``global_calls``. Unless the silent
    flag is set, a header line is printed first.
    """
    if silent_flag is False:
        print("Gathered syscalls from function calls:")

    # The membership test was inverted ('not in'), which returned every call
    # that is NOT a syscall although the result is reported as 'syscalls'.
    return [key for key in global_calls if key in syscalls]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Main function
|
||||||
|
def main():
    """Command-line entry point: gather symbols from the given sources.

    Options:
      -i FILE  file listing include paths (one per line)
      -o NAME  output name (parsed, see note below)
      -q       silent mode
      -v       verbose mode
      -t       print the declared function names as one comma-separated line
               instead of the JSON report

    Exits with status 1 when no input path is supplied.
    """
    global silent_flag, verbose

    # "i:" was missing from the option string, so passing -i raised
    # getopt.GetoptError before any option was examined.
    optlist, args = getopt.getopt(sys.argv[1:], "i:o:qvt")
    includepathsFile = None
    # NOTE(review): -o is accepted but the report is still always written to
    # stdout - confirm whether it should select an output file.
    output_file_name = None
    textFormat = False
    for opt, value in optlist:
        if opt == "-i":
            # Was assigned to the misspelt 'includepathFile' and thus lost.
            includepathsFile = value
        elif opt == "-o":
            output_file_name = value
        elif opt == "-q":
            silent_flag = True
        elif opt == "-v":
            verbose = True
        elif opt == "-t":
            textFormat = True

    input_file_names = args
    if len(input_file_names) == 0:
        if silent_flag is False:
            print("No input files supplied")
        sys.exit(1)

    for input_file_name in input_file_names:
        includePaths = None
        if includepathsFile is not None:
            # The include options are rooted at the current input path; they
            # used to be computed before the loop variable existed.
            includePaths = get_include_paths(input_file_name, includepathsFile)
        check_input_path(input_file_name, includePaths)

    if silent_flag is False:
        print("---------------------------------------------------------")

    if textFormat:
        # Nothing is printed (not even a newline) when no function was found.
        if global_funcs:
            print(','.join(global_funcs))
    else:
        # Dump function declarations and calls to json
        output_dikt = {
            'functions': [{'name': key, 'value': value}
                          for key, value in global_funcs.items()],
            'calls': [{'name': key, 'value': value}
                      for key, value in global_calls.items()],
        }
        if includepathsFile is not None:
            # Read syscalls from txt file
            all_syscalls = read_syscalls_list('syscall_list.txt')
            output_dikt['syscalls'] = compare_syscalls(all_syscalls)

        json.dump(output_dikt, sys.stdout)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    if platform.system() == "Darwin":
        # On macOS point the bindings at the libclang shipped with Xcode.
        # The call and its argument used to be two separate statements, so
        # the library path was never actually set.
        clang.cindex.Config.set_library_file(
            "/Applications/Xcode.app/Contents/Frameworks/libclang.dylib")
    main()
|
|
@ -11,6 +11,9 @@ import (
|
||||||
"debug/elf"
|
"debug/elf"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
u "tools/srcs/common"
|
u "tools/srcs/common"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -191,12 +194,106 @@ func executeDependAptCache(programName string, data *u.StaticData,
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// findSourcesFiles puts together all C/C++ source files found in a given application folder.
|
||||||
|
//
|
||||||
|
// It returns a slice containing the found source file names and an error if any. Otherwise, it
|
||||||
|
// returns nil.
|
||||||
|
func findSourcesFiles(workspace string) ([]string, error) {
|
||||||
|
|
||||||
|
var filenames []string
|
||||||
|
|
||||||
|
err := filepath.Walk(workspace,
|
||||||
|
func(path string, info os.FileInfo, err error) error {
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
ext := filepath.Ext(info.Name())
|
||||||
|
if ext == ".c" || ext == ".cpp" {
|
||||||
|
filenames = append(filenames, path)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return filenames, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO REPLACE

// ExecuteCommand runs a single command without displaying its output.
//
// It returns the combined stdout and stderr of the command as a string and an error if any
// (the string is then empty), otherwise it returns nil.
func ExecuteCommand(command string, arguments []string) (string, error) {
	cmd := exec.Command(command, arguments...)
	combined, err := cmd.CombinedOutput()
	if err != nil {
		return "", err
	}
	return string(combined), nil
}
|
||||||
|
|
||||||
|
// addSourceFileSymbols adds all the symbols present in 'output' to the static data field in
|
||||||
|
// 'data'.
|
||||||
|
func addSourceFileSymbols(output string, data *u.Data) {
|
||||||
|
outputTab := strings.Split(output, ",")
|
||||||
|
|
||||||
|
// Get the list of system calls
|
||||||
|
systemCalls := initSystemCalls()
|
||||||
|
|
||||||
|
for _, s := range outputTab {
|
||||||
|
if _, isSyscall := systemCalls[s]; isSyscall {
|
||||||
|
data.StaticData.SystemCalls[s] = systemCalls[s]
|
||||||
|
} else {
|
||||||
|
data.StaticData.Symbols[s] = ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractPrototype executes the parserClang.py script on each source file to extracts all possible
|
||||||
|
// symbols of each of these files.
|
||||||
|
//
|
||||||
|
// It returns an error if any, otherwise it returns nil.
|
||||||
|
func extractPrototype(sourcesFiltered []string, data *u.Data) error {
|
||||||
|
|
||||||
|
for _, f := range sourcesFiltered {
|
||||||
|
script := filepath.Join(os.Getenv("GOPATH"), "src", "tools", "srcs", "dependtool",
|
||||||
|
"parserClang.py")
|
||||||
|
output, err := ExecuteCommand("python3", []string{script, "-q", "-t", f})
|
||||||
|
if err != nil {
|
||||||
|
u.PrintWarning("Incomplete analysis with file " + f)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
addSourceFileSymbols(output, data)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// gatherSourceFileSymbols gathers symbols of source files from a given application folder.
|
||||||
|
//
|
||||||
|
// It returns an error if any, otherwise it returns nil.
|
||||||
|
func gatherSourceFileSymbols(data *u.Data, programPath string) error {
|
||||||
|
|
||||||
|
tmp := strings.Split(programPath, "/")
|
||||||
|
folderPath := strings.Join(tmp[:len(tmp)-1], "/")
|
||||||
|
|
||||||
|
files, err := findSourcesFiles(folderPath)
|
||||||
|
if err != nil {
|
||||||
|
u.PrintErr(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := extractPrototype(files, data); err != nil {
|
||||||
|
u.PrintErr(err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// -------------------------------------Run-------------------------------------
|
// -------------------------------------Run-------------------------------------
|
||||||
|
|
||||||
// staticAnalyser runs the static analysis to get shared libraries,
|
// staticAnalyser runs the static analysis to get shared libraries,
|
||||||
// system calls and library calls of a given application.
|
// system calls and library calls of a given application.
|
||||||
//
|
func staticAnalyser(elfFile *elf.File, isDynamic, isLinux bool, args u.Arguments, data *u.Data,
|
||||||
func staticAnalyser(elfFile *elf.File, isDynamic, isLinux bool, args u.Arguments, data *u.Data, programPath string) {
|
programPath string) {
|
||||||
|
|
||||||
programName := *args.StringArg[programArg]
|
programName := *args.StringArg[programArg]
|
||||||
fullDeps := *args.BoolArg[fullDepsArg]
|
fullDeps := *args.BoolArg[fullDepsArg]
|
||||||
|
@ -240,9 +337,16 @@ func staticAnalyser(elfFile *elf.File, isDynamic, isLinux bool, args u.Arguments
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Detect symbols from source files
|
||||||
|
u.PrintHeader2("(*) Gathering symbols from source files")
|
||||||
|
if err := gatherSourceFileSymbols(data, programPath); err != nil {
|
||||||
|
u.PrintWarning(err)
|
||||||
|
}
|
||||||
|
|
||||||
// Detect symbols from shared libraries
|
// Detect symbols from shared libraries
|
||||||
if fullStaticAnalysis && isLinux {
|
if fullStaticAnalysis && isLinux {
|
||||||
u.PrintHeader2("(*) Gathering symbols and system calls of shared libraries from binary file")
|
u.PrintHeader2("(*) Gathering symbols and system calls of shared libraries from binary" +
|
||||||
|
"file")
|
||||||
for key, path := range staticData.SharedLibs {
|
for key, path := range staticData.SharedLibs {
|
||||||
if len(path) > 0 {
|
if len(path) > 0 {
|
||||||
fmt.Printf("\t-> Analysing %s - %s\n", key, path[0])
|
fmt.Printf("\t-> Analysing %s - %s\n", key, path[0])
|
||||||
|
|
Loading…
Add table
Reference in a new issue