dependtool first improvement
This commit is contained in:
parent
7f26ca5d3d
commit
6d88b02718
3 changed files with 351 additions and 7 deletions
|
@ -203,10 +203,7 @@ func matchLibs(unikraftLibs string, data *u.Data) ([]string, map[string]string,
|
|||
}
|
||||
|
||||
dataMap := putJsonSymbolsTogether(data)
|
||||
//matchedLibs = append(matchedLibs, POSIXPROCESS)
|
||||
//matchedLibs = append(matchedLibs, POSIXUSER)
|
||||
//matchedLibs = append(matchedLibs, POSIXSYSINFO)
|
||||
//matchedLibs = append(matchedLibs, POSIXLIBDL)
|
||||
|
||||
// Perform the symbol matching
|
||||
matchedLibs = matchSymbols(matchedLibs, dataMap, mapSymbols)
|
||||
|
||||
|
|
243
srcs/dependtool/parserClang.py
Executable file
243
srcs/dependtool/parserClang.py
Executable file
|
@ -0,0 +1,243 @@
|
|||
#!/usr/bin/env python3
|
||||
#---------------------------------------------------------------------
|
||||
# (*) Installation:
|
||||
#
|
||||
# pip3 install clang
|
||||
#
|
||||
# cd /usr/lib/x86_64-linux-gnu/
|
||||
# sudo ln -s libclang-X.Y.so.1 libclang-14.so (X.Y the version number)
|
||||
#
|
||||
# (*) Run:
|
||||
#
|
||||
# python3 parserClang.py <filepath> [includepathsfile]
|
||||
#
|
||||
# where filepath can be a repository/folder or a file (c/cpp/h/hpp)
|
||||
#
|
||||
#
|
||||
# Gaulthier Gain <gaulthier.gain@uliege.be>
|
||||
# License: BSD
|
||||
#---------------------------------------------------------------------
|
||||
|
||||
import getopt
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import clang.cindex
|
||||
import clang
|
||||
import platform
|
||||
from clang.cindex import CursorKind
|
||||
from collections import Counter
|
||||
|
||||
verbose = False # Change it to verbose mode
|
||||
|
||||
global_funcs = Counter()
|
||||
global_calls = Counter()
|
||||
|
||||
silent_flag = False
|
||||
|
||||
# Check if a path is a directory or a file
|
||||
def check_input_path(path, includePaths):
    """Route `path` to the matching analysis routine.

    A directory is handed to iterate_root_folder and a regular file to
    check_type_file, both together with `includePaths`. Any other path
    prints an error message and terminates the program with status 1.
    """
    if os.path.isdir(path):
        iterate_root_folder(path, includePaths)
        return
    if os.path.isfile(path):
        check_type_file(path, includePaths)
        return
    # Neither a folder nor a file: nothing can be analysed.
    print("Unable to analyse this file")
    exit(1)
|
||||
|
||||
def get_include_paths(rootdir, includepathsFile):
    """Build the clang include flags described by `includepathsFile`.

    Each line of the file is stripped of its newline characters, prefixed
    with `rootdir` (plain concatenation, no separator is inserted) and turned
    into a '-isystem <path>' flag. The flags are returned as one
    space-separated string.
    """
    with open(includepathsFile, 'r') as handle:
        flags = ['-isystem ' + rootdir + entry.replace('\n', '')
                 for entry in handle]
    return ' '.join(flags)
|
||||
|
||||
# Check the type/extension of a given file
|
||||
def check_type_file(filepath, includePaths):
    """Parse `filepath` with the compiler options matching its extension.

    '.cpp'/'.hpp' files are parsed as C++11, '.c'/'.h' files with no extra
    language option; other extensions are not parsed. When `includePaths`
    is not None it is appended (after a space) to the options. Unless the
    silent flag is set, a progress line is printed for every file, parsed
    or not.
    """
    cpp_flags = '-x c++ --std=c++11'
    c_flags = ''
    if includePaths is not None:
        cpp_flags = cpp_flags + ' ' + includePaths
        c_flags = c_flags + ' ' + includePaths

    if silent_flag is False:
        print("Gathering symbols of " + filepath)

    # str.endswith accepts a tuple of candidate suffixes.
    if filepath.endswith((".cpp", ".hpp")):
        parse_file(filepath, cpp_flags)
    elif filepath.endswith((".c", ".h")):
        parse_file(filepath, c_flags)
|
||||
|
||||
# Iterate through a root folder
|
||||
def iterate_root_folder(rootdir, includePaths):
    """Walk `rootdir` recursively and submit every file to check_type_file.

    The file path is built as '<folder><os.sep><name>' and passed along with
    `includePaths`.
    """
    for folder, _subfolders, names in os.walk(rootdir):
        for name in names:
            check_type_file(folder + os.sep + name, includePaths)
|
||||
|
||||
# Print info about symbols (verbose mode)
|
||||
def display_info_function(funcs, calls):
    """Print each declared function followed by the locations calling it.

    For every cursor in `funcs` the fully qualified name and location are
    printed, then one '- <location>' line per cursor in `calls` that
    is_function_call matches to it, then an empty line.
    """
    for decl in funcs:
        print(fully_qualified(decl), decl.location)
        # Lazily filter the call expressions that resolve to this declaration.
        for site in (call for call in calls if is_function_call(decl, call)):
            print('-', site.location)
        print()
|
||||
|
||||
# Parse a given file to generate an AST
|
||||
def parse_file(filepath, arguments):
    """Parse `filepath` with libclang and record its functions and calls.

    `arguments` is a whitespace-separated option string; it is split before
    being handed to the parser. The resulting translation unit is scanned by
    find_funcs_and_calls. In verbose mode the per-function details and the
    parser diagnostics are printed as well.
    """
    index = clang.cindex.Index.create()
    unit = index.parse(filepath, args=arguments.split())
    declared, called = find_funcs_and_calls(unit)
    if not verbose:
        return
    display_info_function(declared, called)
    print(list(unit.diagnostics))
|
||||
|
||||
|
||||
# Retrieve a fully qualified function name (with namespaces)
|
||||
def fully_qualified(c):
    """Return the '::'-separated qualified name of cursor `c`.

    The name is built by following semantic parents recursively. A None
    cursor or the translation-unit cursor yields the empty string, which
    ends the recursion.
    """
    if c is None or c.kind == CursorKind.TRANSLATION_UNIT:
        return ''
    prefix = fully_qualified(c.semantic_parent)
    if prefix == '':
        return c.spelling
    return prefix + '::' + c.spelling
|
||||
|
||||
# Determine whether a call-expression cursor refers to a particular
|
||||
# function declaration
|
||||
def is_function_call(funcdecl, c):
    """Tell whether call cursor `c` resolves to the declaration `funcdecl`.

    Returns False when `c` has no definition; otherwise returns the result
    of comparing that definition with `funcdecl`.
    """
    target = c.get_definition()
    if target is None:
        return False
    return target == funcdecl
|
||||
|
||||
# Filter name to take only the function name (remove "(args)")
|
||||
def filter_func_name(displayname):
    """Return `displayname` truncated at its first '(' (the argument list).

    A name without '(' is returned unchanged.
    """
    # partition yields the whole string as head when the separator is absent.
    head, _paren, _rest = displayname.partition('(')
    return head
|
||||
|
||||
# Retrieve lists of function declarations and call expressions in a
|
||||
#translation unit
|
||||
def find_funcs_and_calls(tu):
    """Collect function declarations and call expressions of a translation unit.

    The AST of `tu` is walked in pre-order. Every CALL_EXPR cursor is
    appended to the calls list and counted in global_calls; every
    FUNCTION_DECL cursor is appended to the funcs list and counted in
    global_funcs. Counters are keyed by the display name with its argument
    list removed.

    Returns the tuple (funcs, calls).
    """
    calls = []
    funcs = []
    for node in tu.cursor.walk_preorder():
        kind = node.kind
        if kind == CursorKind.CALL_EXPR:
            bucket, tally = calls, global_calls
        elif kind == CursorKind.FUNCTION_DECL:
            bucket, tally = funcs, global_funcs
        else:
            continue
        bucket.append(node)
        # Keep only the bare name, without "(args)".
        tally[filter_func_name(node.displayname)] += 1
    return funcs, calls
|
||||
|
||||
# Write data to json file
|
||||
def write_to_json(output_filename, data):
    """Serialise `data` as JSON into '<output_filename>.json'.

    The document is indented by four spaces and its keys are sorted.
    """
    target = output_filename + '.json'
    with open(target, 'w') as stream:
        json.dump(data, stream, sort_keys=True, indent=4)
|
||||
|
||||
# Read data from a json file
|
||||
def read_from_json(filename):
    """Load and return the JSON document stored in '<filename>.json'.

    The '.json' suffix is appended to `filename`, mirroring write_to_json.
    Errors raised by open() or json.load() propagate to the caller.
    """
    # The parameter is `filename`; no `output_filename` exists in this scope.
    with open(filename + '.json', 'r') as fp:
        return json.load(fp)
|
||||
|
||||
# Read the list of syscalls (text file)
|
||||
def read_syscalls_list(filename):
    """Return the set of lines of `filename`, each stripped of whitespace.

    Blank lines contribute the empty string to the set.
    """
    with open(filename) as listing:
        return {entry.strip() for entry in listing}
|
||||
|
||||
# Check which syscall is called
|
||||
def compare_syscalls(syscalls):
    """Return the recorded call names that are system calls.

    `syscalls` is a collection of known system-call names. The result lists
    every key of global_calls that belongs to it, in the order the calls
    were first recorded. Unless the silent flag is set, a header line is
    printed first.
    """
    if silent_flag is False:
        print("Gathered syscalls from function calls:")

    # Keep the calls that ARE syscalls: the result is reported as the list of
    # called syscalls, so membership must not be negated.
    return [name for name in global_calls if name in syscalls]
|
||||
|
||||
|
||||
|
||||
# Main function
|
||||
def main():
    """Command-line entry point: parse options, analyse inputs, print results.

    Options:
        -i FILE  file listing include paths, one per line; each entry is
                 prefixed with the input path and passed to clang as -isystem
        -o NAME  output file name (parsed only, see the note in the code)
        -q       quiet mode: suppress progress messages
        -v       verbose mode: print declarations, call sites and diagnostics
        -t       text mode: print the declared function names as a single
                 comma-separated line instead of JSON

    The remaining arguments are the files or folders to analyse. The program
    exits with status 1 when none is supplied. In JSON mode the declared
    functions and the calls are dumped to stdout; when -i is given, the
    called system calls (looked up in 'syscall_list.txt') are added.
    """
    global silent_flag, verbose

    # "i:" has to be declared, otherwise getopt rejects -i outright.
    optlist, args = getopt.getopt(sys.argv[1:], "i:o:qvt")
    includepathsFile = None
    output_file_name = None
    textFormat = False
    for flag, value in optlist:
        if flag == "-i":
            includepathsFile = value
        elif flag == "-o":
            # NOTE(review): the name is recorded but results are still written
            # to stdout; confirm whether -o is meant to redirect them.
            output_file_name = value
        elif flag == "-q":
            silent_flag = True
        elif flag == "-v":
            verbose = True
        elif flag == "-t":
            textFormat = True

    input_file_names = args
    if len(input_file_names) == 0:
        if silent_flag is False:
            print("No input files supplied")
        exit(1)

    for input_file_name in input_file_names:
        # The include flags are rooted at the input being analysed, so they
        # are computed per input instead of once before the loop.
        includePaths = None
        if includepathsFile is not None:
            includePaths = get_include_paths(input_file_name, includepathsFile)
        check_input_path(input_file_name, includePaths)

    if silent_flag is False:
        print("---------------------------------------------------------")

    if textFormat:
        # Nothing is printed (not even a newline) when no function was found.
        if global_funcs:
            print(','.join(global_funcs.keys()))
    else:
        # Dump function declarations and calls to json
        output_dikt = {
            'functions': [{'name': key, 'value': value}
                          for key, value in global_funcs.items()],
            'calls': [{'name': key, 'value': value}
                      for key, value in global_calls.items()],
        }
        if includepathsFile is not None:
            # Read syscalls from txt file
            all_syscalls = read_syscalls_list('syscall_list.txt')
            output_dikt['syscalls'] = compare_syscalls(all_syscalls)

        json.dump(output_dikt, sys.stdout)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    if platform.system() == "Darwin":
        # On macOS, point the bindings at the libclang shipped with Xcode.
        # The call and its argument must form one expression: written on two
        # separate lines, the function is never invoked.
        clang.cindex.Config.set_library_file(
            "/Applications/Xcode.app/Contents/Frameworks/libclang.dylib")
    main()
|
|
@ -11,6 +11,9 @@ import (
|
|||
"debug/elf"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
u "tools/srcs/common"
|
||||
)
|
||||
|
||||
|
@ -191,12 +194,106 @@ func executeDependAptCache(programName string, data *u.StaticData,
|
|||
return nil
|
||||
}
|
||||
|
||||
// findSourcesFiles puts together all C/C++ source files (".c" and ".cpp") found in a given
// application folder, including its sub-folders.
//
// Directories are never reported, even when their name ends with a source extension.
//
// It returns a slice containing the found source file names and an error if any. Otherwise, it
// returns nil.
func findSourcesFiles(workspace string) ([]string, error) {

	var filenames []string

	err := filepath.Walk(workspace,
		func(path string, info os.FileInfo, err error) error {
			if err != nil {
				return err
			}

			// A folder called "foo.c" must be walked, not reported as a source file.
			if info.IsDir() {
				return nil
			}

			switch filepath.Ext(info.Name()) {
			case ".c", ".cpp":
				filenames = append(filenames, path)
			}
			return nil
		})
	if err != nil {
		return nil, err
	}
	return filenames, nil
}
|
||||
|
||||
// TODO REPLACE
|
||||
// ExecuteCommand runs a single command without displaying the output.
//
// Only the standard output of the command is captured: anything written to standard error is
// left out of the returned string, so diagnostics cannot be mistaken for results.
//
// It returns a string which represents stdout and an error if any, otherwise
// it returns nil.
func ExecuteCommand(command string, arguments []string) (string, error) {
	out, err := exec.Command(command, arguments...).Output()
	if err != nil {
		return "", err
	}
	return string(out), nil
}
|
||||
|
||||
// addSourceFileSymbols adds all the symbols present in 'output' to the static data field in
|
||||
// 'data'.
|
||||
func addSourceFileSymbols(output string, data *u.Data) {
|
||||
outputTab := strings.Split(output, ",")
|
||||
|
||||
// Get the list of system calls
|
||||
systemCalls := initSystemCalls()
|
||||
|
||||
for _, s := range outputTab {
|
||||
if _, isSyscall := systemCalls[s]; isSyscall {
|
||||
data.StaticData.SystemCalls[s] = systemCalls[s]
|
||||
} else {
|
||||
data.StaticData.Symbols[s] = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// extractPrototype executes the parserClang.py script on each source file to extracts all possible
|
||||
// symbols of each of these files.
|
||||
//
|
||||
// It returns an error if any, otherwise it returns nil.
|
||||
func extractPrototype(sourcesFiltered []string, data *u.Data) error {
|
||||
|
||||
for _, f := range sourcesFiltered {
|
||||
script := filepath.Join(os.Getenv("GOPATH"), "src", "tools", "srcs", "dependtool",
|
||||
"parserClang.py")
|
||||
output, err := ExecuteCommand("python3", []string{script, "-q", "-t", f})
|
||||
if err != nil {
|
||||
u.PrintWarning("Incomplete analysis with file " + f)
|
||||
continue
|
||||
}
|
||||
addSourceFileSymbols(output, data)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// gatherSourceFileSymbols gathers symbols of source files from a given application folder.
|
||||
//
|
||||
// It returns an error if any, otherwise it returns nil.
|
||||
func gatherSourceFileSymbols(data *u.Data, programPath string) error {
|
||||
|
||||
tmp := strings.Split(programPath, "/")
|
||||
folderPath := strings.Join(tmp[:len(tmp)-1], "/")
|
||||
|
||||
files, err := findSourcesFiles(folderPath)
|
||||
if err != nil {
|
||||
u.PrintErr(err)
|
||||
}
|
||||
|
||||
if err := extractPrototype(files, data); err != nil {
|
||||
u.PrintErr(err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// -------------------------------------Run-------------------------------------
|
||||
|
||||
// staticAnalyser runs the static analysis to get shared libraries,
|
||||
// system calls and library calls of a given application.
|
||||
//
|
||||
func staticAnalyser(elfFile *elf.File, isDynamic, isLinux bool, args u.Arguments, data *u.Data, programPath string) {
|
||||
func staticAnalyser(elfFile *elf.File, isDynamic, isLinux bool, args u.Arguments, data *u.Data,
|
||||
programPath string) {
|
||||
|
||||
programName := *args.StringArg[programArg]
|
||||
fullDeps := *args.BoolArg[fullDepsArg]
|
||||
|
@ -240,9 +337,16 @@ func staticAnalyser(elfFile *elf.File, isDynamic, isLinux bool, args u.Arguments
|
|||
}
|
||||
}
|
||||
|
||||
// Detect symbols from source files
|
||||
u.PrintHeader2("(*) Gathering symbols from source files")
|
||||
if err := gatherSourceFileSymbols(data, programPath); err != nil {
|
||||
u.PrintWarning(err)
|
||||
}
|
||||
|
||||
// Detect symbols from shared libraries
|
||||
if fullStaticAnalysis && isLinux {
|
||||
u.PrintHeader2("(*) Gathering symbols and system calls of shared libraries from binary file")
|
||||
u.PrintHeader2("(*) Gathering symbols and system calls of shared libraries from binary" +
|
||||
"file")
|
||||
for key, path := range staticData.SharedLibs {
|
||||
if len(path) > 0 {
|
||||
fmt.Printf("\t-> Analysing %s - %s\n", key, path[0])
|
||||
|
|
Loading…
Add table
Reference in a new issue