gaulthiergain-tools/srcs/dependtool/interdependence_graph.go

355 lines
10 KiB
Go
Raw Normal View History

2023-03-15 11:29:53 +01:00
package dependtool
import (
2023-06-12 15:36:08 +02:00
"fmt"
2023-03-15 11:29:53 +01:00
"io/ioutil"
"net/http"
"os"
"os/exec"
2023-03-15 11:29:53 +01:00
"path/filepath"
2023-05-17 21:48:04 +02:00
"regexp"
2023-03-15 11:29:53 +01:00
"strconv"
"strings"
u "tools/srcs/common"
)
// ---------------------------------Gather Data---------------------------------
2023-06-12 15:36:08 +02:00
// findSourceFilesAndFolders walks the workspace and gathers, on one hand, all the application
// C/C++ source and header files and, on the other hand, all the application (sub-)folder paths.
//
// Hidden elements (whose name starts with a dot) are ignored. A hidden folder is skipped together
// with everything it contains. The workspace root is always walked, even when its own name starts
// with a dot (e.g. "."), but in that case it is not added to the folder list.
//
// It returns two slices: one containing the found source file paths and one containing the found
// (sub-)folder paths, each folder path being preceded by a "-I" element, and an error if any,
// otherwise it returns nil.
func findSourceFilesAndFolders(workspace string) ([]string, []string, error) {

	var filenames []string
	var foldernames []string

	err := filepath.Walk(workspace,
		func(path string, info os.FileInfo, err error) error {
			if err != nil {
				return err
			}

			// Ignore hidden elements. Returning nil for a hidden folder would only ignore the
			// folder entry itself and still visit its content, hence SkipDir.
			if strings.HasPrefix(info.Name(), ".") {
				if info.IsDir() && path != workspace {
					return filepath.SkipDir
				}
				return nil
			}

			// Found (sub-)folder
			if info.IsDir() {
				foldernames = append(foldernames, "-I", path)
				return nil
			}

			// Found source file
			switch filepath.Ext(info.Name()) {
			case ".c", ".cpp", ".cc", ".h", ".hpp", ".hcc":
				filenames = append(filenames, path)
			}
			return nil
		})
	if err != nil {
		return nil, nil, err
	}
	return filenames, foldernames, nil
}
// checkIncDir simplifies an inclusion directive path that refers to a parent directory through
// the two dots "..", since such references cause trouble when pruning the map and generating the
// graph.
//
// Each ".." component cancels the component that precedes it; a ".." component that has nothing
// left to cancel is simply dropped.
//
// It returns the simplified directive path or the path itself if it contains no "../".
func checkIncDir(directive string) string {
	if strings.Contains(directive, "../") {
		// Components kept so far, used as a stack
		var kept []string
		for _, component := range strings.Split(directive, u.SEP) {
			if component != ".." {
				kept = append(kept, component)
				continue
			}
			// Pop the previous component if there is one, otherwise just drop the dots
			if depth := len(kept); depth > 0 {
				kept = kept[:depth-1]
			}
		}
		return strings.Join(kept, u.SEP)
	}

	// No problem
	return directive
}
// sourceFileIncludesAnalysis collects all the inclusion directives from a C/C++ source file.
//
// It returns a slice containing all the relative paths contained in the inclusion directives.
2023-03-15 11:29:53 +01:00
func sourceFileIncludesAnalysis(sourceFile string) []string {
2023-06-12 15:36:08 +02:00
var fileIncDir []string
2023-03-15 11:29:53 +01:00
fileLines, err := u.ReadLinesFile(sourceFile)
if err != nil {
u.PrintErr(err)
}
2023-06-12 15:36:08 +02:00
// Find inclusion directives using regexp
2023-05-17 21:48:04 +02:00
var re = regexp.MustCompile(`(.*)(#include)(.*)("|<)(.*)("|>)(.*)`)
for lineIndex := range fileLines {
for _, match := range re.FindAllStringSubmatch(fileLines[lineIndex], -1) {
2023-06-12 15:36:08 +02:00
// Append the relative path to the list of relative paths
2023-05-17 21:48:04 +02:00
for i := 1; i < len(match); i++ {
if match[i] == "\"" || match[i] == "<" {
2023-06-12 15:36:08 +02:00
fileIncDir = append(fileIncDir, checkIncDir(match[i+1]))
2023-05-17 21:48:04 +02:00
break
}
2023-03-15 11:29:53 +01:00
}
}
}
2023-06-12 15:36:08 +02:00
return fileIncDir
2023-03-15 11:29:53 +01:00
}
// TODO REPLACE
// executeCommand runs a single command without displaying its output.
//
// It returns a string which contains the combined stdout and stderr of the command and an error
// if any, otherwise it returns nil.
func executeCommand(command string, arguments []string) (string, error) {
	// Run the command and capture both its stdout and stderr in a single buffer
	cmd := exec.Command(command, arguments...)
	rawOutput, runErr := cmd.CombinedOutput()
	return string(rawOutput), runErr
}
2023-06-12 15:36:08 +02:00
// gppSourceFileIncludesAnalysis collects all the inclusion directives from a C/C++ source file
// using the gpp preprocessor.
2023-03-15 11:29:53 +01:00
//
2023-06-12 15:36:08 +02:00
// It returns a slice containing all the absolute paths contained in the inclusion directives.
func gppSourceFileIncludesAnalysis(sourceFile, programPath string,
sourceFolders []string) ([]string, error) {
2023-03-15 11:29:53 +01:00
2023-06-12 15:36:08 +02:00
var fileIncDir []string
2023-03-15 11:29:53 +01:00
2023-06-12 15:36:08 +02:00
// g++ command
outputStr, err := executeCommand("g++", append([]string{"-E", sourceFile}, sourceFolders...))
2023-03-15 11:29:53 +01:00
2023-06-12 15:36:08 +02:00
// If command g++ returns an error, prune file: it contains non-standard libraries
2023-03-15 11:29:53 +01:00
if err != nil {
2023-06-12 15:36:08 +02:00
return nil, err
2023-03-15 11:29:53 +01:00
}
2023-06-12 15:36:08 +02:00
// Find inclusion directive paths
outputSlice := strings.Split(outputStr, "\n")
2023-03-15 11:29:53 +01:00
for _, line := range outputSlice {
2023-06-12 15:36:08 +02:00
// If warnings or errors are present, ignore their paths
if strings.Contains(line, ":") {
continue
}
// Only interested in file paths not coming from the standard library
if strings.Contains(line, programPath) {
includeDirective :=
checkIncDir(line[strings.Index(line, "\"")+1 : strings.LastIndex(line, "\"")])
if !u.Contains(fileIncDir, includeDirective) {
fileIncDir = append(fileIncDir, includeDirective)
2023-03-15 11:29:53 +01:00
}
}
}
2023-06-12 15:36:08 +02:00
return fileIncDir, nil
2023-03-15 11:29:53 +01:00
}
// pruneRemovableFiles prunes interdependence graph elements if the latter are unused header files.
//
// Every key whose extension is not ".c", ".cpp" or ".cc" is considered a header file. A header
// file is unused when it appears in no dependency list of the map. Pruning is repeated until
// nothing more can be removed, so that a header only included by pruned headers is pruned as
// well and the result does not depend on the map iteration order.
//
// The map pointed to by interdependMap is modified in place; a nil pointer is a no-op.
func pruneRemovableFiles(interdependMap *map[string][]string) {

	if interdependMap == nil {
		return
	}
	graph := *interdependMap

	for {
		// Gather every file that is currently a dependency of some element
		included := make(map[string]struct{})
		for _, dependencies := range graph {
			for _, dependency := range dependencies {
				included[dependency] = struct{}{}
			}
		}

		pruned := false
		for file := range graph {

			// No removal of C/C++ source files
			switch filepath.Ext(file) {
			case ".c", ".cpp", ".cc":
				continue
			}

			// Prune header file if unused
			if _, used := included[file]; !used {
				delete(graph, file)
				pruned = true
			}
		}

		// Fixed point reached: no removal can make another header unused
		if !pruned {
			return
		}
	}
}
// pruneElemFiles prunes interdependence graph elements if the latter contain the substring in
// argument, together with every element that depends, directly or transitively, on a pruned
// element.
//
// The substring is only used to select the initial elements. Dependent elements are then matched
// by their exact key, so that pruning "src/a.c" does not also prune "src/a.cpp". An empty
// substring would match every element and is therefore ignored.
//
// The map pointed to by interdependMap is modified in place; a nil pointer is a no-op.
func pruneElemFiles(interdependMap *map[string][]string, pruneElem string) {

	if interdependMap == nil || pruneElem == "" {
		return
	}
	graph := *interdependMap

	// Lookup for key elements containing the substring
	var pending []string
	for key := range graph {
		if strings.Contains(key, pruneElem) {
			pending = append(pending, key)
		}
	}

	// Prune the elements found above and, in turn, the elements that depend on them
	for len(pending) > 0 {
		key := pending[len(pending)-1]
		pending = pending[:len(pending)-1]

		// Already pruned as a dependent of another element
		if _, present := graph[key]; !present {
			continue
		}
		delete(graph, key)

		for file, dependencies := range graph {
			for _, dependency := range dependencies {
				if dependency == key {
					pending = append(pending, file)
					break
				}
			}
		}
	}
}
// requestUnikraftExtLibs collects all the GitHub repositories of Unikraft through the GitHub API
// and returns the whole list of Unikraft external libraries.
2023-06-12 15:36:08 +02:00
//
// It returns a slice containing all the libraries names.
2023-03-15 11:29:53 +01:00
func requestUnikraftExtLibs() []string {
var extLibsList, appsList []string
2023-06-12 15:36:08 +02:00
// Only 2 Web pages of repositories as for february 2023 (125 repos - 100 repos per page)
2023-03-15 11:29:53 +01:00
nbPages := 2
for i := 1; i <= nbPages; i++ {
// HTTP Get request
resp, err := http.Get("https://api.github.com/orgs/unikraft/repos?page=" +
strconv.Itoa(i) + "&per_page=100")
if err != nil {
u.PrintErr(err)
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
u.PrintErr(err)
}
2023-06-12 15:36:08 +02:00
// Collect libraries
2023-03-15 11:29:53 +01:00
fileLines := strings.Split(string(body), "\"name\":\"lib-")
for i := 1; i < len(fileLines); i++ {
extLibsList = append(extLibsList, fileLines[i][0:strings.Index(fileLines[i][0:],
"\"")])
}
2023-06-12 15:36:08 +02:00
// Collect applications
2023-03-15 11:29:53 +01:00
fileLines = strings.Split(string(body), "\"name\":\"app-")
for i := 1; i < len(fileLines); i++ {
appsList = append(appsList, fileLines[i][0:strings.Index(fileLines[i][0:],
"\"")])
}
}
// Avoid libs that are also apps (e.g. nginx, redis)
for i, lib := range extLibsList {
if u.Contains(appsList, lib) {
extLibsList = append(extLibsList[:i], extLibsList[i+1:]...)
}
}
return extLibsList
}
// -------------------------------------Run-------------------------------------
2023-06-12 15:36:08 +02:00
// runInterdependAnalyser collects all the inclusion directives (i.e., dependencies) from each
// C/C++ application source file and builds an interdependence graph (dot file and png image)
// between the source files that it deemed compilable with Unikraft.
//
// It returns the path to the folder containing the source files that have been kept for generating
// the graph.
2023-03-15 11:29:53 +01:00
func interdependAnalyser(programPath, programName, outFolder string) string {
2023-06-12 15:36:08 +02:00
// Find all application source files and (sub-)folders
sourceFiles, sourceFolders, err := findSourceFilesAndFolders(programPath)
2023-03-15 11:29:53 +01:00
if err != nil {
u.PrintErr(err)
}
2023-06-12 15:36:08 +02:00
u.PrintOk(fmt.Sprint(len(sourceFiles)) + " source files found")
u.PrintOk(fmt.Sprint(len(sourceFolders)) + " source subfolders found")
2023-03-15 11:29:53 +01:00
2023-06-12 15:36:08 +02:00
// Collect source file inclusion directives. Source files are first analysed by g++ to make
// sure to avoid directives that are commented or subjected to a macro and then "by hand" to
// sort the include directives of the g++ analysis (i.e. to avoid inclusion directives that are
// not present in the source file currently analysed).
2023-03-15 11:29:53 +01:00
interdependMap := make(map[string][]string)
2023-06-12 15:36:08 +02:00
for _, sourceFile := range sourceFiles {
analysis := sourceFileIncludesAnalysis(sourceFile)
gppAnalysis, err := gppSourceFileIncludesAnalysis(sourceFile, programPath, sourceFolders)
2023-03-15 11:29:53 +01:00
if err != nil {
continue
}
2023-06-12 15:36:08 +02:00
// Build the interdependence map with the paths contained in the inclusion directives
interdependMap[sourceFile] = make([]string, 0)
for _, directive := range analysis {
for _, gppDirective := range gppAnalysis {
if strings.Contains(gppDirective, directive) {
interdependMap[sourceFile] = append(interdependMap[sourceFile], gppDirective)
}
2023-03-15 11:29:53 +01:00
}
}
}
2023-06-12 15:36:08 +02:00
// Prune the interdependence graph
2023-03-15 11:29:53 +01:00
extLibsList := requestUnikraftExtLibs()
extLibsList = append(extLibsList, "win32", "test", "TEST")
for _, extLib := range extLibsList {
pruneElemFiles(&interdependMap, extLib)
}
pruneRemovableFiles(&interdependMap)
2023-06-12 15:36:08 +02:00
// Create a folder and copy all the kept source files into it for later use with build tool
outAppFolder := outFolder + programName + u.SEP
_, err = u.CreateFolder(outAppFolder)
if err != nil {
u.PrintErr(err)
}
for _, sourceFile := range sourceFiles {
if _, ok := interdependMap[sourceFile]; ok {
if err := u.CopyFileContents(sourceFile,
outAppFolder+filepath.Base(sourceFile)); err != nil {
u.PrintErr(err)
}
}
}
u.PrintOk(fmt.Sprint(len(interdependMap)) + " source files kept and copied to " + outAppFolder)
// Change the absolute paths in the interdependence map into relative paths for more
// readability in the png image
graphMap := make(map[string][]string)
for appFilePath := range interdependMap {
appFile := strings.Split(appFilePath, programPath)[1][1:]
graphMap[appFile] = make([]string, 0)
for _, fileDepPath := range interdependMap[appFilePath] {
graphMap[appFile] = append(graphMap[appFile], strings.Split(fileDepPath,
programPath)[1][1:])
2023-03-15 11:29:53 +01:00
}
}
2023-06-12 15:36:08 +02:00
// Create dot and png files
u.GenerateGraph(programName, outFolder+programName, graphMap, nil)
2023-03-15 11:29:53 +01:00
return outAppFolder
}