[cocom] Add qmcalc backend to support cqmetrics #110

Open: wants to merge 1 commit into base: main
10 changes: 9 additions & 1 deletion README.md
@@ -34,6 +34,7 @@ and define the **details** level of the analysis (useful when analyzing large so
- [nomos](https://github.com/fossology/fossology/tree/master/src/nomos)
- [scancode](https://github.com/nexB/scancode-toolkit)
- [github-linguist](https://github.com/github/linguist)
- [cqmetrics](https://github.com/dspinellis/cqmetrics)

### How to install/create the executables:
- **Cloc**
@@ -86,6 +87,13 @@ After successfully executing the above mentioned steps, (if required) we have to
pip install simplejson execnet
```

- **CQMetrics**

```
$> git clone https://github.com/dspinellis/cqmetrics
$> cd cqmetrics/src
$> make && make install
```
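
Once `qmcalc` is on your `PATH`, the new analyzer can be exercised directly from Python. This is only a rough sketch; the file path below is a placeholder:

```
from graal.backends.core.analyzers.qmcalc import QMCalc

analyzer = QMCalc()
# 'examples/qsort.c' is a placeholder; any C source or header file works
metrics = analyzer.analyze(file_path='examples/qsort.c')
print(metrics['nfunction'], metrics['cyclomatic_mean'], metrics['line_length_max'])
```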

## How to install/uninstall
Graal is being developed and tested mainly on GNU/Linux platforms. Thus it is very likely it will work out of the box
@@ -109,7 +117,7 @@ Several backends have been developed to assess the genericity of Graal. Those ba
tools, where executions are triggered via system calls or their Python interfaces. In the current status, the backends
mostly target Python code, however other backends can be easily developed to cover other programming languages. The
currently available backends are:
- **CoCom** gathers data about code complexity (e.g., cyclomatic complexity, LOC) from projects written in popular programming languages such as: C/C++, Java, Scala, JavaScript, Ruby, Python, Lua and Golang. It leverages on [Cloc](http://cloc.sourceforge.net/), [Lizard](https://github.com/terryyin/lizard) and [scc](https://github.com/boyter/scc). The tool can be exectued at `file` and `repository` levels activated with the help of category: `code_complexity_lizard_file` or `code_complexity_lizard_repository`.
- **CoCom** gathers data about code complexity (e.g., cyclomatic complexity, LOC) from projects written in popular programming languages such as C/C++, Java, Scala, JavaScript, Ruby, Python, Lua and Golang. It leverages [Cloc](http://cloc.sourceforge.net/), [Lizard](https://github.com/terryyin/lizard), [scc](https://github.com/boyter/scc), and [CQMetrics](https://github.com/dspinellis/cqmetrics). The tool can be executed at `file` and `repository` levels by selecting the corresponding category, e.g. `code_complexity_lizard_file` or `code_complexity_lizard_repository` (a usage sketch for the new qmcalc categories follows this list).
- **CoDep** extracts package and class dependencies of a Python module and serializes them as JSON structures, composed of edges and nodes, thus easing integration with front-end technologies for graph visualizations. It combines [PyReverse](https://pypi.org/project/pyreverse/) and [NetworkX](https://networkx.github.io/).
- **CoQua** retrieves code quality insights, such as checks on code line length, well-formed variable names, unused imported modules and code clones. It uses [PyLint](https://www.pylint.org/) and [Flake8](http://flake8.pycqa.org/en/latest/index.html). The tools can be activated by passing the corresponding category: `code_quality_pylint` or `code_quality_flake8`.
- **CoVuln** scans the code to identify security vulnerabilities such as potential SQL and Shell injections, hard-coded passwords and weak cryptographic key size. It relies on [Bandit](https://github.com/PyCQA/bandit).
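
For the new qmcalc categories, a file-level run can be sketched as follows; the repository URI and clone path are placeholders:

```
from graal.backends.core.cocom import (CoCom,
                                       CATEGORY_COCOM_QMCALC_FILE)

repo_uri = 'https://github.com/dspinellis/cqmetrics'  # placeholder repository
repo_dir = '/tmp/graal-cocom'                         # placeholder clone path

cc = CoCom(uri=repo_uri, git_path=repo_dir)
for commit in cc.fetch(category=CATEGORY_COCOM_QMCALC_FILE):
    print(commit['data']['analysis'])
```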
1 change: 1 addition & 0 deletions graal/backends/core/analyzers/cqmetrics-names.tsv
@@ -0,0 +1 @@
nchar nline line_length_min line_length_mean line_length_median line_length_max line_length_sd nempty_line nfunction nstatement statement_nesting_min statement_nesting_mean statement_nesting_median statement_nesting_max statement_nesting_sd ninternal nconst nenum ngoto ninline nnoalias nregister nrestrict nsigned nstruct nunion nunsigned nvoid nvolatile ntypedef ncomment ncomment_char nboilerplate_comment_char ndox_comment ndox_comment_char nfun_comment ncpp_directive ncpp_include ncpp_conditional nfun_cpp_directive nfun_cpp_conditional style_inconsistency nfunction2 halstead_min halstead_mean halstead_median halstead_max halstead_sd nfunction3 cyclomatic_min cyclomatic_mean cyclomatic_median cyclomatic_max cyclomatic_sd nidentifier identifier_length_min identifier_length_mean identifier_length_median identifier_length_max identifier_length_sd unique_nidentifier unique_identifier_length_min unique_identifier_length_mean unique_identifier_length_median unique_identifier_length_max unique_identifier_length_sd indentation_spacing_count indentation_spacing_min indentation_spacing_mean indentation_spacing_median indentation_spacing_max indentation_spacing_sd nno_space_after_binary_op nno_space_after_closing_brace nno_space_after_comma nno_space_after_keyword nno_space_after_opening_brace nno_space_after_semicolon nno_space_before_binary_op nno_space_before_closing_brace nno_space_before_keyword nno_space_before_opening_brace nspace_after_opening_square_bracket nspace_after_struct_op nspace_after_unary_op nspace_at_end_of_line nspace_before_closing_bracket nspace_before_closing_square_bracket nspace_before_comma nspace_before_opening_square_bracket nspace_before_semicolon nspace_before_struct_op nspace_after_binary_op nspace_after_closing_brace nspace_after_comma nspace_after_keyword nspace_after_opening_brace nspace_after_semicolon nno_space_after_struct_op nspace_before_binary_op nspace_before_closing_brace nspace_before_keyword nspace_before_opening_brace nno_space_before_struct_op nno_space_after_opening_square_bracket nno_space_after_unary_op nno_space_before_closing_bracket nno_space_before_closing_square_bracket nno_space_before_comma nno_space_before_opening_square_bracket nno_space_before_semicolon
164 changes: 164 additions & 0 deletions graal/backends/core/analyzers/qmcalc.py
@@ -0,0 +1,164 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2015-2020 Bitergia
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
# James Walden <[email protected]>
# Valerio Cosentino <[email protected]>
# inishchith <[email protected]>
#

import subprocess
from pathlib import Path
from statistics import mean, median, stdev

from graal.graal import GraalError
from .analyzer import Analyzer

class QMCalc(Analyzer):
"""A wrapper for QMCalc (cqmetrics)

This class allows calling QMCalc on C source and header files,
parses the result of the analysis and returns it as a dict.
"""
version = '0.0.1'
metrics_names_file = 'cqmetrics-names.tsv'
metrics_names_path = Path(__file__).parent.absolute().joinpath(metrics_names_file)

def __init__(self):
try:
with open(QMCalc.metrics_names_path) as f:
name_string = f.read().rstrip()
except OSError as error:
raise GraalError(cause="Error reading cqmetrics metric names from %s: %s" % (QMCalc.metrics_names_path, error))

self.metrics_names = name_string.split("\t")

def __analyze_file(self, message, file_path, relative_path):
"""Convert tab-separated metrics values from qmcalc into a dictionary

:param message: message from standard output after execution of qmcalc
:param file_path: path of the analyzed file
:param relative_path: base path the reported file path is made relative to

:returns result: dict of the results of qmcalc analysis of a file
"""

value_strings = message.rstrip().split("\t")
results = dict(zip(self.metrics_names, value_strings))

# Coerce each metric value to correct type or NA
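# Counts (names starting with 'n') and the *_length/_nesting min/max metrics become ints,
# all remaining metrics become floats, and empty fields stay as 'NA'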
for metric in results:
if results[metric] == '':
results[metric] = 'NA'
else:
if (metric[0] == 'n' or metric.endswith("_length_min") or
metric.endswith("_length_max") or
metric.endswith("_nesting_min") or
metric.endswith("_nesting_max")):
results[metric] = int(results[metric])
else:
results[metric] = float(results[metric])

path = Path(file_path)
results['file_path'] = path.relative_to(relative_path).as_posix()
results['file_extension'] = path.suffix

return results

def __analyze_repository(self, message, file_paths, relative_path):
"""Return metrics for all files in repository.

:param message: message from standard output after execution of qmcalc
:param file_paths: array of paths to C source and header files
:param relative_path: path to repository containing source files

:returns result: dict of the results of the analysis over a repository
"""

# Create array of file metric dictionaries
file_metrics = []
for i, line in enumerate(message.strip().split("\n")):
file_results = self.__analyze_file(line, file_paths[i], relative_path)
file_metrics.append(file_results)

# Build results dictionary with summary data and file_metrics
results = {
'nfiles': len(file_metrics),
'files': file_metrics
}
for metric_name in self.metrics_names:
if metric_name == 'filename':
continue

metrics = [file_result[metric_name] for file_result in file_metrics]
metrics = [value for value in metrics if value != 'NA']

if metric_name.endswith('min'):
results[metric_name] = min(metrics)
elif metric_name.endswith('max'):
results[metric_name] = max(metrics)
elif metric_name.endswith('mean'):
results[metric_name] = mean(metrics)
elif metric_name.endswith('median'):
results[metric_name] = median(metrics)
elif metric_name.endswith('sd'):
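# qmcalc only reports per-file summary statistics, so the repository-level
# standard deviation is approximated as the spread of the per-file means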
mean_metric = metric_name.replace('sd', 'mean')
mean_metrics = [file_result[mean_metric] for file_result in file_metrics]
mean_metrics = [value for value in mean_metrics if value != 'NA']
results[metric_name] = stdev(mean_metrics)
else:
results[metric_name] = sum(metrics)

return results

def analyze(self, **kwargs):
"""Add information using qmcalc

:param file_path: path of a single C source or header file to analyze
:param repository_level: set to True if analysis has to be performed on a repository
:param repository_path: path of the repository to analyze (required when repository_level is True)

:returns result: dict of the results of the analysis
"""

repository_level = kwargs.get('repository_level', False)
if repository_level:
file_paths = list(Path(kwargs['repository_path']).glob('**/*.[ch]'))
else:
file_paths = [ kwargs['file_path'] ]

# If no C source/header files exist, return empty array for results
if len(file_paths) == 0:
return []

# Run qmcalc to compute metrics for all file paths
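# qmcalc emits one tab-separated line of metrics per input file, in the same order as the arguments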
try:
qmcalc_command = ['qmcalc'] + file_paths
message = subprocess.check_output(qmcalc_command).decode('utf-8')
except subprocess.CalledProcessError as e:
raise GraalError(cause="QMCalc failed at %s, %s" % (file_paths, e.output.decode('utf-8')))
finally:
subprocess._cleanup()

if repository_level:
results = self.__analyze_repository(message, file_paths, kwargs['repository_path'])
else:
results = self.__analyze_file(message, file_paths[0], kwargs['file_path'])

return results
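
A repository-level run of this analyzer would look roughly like the sketch below (the checkout path is hypothetical); besides the aggregated metrics, the returned dict exposes the per-file results under `files`:

```
from graal.backends.core.analyzers.qmcalc import QMCalc

analyzer = QMCalc()
summary = analyzer.analyze(repository_level=True,
                           repository_path='/tmp/BSDCoreUtils')  # hypothetical checkout
print(summary['nfiles'], summary['cyclomatic_mean'], summary['line_length_max'])
```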
36 changes: 34 additions & 2 deletions graal/backends/core/cocom.py
@@ -30,6 +30,7 @@
DEFAULT_WORKTREE_PATH)
from graal.backends.core.analyzers.cloc import Cloc
from graal.backends.core.analyzers.lizard import Lizard
from graal.backends.core.analyzers.qmcalc import QMCalc
from graal.backends.core.analyzers.scc import SCC
from perceval.utils import DEFAULT_DATETIME, DEFAULT_LAST_DATETIME

@@ -39,12 +40,18 @@
LIZARD_FILE = 'lizard_file'
LIZARD_REPOSITORY = 'lizard_repository'

QMCALC_FILE = 'qmcalc_file'
QMCALC_REPOSITORY = 'qmcalc_repository'

CATEGORY_COCOM_LIZARD_FILE = 'code_complexity_' + LIZARD_FILE
CATEGORY_COCOM_LIZARD_REPOSITORY = 'code_complexity_' + LIZARD_REPOSITORY

CATEGORY_COCOM_SCC_FILE = 'code_complexity_' + SCC_FILE
CATEGORY_COCOM_SCC_REPOSITORY = 'code_complexity_' + SCC_REPOSITORY

CATEGORY_COCOM_QMCALC_FILE = 'code_complexity_' + QMCALC_FILE
CATEGORY_COCOM_QMCALC_REPOSITORY = 'code_complexity_' + QMCALC_REPOSITORY
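# i.e. 'code_complexity_qmcalc_file' and 'code_complexity_qmcalc_repository'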

logger = logging.getLogger(__name__)


@@ -84,10 +91,12 @@ class CoCom(Graal):
:raises RepositoryError: raised when there was an error cloning or
updating the repository.
"""
version = '0.5.1'
version = '0.6.0'

CATEGORIES = [CATEGORY_COCOM_LIZARD_FILE,
CATEGORY_COCOM_LIZARD_REPOSITORY,
CATEGORY_COCOM_QMCALC_FILE,
CATEGORY_COCOM_QMCALC_REPOSITORY,
CATEGORY_COCOM_SCC_FILE,
CATEGORY_COCOM_SCC_REPOSITORY]

@@ -113,6 +122,10 @@ def fetch(self, category=CATEGORY_COCOM_LIZARD_FILE, paths=None,
self.analyzer_kind = LIZARD_FILE
elif category == CATEGORY_COCOM_LIZARD_REPOSITORY:
self.analyzer_kind = LIZARD_REPOSITORY
elif category == CATEGORY_COCOM_QMCALC_FILE:
self.analyzer_kind = QMCALC_FILE
elif category == CATEGORY_COCOM_QMCALC_REPOSITORY:
self.analyzer_kind = QMCALC_REPOSITORY
elif category == CATEGORY_COCOM_SCC_FILE:
self.analyzer_kind = SCC_FILE
elif category == CATEGORY_COCOM_SCC_REPOSITORY:
@@ -141,6 +154,10 @@ def metadata_category(item):
return CATEGORY_COCOM_LIZARD_FILE
elif item['analyzer'] == LIZARD_REPOSITORY:
return CATEGORY_COCOM_LIZARD_REPOSITORY
elif item['analyzer'] == QMCALC_FILE:
return CATEGORY_COCOM_QMCALC_FILE
elif item['analyzer'] == QMCALC_REPOSITORY:
return CATEGORY_COCOM_QMCALC_REPOSITORY
elif item['analyzer'] == SCC_FILE:
return CATEGORY_COCOM_SCC_FILE
elif item['analyzer'] == SCC_REPOSITORY:
@@ -173,7 +190,7 @@ def _analyze(self, commit):
"""
analysis = []

if self.analyzer_kind in [LIZARD_FILE, SCC_FILE]:
if self.analyzer_kind in [LIZARD_FILE, QMCALC_FILE, SCC_FILE]:
for committed_file in commit['files']:

file_path = committed_file['file']
@@ -211,6 +228,7 @@ def _analyze(self, commit):

file_info = self.analyzer.analyze(local_path)
file_info.update({'file_path': file_path})

analysis.append(file_info)
else:
files_affected = [file_info['file'] for file_info in commit['files']]
@@ -234,6 +252,7 @@ class FileAnalyzer:
"""Class to analyse the content of files"""

ALLOWED_EXTENSIONS = ['java', 'py', 'php', 'scala', 'js', 'rb', 'cs', 'cpp', 'c', 'lua', 'go', 'swift']
QMC_ALLOWED_EXTENSIONS = ['c', 'h']
FORBIDDEN_EXTENSIONS = ['tar', 'bz2', "gz", "lz", "apk", "tbz2",
"lzma", "tlz", "war", "xar", "zip", "zipx"]

@@ -244,6 +263,9 @@ def __init__(self, details=False, kind=LIZARD_FILE):
if self.kind == LIZARD_FILE:
self.cloc = Cloc()
self.lizard = Lizard()
elif self.kind == QMCALC_FILE:
self.cloc = Cloc()
self.qmcalc = QMCalc()
else:
self.scc = SCC()

@@ -281,6 +303,11 @@ def analyze(self, file_path):

file_analysis['blanks'] = cloc_analysis['blanks']
file_analysis['comments'] = cloc_analysis['comments']
elif self.kind == QMCALC_FILE:
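# qmcalc only understands C sources and headers; other files fall back to cloc
# so basic size metrics are still collected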
if GraalRepository.extension(file_path) in self.QMC_ALLOWED_EXTENSIONS:
file_analysis = self.qmcalc.analyze(**kwargs)
else:
file_analysis = self.cloc.analyze(**kwargs)
else:
file_analysis = self.scc.analyze(**kwargs)

Expand All @@ -299,6 +326,8 @@ def __init__(self, details=False, kind=LIZARD_REPOSITORY):

if kind == LIZARD_REPOSITORY:
self.analyzer = Lizard()
elif kind == QMCALC_REPOSITORY:
self.analyzer = QMCalc()
else:
self.analyzer = SCC()

@@ -327,6 +356,9 @@ def analyze(self, repository_path, files_affected):
'files_affected': files_affected,
'details': self.details
}
if not kwargs.get('repository_path'):
raise GraalError(cause="The 'repository_path' argument is not set for a repository-level analysis")

repository_analysis = self.analyzer.analyze(**kwargs)

1 change: 1 addition & 0 deletions tests/base_analyzer.py
@@ -29,6 +29,7 @@

ANALYZER_TEST_FOLDER = "data/"
ANALYZER_TEST_FILE = "sample_code.py"
ANALYZER_TEST_C_FILE = "sample_code.c"
DOCKERFILE_TEST = "Dockerfile"


2 changes: 1 addition & 1 deletion tests/base_repo.py
@@ -52,7 +52,7 @@ def setUp(self):
zip_path = os.path.join(data_path, self.repo_name + '.zip')
subprocess.check_call(['unzip', '-qq', zip_path, '-d', self.tmp_repo_path])

origin_path = os.path.join(self.tmp_repo_path, 'graaltest')
origin_path = os.path.join(self.tmp_repo_path, self.repo_name)
subprocess.check_call(['git', 'clone', '-q', '--bare', origin_path, self.git_path],
stderr=fdout)

Binary file added tests/data/BSDCoreUtils.zip
Binary file not shown.
44 changes: 44 additions & 0 deletions tests/data/sample_code.c
@@ -0,0 +1,44 @@
/*
* Example file based on BSD-licensed c2rust project example files:
* https://github.com/immunant/c2rust/blob/master/examples/qsort/qsort.c
*/

#ifdef DOES_NOT_EXIST
#define DOES_EXIST
#endif

#include <stdio.h>
#include <stdlib.h>

static const unsigned int testvar=0;

void swap(int* a, int* b)
{
int t = *a;
*a = *b;
*b = t;
}

int partition (int arr[], int low, int high)
{
int pivot = arr[high];
int i = low - 1;

for (int j = low; j <= high - 1; j++) {
if (arr[j] <= pivot) {
i++;
swap(&arr[i], &arr[j]);
}
}
swap(&arr[i + 1], &arr[high]);
return i + 1;
}

void quickSort(int arr[], int low, int high)
{
if (low < high) {
int i = partition(arr, low, high);
quickSort(arr, low, i - 1);
quickSort(arr, i + 1, high);
}
}