aboutsummaryrefslogtreecommitdiff
path: root/rdfloader/parser2v3/parse_file.go
diff options
context:
space:
mode:
Diffstat (limited to 'rdfloader/parser2v3/parse_file.go')
-rw-r--r--rdfloader/parser2v3/parse_file.go223
1 files changed, 223 insertions, 0 deletions
diff --git a/rdfloader/parser2v3/parse_file.go b/rdfloader/parser2v3/parse_file.go
new file mode 100644
index 0000000..54c92c1
--- /dev/null
+++ b/rdfloader/parser2v3/parse_file.go
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+
+package parser2v3
+
+import (
+ "fmt"
+ "strings"
+
+ gordfParser "github.com/spdx/gordf/rdfloader/parser"
+ "github.com/spdx/tools-golang/spdx/common"
+ "github.com/spdx/tools-golang/spdx/v2_3"
+)
+
+// returns a file instance and the error if any encountered.
+func (parser *rdfParser2_3) getFileFromNode(fileNode *gordfParser.Node) (file *v2_3.File, err error) {
+ file = &v2_3.File{}
+
+ currState := parser.cache[fileNode.ID]
+ if currState == nil {
+ // this is the first time we are seeing this node.
+ parser.cache[fileNode.ID] = &nodeState{
+ object: file,
+ Color: WHITE,
+ }
+ } else if currState.Color == GREY {
+ // we have already started parsing this file node and we needn't parse it again.
+ return currState.object.(*v2_3.File), nil
+ }
+
+ // setting color to grey to indicate that we've started parsing this node.
+ parser.cache[fileNode.ID].Color = GREY
+
+ // setting color to black just before function returns to the caller to
+ // indicate that parsing current node is complete.
+ defer func() { parser.cache[fileNode.ID].Color = BLACK }()
+
+ err = setFileIdentifier(fileNode.ID, file) // 4.2
+ if err != nil {
+ return nil, err
+ }
+
+ if existingFile := parser.files[file.FileSPDXIdentifier]; existingFile != nil {
+ file = existingFile
+ }
+
+ for _, subTriple := range parser.nodeToTriples(fileNode) {
+ switch subTriple.Predicate.ID {
+ case SPDX_FILE_NAME: // 4.1
+ // cardinality: exactly 1
+ file.FileName = subTriple.Object.ID
+ case SPDX_NAME:
+ // cardinality: exactly 1
+ // TODO: check where it will be set in the golang-tools spdx-data-model
+ case RDF_TYPE:
+ // cardinality: exactly 1
+ case SPDX_FILE_TYPE: // 4.3
+ // cardinality: min 0
+ fileType := ""
+ fileType, err = parser.getFileTypeFromUri(subTriple.Object.ID)
+ file.FileTypes = append(file.FileTypes, fileType)
+ case SPDX_CHECKSUM: // 4.4
+ // cardinality: min 1
+ err = parser.setFileChecksumFromNode(file, subTriple.Object)
+ case SPDX_LICENSE_CONCLUDED: // 4.5
+ // cardinality: (exactly 1 anyLicenseInfo) or (None) or (Noassertion)
+ anyLicense, err := parser.getAnyLicenseFromNode(subTriple.Object)
+ if err != nil {
+ return nil, fmt.Errorf("error parsing licenseConcluded: %v", err)
+ }
+ file.LicenseConcluded = anyLicense.ToLicenseString()
+ case SPDX_LICENSE_INFO_IN_FILE: // 4.6
+ // cardinality: min 1
+ lic, err := parser.getAnyLicenseFromNode(subTriple.Object)
+ if err != nil {
+ return nil, fmt.Errorf("error parsing licenseInfoInFile: %v", err)
+ }
+ file.LicenseInfoInFiles = append(file.LicenseInfoInFiles, lic.ToLicenseString())
+ case SPDX_LICENSE_COMMENTS: // 4.7
+ // cardinality: max 1
+ file.LicenseComments = subTriple.Object.ID
+ // TODO: allow copyright text to be of type NOASSERTION
+ case SPDX_COPYRIGHT_TEXT: // 4.8
+ // cardinality: exactly 1
+ file.FileCopyrightText = subTriple.Object.ID
+ case SPDX_LICENSE_INFO_FROM_FILES:
+ // TODO: implement it. It is not defined in the tools-golang model.
+ // deprecated artifactOf (see sections 4.9, 4.10, 4.11)
+ case SPDX_ARTIFACT_OF:
+ // cardinality: min 0
+ var artifactOf *v2_3.ArtifactOfProject
+ artifactOf, err = parser.getArtifactFromNode(subTriple.Object)
+ file.ArtifactOfProjects = append(file.ArtifactOfProjects, artifactOf)
+ case RDFS_COMMENT: // 4.12
+ // cardinality: max 1
+ file.FileComment = subTriple.Object.ID
+ case SPDX_NOTICE_TEXT: // 4.13
+ // cardinality: max 1
+ file.FileNotice = getNoticeTextFromNode(subTriple.Object)
+ case SPDX_FILE_CONTRIBUTOR: // 4.14
+ // cardinality: min 0
+ file.FileContributors = append(file.FileContributors, subTriple.Object.ID)
+ case SPDX_FILE_DEPENDENCY:
+ // cardinality: min 0
+ newFile, err := parser.getFileFromNode(subTriple.Object)
+ if err != nil {
+ return nil, fmt.Errorf("error setting a file dependency in a file: %v", err)
+ }
+ file.FileDependencies = append(file.FileDependencies, string(newFile.FileSPDXIdentifier))
+ case SPDX_ATTRIBUTION_TEXT:
+ // cardinality: min 0
+ file.FileAttributionTexts = append(file.FileAttributionTexts, subTriple.Object.ID)
+ case SPDX_ANNOTATION:
+ // cardinality: min 0
+ err = parser.parseAnnotationFromNode(subTriple.Object)
+ case SPDX_RELATIONSHIP:
+ // cardinality: min 0
+ err = parser.parseRelationship(subTriple)
+ default:
+ return nil, fmt.Errorf("unknown triple predicate id %s", subTriple.Predicate.ID)
+ }
+ if err != nil {
+ return nil, err
+ }
+ }
+ parser.files[file.FileSPDXIdentifier] = file
+ return file, nil
+}
+
+func (parser *rdfParser2_3) setFileChecksumFromNode(file *v2_3.File, checksumNode *gordfParser.Node) error {
+ checksumAlgorithm, checksumValue, err := parser.getChecksumFromNode(checksumNode)
+ if err != nil {
+ return fmt.Errorf("error parsing checksumNode of a file: %v", err)
+ }
+ if file.Checksums == nil {
+ file.Checksums = []common.Checksum{}
+ }
+ switch checksumAlgorithm {
+ case common.SHA1,
+ common.SHA224,
+ common.SHA256,
+ common.SHA384,
+ common.SHA512,
+ common.MD2,
+ common.MD4,
+ common.MD5,
+ common.MD6,
+ common.SHA3_256,
+ common.SHA3_384,
+ common.SHA3_512,
+ common.BLAKE2b_256,
+ common.BLAKE2b_384,
+ common.BLAKE2b_512,
+ common.BLAKE3,
+ common.ADLER32:
+ file.Checksums = append(file.Checksums, common.Checksum{Algorithm: checksumAlgorithm, Value: checksumValue})
+ case "":
+ return fmt.Errorf("empty checksum algorithm and value")
+ default:
+ return fmt.Errorf("unknown checksumAlgorithm %s for a file", checksumAlgorithm)
+ }
+ return nil
+}
+
+func (parser *rdfParser2_3) getArtifactFromNode(node *gordfParser.Node) (*v2_3.ArtifactOfProject, error) {
+ artifactOf := &v2_3.ArtifactOfProject{}
+ // setting artifactOfProjectURI attribute (which is optional)
+ if node.NodeType == gordfParser.IRI {
+ artifactOf.URI = node.ID
+ }
+ // parsing rest triples and attributes of the artifact.
+ for _, triple := range parser.nodeToTriples(node) {
+ switch triple.Predicate.ID {
+ case RDF_TYPE:
+ case DOAP_HOMEPAGE:
+ artifactOf.HomePage = triple.Object.ID
+ case DOAP_NAME:
+ artifactOf.Name = triple.Object.ID
+ default:
+ return nil, fmt.Errorf("error parsing artifactOf predicate %s", triple.Predicate.ID)
+ }
+ }
+ return artifactOf, nil
+}
+
+// TODO: check if the filetype is valid.
+func (parser *rdfParser2_3) getFileTypeFromUri(uri string) (string, error) {
+ // fileType is given as a uri. for example: http://spdx.org/rdf/terms#fileType_text
+ lastPart := getLastPartOfURI(uri)
+ if !strings.HasPrefix(lastPart, "fileType_") {
+ return "", fmt.Errorf("fileType Uri must begin with fileTYpe_. found: %s", lastPart)
+ }
+ return strings.TrimPrefix(lastPart, "fileType_"), nil
+}
+
+// populates parser.doc.Files by a list of files which are not
+// associated with a package by the hasFile attribute
+// assumes: all the packages are already parsed.
+func (parser *rdfParser2_3) setUnpackagedFiles() {
+ for fileID := range parser.files {
+ if !parser.assocWithPackage[fileID] {
+ parser.doc.Files = append(parser.doc.Files, parser.files[fileID])
+ }
+ }
+}
+
+func setFileIdentifier(idURI string, file *v2_3.File) (err error) {
+ idURI = strings.TrimSpace(idURI)
+ uriFragment := getLastPartOfURI(idURI)
+ file.FileSPDXIdentifier, err = ExtractElementID(uriFragment)
+ if err != nil {
+ return fmt.Errorf("error setting file identifier: %s", err)
+ }
+ return nil
+}
+
+func getNoticeTextFromNode(node *gordfParser.Node) string {
+ switch node.ID {
+ case SPDX_NOASSERTION_CAPS, SPDX_NOASSERTION_SMALL:
+ return "NOASSERTION"
+ default:
+ return node.ID
+ }
+}