rdfloader/parser2v2/parser.go


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133

// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later

package parser2v2

import (
	"errors"
	"fmt"

	gordfParser "github.com/spdx/gordf/rdfloader/parser"
	gordfWriter "github.com/spdx/gordf/rdfwriter"
	"github.com/spdx/tools-golang/spdx/common"
	"github.com/spdx/tools-golang/spdx/v2_2"
)

// returns a new instance of rdfParser2_2 given the gordf object and nodeToTriples mapping
func NewParser2_2(gordfParserObj *gordfParser.Parser, nodeToTriples map[string][]*gordfParser.Triple) *rdfParser2_2 {
	parser := rdfParser2_2{
		gordfParserObj:      gordfParserObj,
		nodeStringToTriples: nodeToTriples,
		doc: &v2_2.Document{
			ExternalDocumentReferences: []v2_2.ExternalDocumentRef{},
			CreationInfo:               &v2_2.CreationInfo{},
			Packages:                   []*v2_2.Package{},
			Files:                      []*v2_2.File{},
			OtherLicenses:              []*v2_2.OtherLicense{},
			Relationships:              []*v2_2.Relationship{},
			Annotations:                []*v2_2.Annotation{},
			Reviews:                    []*v2_2.Review{},
		},
		files:            map[common.ElementID]*v2_2.File{},
		assocWithPackage: map[common.ElementID]bool{},
		cache:            map[string]*nodeState{},
	}
	return &parser
}

// main function which takes in a gordfParser and returns
// a spdxDocument model or the error encountered while parsing it
func LoadFromGoRDFParser(gordfParserObj *gordfParser.Parser) (*v2_2.Document, error) {
	// nodeToTriples is a mapping from a node to list of triples.
	// for every node in the set of subjects of all the triples,
	// it provides a list of triples that are associated with that subject node.
	nodeToTriples := gordfWriter.GetNodeToTriples(gordfParserObj.Triples)
	parser := NewParser2_2(gordfParserObj, nodeToTriples)

	spdxDocumentNode, err := parser.getSpdxDocNode()
	if err != nil {
		return nil, err
	}

	err = parser.parseSpdxDocumentNode(spdxDocumentNode)
	if err != nil {
		return nil, err
	}

	// parsing other root elements
	for _, rootNode := range gordfWriter.GetRootNodes(parser.gordfParserObj.Triples) {
		typeTriples := gordfWriter.FilterTriples(gordfParserObj.Triples, &rootNode.ID, &RDF_TYPE, nil)
		if len(typeTriples) != 1 {
			return nil, fmt.Errorf("every node must be associated with exactly 1 type Triple. found %d type triples", len(typeTriples))
		}
		switch typeTriples[0].Object.ID {
		case SPDX_SPDX_DOCUMENT_CAPITALIZED:
			continue // it is already parsed.
		case SPDX_SNIPPET:
			snippet, err := parser.getSnippetInformationFromNode2_2(typeTriples[0].Subject)
			if err != nil {
				return nil, fmt.Errorf("error parsing a snippet: %v", err)
			}
			err = parser.setSnippetToFileWithID(snippet, snippet.SnippetFromFileSPDXIdentifier)
			if err != nil {
				return nil, err
			}
		// todo: check other root node attributes.
		default:
			continue
			// because in rdf it is quite possible that the root node is an
			// element that has been used in the some other element as a child
		}
	}

	// parsing packages and files sets the files to a files variable which is
	// associated with the parser and not the document. following method is
	// necessary to transfer the files which are not set in the packages to the
	// Files attribute of the document
	// WARNING: do not relocate following function call. It must be at the end of the function
	parser.setUnpackagedFiles()
	return parser.doc, nil
}

// from the given parser object, returns the SpdxDocument Node defined in the root elements.
// returns error if the document is associated with no SpdxDocument or
// associated with more than one SpdxDocument node.
func (parser *rdfParser2_2) getSpdxDocNode() (node *gordfParser.Node, err error) {
	/* Possible Questions:
	1. why are you traversing the root nodes only? why not directly filter out
	   all the triples with rdf:type=spdx:SpdxDocument?
	Ans: It is quite possible that the relatedElement or any other attribute
		 to have dependency of another SpdxDocument. In that case, that
		 element will reference the dependency using SpdxDocument tag which will
		 cause false positives when direct filtering is done.
	*/
	// iterate over root nodes and find the node which has a property of rdf:type=spdx:SpdxDocument
	var spdxDocNode *gordfParser.Node
	for _, rootNode := range gordfWriter.GetRootNodes(parser.gordfParserObj.Triples) {
		typeTriples := gordfWriter.FilterTriples(
			parser.nodeToTriples(rootNode), // triples
			&rootNode.ID,                   // Subject
			&RDF_TYPE,                      // Predicate
			nil,                            // Object
		)

		if typeTriples[0].Object.ID == SPDX_SPDX_DOCUMENT_CAPITALIZED {
			// we found a SpdxDocument Node

			// must be associated with exactly one rdf:type.
			if len(typeTriples) != 1 {
				return nil, fmt.Errorf("rootNode (%v) must be associated with exactly one"+
					" triple of predicate rdf:type, found %d triples", rootNode, len(typeTriples))
			}

			// checking if we've already found a node and it is not same as the current one.
			if spdxDocNode != nil && spdxDocNode.ID != typeTriples[0].Subject.ID {
				return nil, fmt.Errorf("found more than one SpdxDocument Node (%v and %v)", spdxDocNode, typeTriples[0].Subject)
			}
			spdxDocNode = typeTriples[0].Subject
		}
	}
	if spdxDocNode == nil {
		return nil, errors.New("RDF files must be associated with a SpdxDocument tag. No tag found")
	}
	return spdxDocNode, nil
}