Here is a sample training data configuration file that describes a graph for a node-classification task:
{
  "version": "v2.0",
  "query_engine": "gremlin",
  "graph": [
    {
      "edges": [
        {
          "file_name": "edges/(movie)-included_in-(genre).csv",
          "separator": ",",
          "source": ["~from", "movie"],
          "relation": ["", "included_in"],
          "dest": ["~to", "genre"]
        },
        {
          "file_name": "edges/(user)-rated-(movie).csv",
          "separator": ",",
          "source": ["~from", "user"],
          "relation": ["rating", "prefixname"],
          "dest": ["~to", "movie"],
          "features": [
            {
              "feature": ["rating", "rating", "numerical"],
              "norm": "min-max"
            }
          ]
        }
      ],
      "nodes": [
        {
          "file_name": "nodes/genre.csv",
          "separator": ",",
          "node": ["~id", "genre"],
          "features": [
            {
              "feature": ["name", "genre", "category"],
              "separator": ";"
            }
          ]
        },
        {
          "file_name": "nodes/movie.csv",
          "separator": ",",
          "node": ["~id", "movie"],
          "features": [
            {
              "feature": ["title", "title", "word2vec"],
              "language": ["en_core_web_lg"]
            }
          ]
        },
        {
          "file_name": "nodes/user.csv",
          "separator": ",",
          "node": ["~id", "user"],
          "features": [
            {
              "feature": ["age", "age", "numerical"],
              "norm": "min-max",
              "imputation": "median"
            },
            {
              "feature": ["occupation", "occupation", "category"]
            }
          ],
          "labels": [
            {
              "label": ["gender", "classification"],
              "split_rate": [0.8, 0.2, 0.0]
            }
          ]
        }
      ]
    }
  ],
  "warnings": []
}