Import from DB-Pedia
DBPedia exports all the entities as GZipped CSV files. Features:
- First line contains column names, second, third and forth has meta information, which we'll skip (look at
"skipFrom": 1, "skipTo": 3
in CSV transformer) - The vertex class name is created automatically based on the file name, so we can use the same file against any DBPedia file
- The Primary Key is the "URI" field, where a UNIQUE index has also been created (refer to "ORIENTDB" loader)
- The "merge" transformer is used to allow to re-import or update any file without generating duplicates
Configuration
{
"config": {
"log": "debug",
"fileDirectory": "/temp/databases/dbpedia_csv/",
"fileName": "Person.csv.gz"
},
"begin": [
{ "let": { "name": "$filePath", "value": "$fileDirectory.append( $fileName )"} },
{ "let": { "name": "$className", "value": "$fileName.substring( 0, $fileName.indexOf('.') )"} }
],
"source" : {
"file": { "path": "$filePath", "lock" : true }
},
"extractor" : {
{ "csv": { "separator": ",", "nullValue": "NULL", "skipFrom": 1, "skipTo": 3 } },
},
"transformers" : [
{ "merge": { "joinFieldName":"URI", "lookup":"V.URI" } },
{ "vertex": { "class": "$className"} }
],
"loader" : {
"orientdb": {
"dbURL": "plocal:/temp/databases/dbpedia",
"dbUser": "admin",
"dbPassword": "admin",
"dbAutoCreate": true,
"tx": false,
"batchCommit": 1000,
"dbType": "graph",
"indexes": [{"class":"V", "fields":["URI:string"], "type":"UNIQUE" }]
}
}
}