Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
Comment: added previous/next navigation table

...

Code Block
# Reference: https://pydataverse.readthedocs.io/en/latest/reference.html
from pyDataverse.api import NativeApi, DataAccessApi
import json
from lxml import etree
import os

# --- Configuration and API clients ------------------------------------
# Base URL of the Dataverse installation and the persistent identifier of
# the dataset whose files are processed by this script.
DATAVERSE = "https://example.dataverse.org"
DATASET_DOI = "doi:10.5072/FK2/XXXXXX"

# Maps donor id -> donor life stage. Starts out empty; it is filled in from
# the dataset's XML files later in the script.
donor_dict = dict()

# The API token is read from the environment rather than hard-coded:
#   export APITOKEN=....
# If the variable is not set, the lookup raises KeyError.
apitoken = os.environ["APITOKEN"]

# Two clients against the same installation: `api` is used for dataset and
# file metadata calls, `data_api` for downloading file contents.
data_api = DataAccessApi(DATAVERSE, apitoken)
api = NativeApi(DATAVERSE, apitoken)

# Fetch the dataset and keep the list of file entries of its latest version.
dataset = api.get_dataset(DATASET_DOI, auth=apitoken)
dataset_payload = dataset.json()
files_list = dataset_payload["data"]["latestVersion"]["files"]

# Build donor_dict (key: donorID, value: donorLifeStage) from the XML files
# attached to the dataset. The first life stage seen for a donor is kept.
xml_files = [f for f in files_list if "xml" in f["dataFile"]["filename"]]
for x in xml_files:
    # Hand lxml the raw response bytes so the parser honours the encoding
    # declared in the XML itself. Going through `.text` first would decode
    # with the HTTP client's guessed charset and could corrupt non-ASCII
    # characters before the document is ever parsed.
    xml_content = data_api.get_datafile(x["dataFile"]["id"]).content
    xml_root = etree.fromstring(xml_content)

    # findtext() returns None when the element is absent and "" when it is
    # present but empty, so neither case raises here.
    donor_id = xml_root.findtext(".//{http://mother-db.org/mdb}donorID")
    donor_life_stage = xml_root.findtext(
        ".//{http://mother-db.org/mdb}donorLifeStage"
    )

    # A missing/empty donor id must not become a dict key: None would break
    # the later substring match against file labels, and "" would match
    # every label. Skip such files instead of aborting the whole run.
    if not donor_id or not donor_life_stage:
        print(
            "Skipping",
            x["dataFile"]["filename"],
            "- donorID or donorLifeStage missing or empty",
        )
        continue

    donor_dict.setdefault(donor_id, donor_life_stage)
print(donor_dict)

# Ordered (marker, description) pairs. The first marker found in a file's
# label decides its description, so the order of the entries matters.
DESCRIPTION_BY_MARKER = (
    ("ome.tif", "Full Resolution Image"),
    ("reduced", "Reduced Image"),
    ("thumbnail", "Thumbnail Image"),
    ("xml", "Metadata"),
)

# For every file of the dataset: fetch its draft metadata, derive a
# description from the label, attach donor information, and print the
# resulting JSON.
for entry in files_list:
    data_file = entry["dataFile"]
    file_name = data_file["filename"]
    file_id = data_file["id"]
    print(file_name, file_id)

    # Signature for reference:
    # get_datafile_metadata(identifier, is_filepid=False, is_draft=False, auth=True)
    file_metadata = api.get_datafile_metadata(file_id, is_draft=True).json()
    label = file_metadata['label']

    # Set the description only when one of the known markers occurs in the
    # label; otherwise the metadata is left as returned by the server.
    description = next(
        (text for marker, text in DESCRIPTION_BY_MARKER if marker in label),
        None,
    )
    if description is not None:
        file_metadata['description'] = description

    # Every donor id contained in the label overwrites the previous match,
    # so the last matching donor in donor_dict wins.
    for donor, life_stage in donor_dict.items():
        if donor not in label:
            continue
        # The donor's life stage becomes the file's only tag ...
        file_metadata['categories'] = [life_stage]
        # ... and the donor id becomes the file's directory label.
        file_metadata['directoryLabel'] = donor

    # Signature for reference:
    # update_datafile_metadata(identifier, json_str=None, is_filepid=False)
    payload = json.dumps(file_metadata)
    print(payload)
    # The actual update is left disabled, so the script only prints what it
    # would send.
    # NOTE(review): HTTP response objects normally expose raise_for_status(),
    # not check_returncode() - confirm before enabling the lines below.
    #update_response = api.update_datafile_metadata(file_id, json_str=payload)
    #update_response.check_returncode()