...
Code Block |
---|
# Reference: https://pydataverse.readthedocs.io/en/latest/reference.html
#
# Annotate the files of a Dataverse dataset:
#   * set each file's description from a keyword found in its label,
#   * tag each file with its donor's life stage (categories), and
#   * use the donor ID as the file's directoryLabel.
# Donor IDs and life stages are read from the XML files stored in the dataset.
# As written this is a dry run: the new metadata is printed, and the call that
# would write it back is left commented out at the bottom of the loop.
import json
import os

from lxml import etree
from pyDataverse.api import DataAccessApi, NativeApi

DATAVERSE = "https://example.dataverse.org"
DATASET_DOI = "doi:10.5072/FK2/XXXXXX"

# Namespace-qualified element paths looked up in each XML metadata file.
DONOR_ID_PATH = ".//{http://mother-db.org/mdb}donorID"
DONOR_LIFE_STAGE_PATH = ".//{http://mother-db.org/mdb}donorLifeStage"

# (keyword, description) pairs, checked in order against the file label;
# the first keyword contained in the label decides the description.
DESCRIPTION_BY_KEYWORD = (
    ("ome.tif", "Full Resolution Image"),
    ("reduced", "Reduced Image"),
    ("thumbnail", "Thumbnail Image"),
    ("xml", "Metadata"),
)

# Use environment variable for API TOKEN
# export APITOKEN=....
apitoken = os.environ['APITOKEN']

api = NativeApi(DATAVERSE, apitoken)
dataset = api.get_dataset(DATASET_DOI, auth=apitoken)
data_api = DataAccessApi(DATAVERSE, apitoken)

donor_dict = {}  # key: donor id, value: life stage
files_list = dataset.json()['data']['latestVersion']['files']

# Build donor_dict (donorID -> donorLifeStage) from the XML files.
xml_files = [f for f in files_list if "xml" in f["dataFile"]["filename"]]
for xml_file in xml_files:
    response = data_api.get_datafile(xml_file["dataFile"]["id"])
    # Parse the raw bytes so lxml honours the encoding declared in the XML
    # itself; going through `.text` would first decode with whatever charset
    # the HTTP client guesses, which can corrupt non-ASCII values.
    xml_root = etree.fromstring(response.content)
    donor_id = xml_root.findtext(DONOR_ID_PATH)
    donor_life_stage = xml_root.findtext(DONOR_LIFE_STAGE_PATH)
    if not donor_id or not donor_life_stage:
        # An absent or empty element cannot be used as a donor key / tag;
        # skip the file instead of failing on a None value.
        print("Skipping", xml_file["dataFile"]["filename"],
              "- missing donorID or donorLifeStage")
        continue
    # The first XML file seen for a donor decides its life stage.
    donor_dict.setdefault(donor_id, donor_life_stage)

print(donor_dict)

for file in files_list:
    file_name = file["dataFile"]["filename"]
    file_id = file["dataFile"]["id"]
    print(file_name, file_id)

    # get_datafile_metadata(identifier, is_filepid=False, is_draft=False, auth=True)
    file_metadata = api.get_datafile_metadata(file_id, is_draft=True).json()
    label = file_metadata['label']

    # Labels matching none of the keywords keep their current description.
    for keyword, description in DESCRIPTION_BY_KEYWORD:
        if keyword in label:
            file_metadata['description'] = description
            break

    # Donor IDs are matched as substrings of the label; if several donors
    # match, the last one in donor_dict wins.
    for donor_id, life_stage in donor_dict.items():
        if donor_id in label:
            # add lifeStage as tag
            file_metadata['categories'] = [life_stage]
            # add donor as directoryLabel
            file_metadata['directoryLabel'] = donor_id

    # update_datafile_metadata(identifier, json_str=None, is_filepid=False)
    file_metadata_json_str = json.dumps(file_metadata)
    print(file_metadata_json_str)

    # Dry run: uncomment the two lines below to write the metadata back.
    # NOTE(review): confirm the response object really provides
    # check_returncode() before enabling this.
    #update_response = api.update_datafile_metadata(file_id, json_str=file_metadata_json_str)
    #update_response.check_returncode()
Previous: Adding file tags |