Source code for argopy.related.argo_documentation
import os
import json
import pandas as pd
from functools import lru_cache
from ..stores import httpstore
from ..options import OPTIONS
from .utils import path2assets
# Read the ADMT documentation catalogue from the JSON file found in the
# package assets folder; only the 'data' > 'catalogue' entry is kept.
with open(
    os.path.join(path2assets, "admt_documentation_catalogue.json"),
    mode="rb",
) as f:
    ADMT_CATALOGUE = json.loads(f.read())['data']['catalogue']
class ArgoDocs:
    """ADMT documentation helper class

    Give access to the documents listed in the ADMT documentation catalogue.
    Each catalogue entry is used here through its ``id``, ``doi`` and ``title``
    keys. Further metadata (authors, abstract, link to the pdf) come from a RIS
    record that is downloaded on first access, starting from the DOI of the
    selected document.

    Parameters
    ----------
    docid: int or str, optional
        Document to select: either an integer id from the catalogue, or a DOI
        string starting with ``10.13155/`` that is listed in the catalogue.
        Without ``docid`` no document is selected and only :attr:`list` and
        :meth:`search` are usable.
    cache: bool, default=False
        Forwarded as the ``cache`` argument of the http store used for downloads.

    Raises
    ------
    ValueError
        If ``docid`` is an integer or a string that is not in the catalogue.

    Examples
    --------
    >>> ArgoDocs().list
    >>> ArgoDocs().search("CDOM")
    >>> ArgoDocs().search("CDOM", where='abstract')
    >>> ArgoDocs(35385)
    >>> ArgoDocs(35385).ris
    >>> ArgoDocs(35385).abstract
    >>> ArgoDocs(35385).show()
    >>> ArgoDocs(35385).open_pdf()
    >>> ArgoDocs(35385).open_pdf(page=12)
    """
    _catalogue = ADMT_CATALOGUE

    class RIS:
        """RIS file structure from TXT file

        Parameters
        ----------
        file: str, optional
            Path or url of the TXT file. When given, it is parsed immediately.
        fs: optional
            File system object providing the ``open`` method used to read ``file``.

        Attributes
        ----------
        record: dict or None
            Mapping of 2-character tags to their text, ``None`` until a file is parsed.
        """

        def __init__(self, file=None, fs=None):
            self.record = None
            self.fs = fs
            if file:
                self.parse(file)

        def parse(self, file):
            """Parse input file

            Every non-blank line is stripped, then interpreted as:

            - ``XX text`` (a space in third position): start of tag ``XX``,
            - any other line longer than 2 characters: continuation of the
              last tag started,
            - a line of exactly 2 characters: a tag without content.

            The pieces collected for a tag are joined with ``"; "``. A tag that
            shows up again replaces its previous content.
            """
            with self.fs.open(file, 'r', encoding="utf-8") as f:
                stripped = [line.strip() for line in f.readlines()]
            # Eliminate blank lines
            lines = [line for line in stripped if line]

            record = {}
            tag = None  # last tag started, target of continuation lines
            for line in lines:
                if len(line) > 2:
                    if line[2] == " ":
                        tag = line[0:2]
                        record[tag] = [line[3:]]
                    elif tag is not None:
                        record[tag].append(line)
                    # A continuation line found before any tag has nothing to
                    # be attached to: it is skipped instead of raising an
                    # UnboundLocalError.
                elif len(line) == 2:
                    # NOTE(review): a bare tag does not become the current tag,
                    # so continuation lines that follow are still attached to
                    # the previous tagged line. Confirm this is intended.
                    record[line] = []

            self.record = {key: "; ".join(values) for key, values in record.items()}

    # No ``lru_cache`` here: a cache on ``__init__`` is keyed on ``self``, so
    # it can never hit for a new instance, it keeps every instance alive, and
    # it makes unhashable arguments raise a TypeError.
    def __init__(self, docid=None, cache=False):
        import numbers

        self.docid = None
        self._ris = None
        self._risfile = None
        self._cache = cache  # kept so that `search` can forward it
        self._fs = httpstore(cache=cache, cachedir=OPTIONS['cachedir'])
        self._doiserver = "https://dx.doi.org"
        self._archimer = "https://archimer.ifremer.fr"

        if isinstance(docid, numbers.Integral):
            # numbers.Integral also covers integer types that are not `int`
            # subclasses (e.g. the numpy integers read from the `list`
            # DataFrame), which used to be silently ignored.
            docid = int(docid)
            if any(doc['id'] == docid for doc in self._catalogue):
                self.docid = docid
            else:
                raise ValueError("Unknown document id")
        elif isinstance(docid, str):
            matches = [doc['id'] for doc in self._catalogue if doc['doi'] == docid]
            if docid.startswith('10.13155/') and matches:
                self.docid = matches[0]
            else:
                raise ValueError("'docid' must be an integer or a valid Argo DOI")

    def __repr__(self):
        # Note that with a document selected, this reads `pdf` and `ris` and
        # hence triggers the download of the RIS record if not done already.
        summary = ["<argopy.ArgoDocs>"]
        if self.docid is not None:
            doc = self.js
            summary.append("Title: %s" % doc['title'])
            summary.append("DOI: %s" % doc['doi'])
            summary.append("url: https://dx.doi.org/%s" % doc['doi'])
            summary.append("last pdf: %s" % self.pdf)
            ris = self.ris
            if 'AF' in ris:
                summary.append("Authors: %s" % ris['AF'])
            summary.append("Abstract: %s" % ris['AB'])
        else:
            summary.append("- %i documents with a DOI are available in the catalogue" % len(self._catalogue))
            summary.append("- Use the method 'search' to find a document id")
            summary.append("- Use the property 'list' to check out the catalogue")
        return "\n".join(summary)

    @property
    def list(self):
        """List of all available documents as a :class:`pandas.DataFrame`"""
        return pd.DataFrame(self._catalogue)

    @property
    def js(self):
        """Internal json record for a document

        Raises
        ------
        ValueError
            If no document is selected.
        """
        if self.docid is None:
            raise ValueError("Select a document first !")
        return [doc for doc in self._catalogue if doc['id'] == self.docid][0]

    @property
    def ris(self):
        """RIS record of a document

        On first access, the page served for the document DOI is downloaded,
        the link labelled ``TXT`` is extracted from it and the file it points
        to is parsed with :class:`ArgoDocs.RIS`. The result is kept on the
        instance and returned as is by later accesses.

        Returns
        -------
        dict

        Raises
        ------
        ValueError
            If no document is selected, or if no ``TXT`` link is found in the
            downloaded page.
        """
        if self.docid is None:
            raise ValueError("Select a document first !")

        if self._ris is None:
            # Fetch RIS metadata for this document:
            import re
            doi = self.js['doi']
            page = self._fs.download_url("%s/%s" % (self._doiserver, doi))
            # The downloaded content is coerced with str() before matching
            found = re.search(r'<a target="_blank" href="(https?:\/\/([^"]*))"\s+([^>]*)rel="nofollow">TXT<\/a>',
                              str(page))
            if found is None:
                # Fail with an explicit message rather than with
                # "'NoneType' object is not subscriptable"
                raise ValueError("Could not find the link to the TXT (RIS) export for DOI '%s'" % doi)
            export_txt_url = found[1].replace("https://archimer.ifremer.fr", self._archimer)
            self._risfile = export_txt_url
            self._ris = self.RIS(export_txt_url, fs=self._fs).record
        return self._ris

    @property
    def abstract(self):
        """Abstract of a document (``AB`` entry of the RIS record)"""
        if self.docid is None:
            raise ValueError("Select a document first !")
        return self.ris['AB']

    @property
    def pdf(self):
        """Link to the online pdf version of a document (``UR`` entry of the RIS record)"""
        if self.docid is None:
            raise ValueError("Select a document first !")
        return self.ris['UR']

    def show(self, height=800):
        """Insert document in pdf in a notebook cell

        Parameters
        ----------
        height: int
            Height in pixels of the cell

        Returns
        -------
        IPython ``HTML`` object embedding the pdf

        Raises
        ------
        ValueError
            If no document is selected.
        """
        if self.docid is None:
            raise ValueError("Select a document first !")
        from IPython.core.display import HTML
        return HTML(
            '<embed src="%s" type="application/pdf" width="100%%" height="%ipx" />' % (self.ris['UR'], height))

    def open_pdf(self, page=None, url_only=False):
        """Open document in new browser tab

        Parameters
        ----------
        page: int, optional
            Open directly a specific page number
        url_only: bool, default=False
            If True, the browser is not opened and the url is returned instead.

        Returns
        -------
        str or None
            The url when ``url_only`` is True, nothing otherwise.

        Raises
        ------
        ValueError
            If no document is selected.
        """
        # Checked first: reading `pdf` without a document raises as well, which
        # used to make the final check of this method unreachable.
        if self.docid is None:
            raise ValueError("Select a document first !")

        url = self.pdf + '#view=FitV&pagemode=thumbs'
        if page:
            url += '&page=%i' % page

        if url_only:
            return url
        import webbrowser
        webbrowser.open_new(url)

    def search(self, txt, where='title'):
        """Search for string in all documents title or abstract

        The search is case-insensitive. Searching abstracts requires the RIS
        record of every document of the catalogue, one download sequence per
        document.

        Parameters
        ----------
        txt: str
        where: str, default='title'
            Where to search, can be 'title' or 'abstract'. Any other value
            gives an empty result.

        Returns
        -------
        list
            Ids of the matching documents, in catalogue order.
        """
        needle = txt.lower()

        if where == 'title':
            # Titles are in the catalogue: no need for one ArgoDocs instance
            # (and one http store) per document.
            return [doc['id'] for doc in self._catalogue if needle in doc['title'].lower()]

        results = []
        if where == 'abstract':
            for doc in self._catalogue:
                docid = doc['id']
                # Forward the cache option of this instance to the downloads
                if needle in ArgoDocs(docid, cache=self._cache).abstract.lower():
                    results.append(docid)
        return results