from pybtex.database.input import bibtex
import pybtex.database.input.bibtex
from time import strptime
import string
import html
import os
import re
# Publications markdown generator for academicpages
# Takes a set of BibTeX publication files and converts them for use with academicpages.github.io. This is an interactive Jupyter notebook (see more info here).
# The core Python code is also in pubsFromBibs.py. Run either from the markdown_generator folder after updating the publist dictionary with:
# * bib file names
# * specific venue keys based on your bib file preferences
# * any specific pre-text for specific files
# * Collection Name (future feature)
# TODO: Make this work with other databases of citations.
# TODO: Merge this with the existing TSV parsing solution.
#todo: incorporate different collection types rather than a catch all publications, requires other changes to template
# Bibliography sources to convert, keyed by publication type. Each entry holds:
#   "file"          - name of the .bib file passed to the BibTeX parser
#   "venuekey"      - BibTeX field read to obtain the venue for this type
#   "venue-pretext" - text prepended to the venue string
#   "collection"    - collection name and permalink prefix written to each page
publist = {
    "proceeding": {
        "file": "proceedings.bib",
        "venuekey": "booktitle",
        "venue-pretext": "In the proceedings of ",
        "collection": {
            "name": "publications",
            "permalink": "/publication/",
        },
    },
    "journal": {
        "file": "pubs.bib",
        "venuekey": "journal",
        "venue-pretext": "",
        "collection": {
            "name": "publications",
            "permalink": "/publication/",
        },
    },
}
# Characters that are replaced by HTML entities before text is embedded in
# the generated page. Any character not listed here is passed through as is.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
}


def html_escape(text):
    """Produce entities within text.

    Each character of ``text`` that is a key of ``html_escape_table`` is
    replaced by the mapped entity; all other characters are kept unchanged.
    Returns the resulting string (empty input yields an empty string).
    """
    return "".join(html_escape_table.get(c, c) for c in text)
# For every configured bibliography source, parse its .bib file and write one
# markdown page (YAML front matter plus a short body) per entry into
# ../_publications/. Entries missing a required field are reported and skipped.
for pubsource in publist:
    parser = bibtex.Parser()
    bibdata = parser.parse_file(publist[pubsource]["file"])

    # loop through the individual references in a given bibtex file
    for bib_id in bibdata.entries:
        # reset default date
        pub_year = "1900"
        pub_month = "01"
        pub_day = "01"

        b = bibdata.entries[bib_id].fields

        try:
            pub_year = f'{b["year"]}'

            # todo: this hack for month and day needs some cleanup
            if "month" in b.keys():
                if len(b["month"]) < 3:
                    # one or two characters: left-pad with "0" to width two
                    pub_month = "0" + b["month"]
                    pub_month = pub_month[-2:]
                else:
                    # Longer values are read as a month name via their first
                    # three letters. (The former `not in range(12)` test
                    # compared a string with integers, so it was always true
                    # and its `else` branch could never run.)
                    tmnth = strptime(b["month"][:3], '%b').tm_mon
                    pub_month = "{:02d}".format(tmnth)
            if "day" in b.keys():
                pub_day = str(b["day"])

            pub_date = pub_year + "-" + pub_month + "-" + pub_day

            # strip out {} as needed (some bibtex entries that maintain formatting)
            plain_title = b["title"].replace("{", "").replace("}", "").replace("\\", "")
            clean_title = plain_title.replace(" ", "-")

            # drop bracketed runs and anything outside [a-zA-Z0-9_-]
            url_slug = re.sub("\\[.*\\]|[^a-zA-Z0-9_-]", "", clean_title)
            url_slug = url_slug.replace("--", "-")

            md_filename = (str(pub_date) + "-" + url_slug + ".md").replace("--", "-")
            html_filename = (str(pub_date) + "-" + url_slug).replace("--", "-")

            # Build Citation from text
            citation = ""

            # citation authors - todo - add highlighting for primary author?
            for author in bibdata.entries[bib_id].persons["author"]:
                citation = citation + " " + author.first_names[0] + " " + author.last_names[0] + ", "

            # citation title
            citation = citation + "\"" + html_escape(plain_title) + ".\""

            # add venue logic depending on citation type
            venue = publist[pubsource]["venue-pretext"] + b[publist[pubsource]["venuekey"]].replace("{", "").replace("}", "").replace("\\", "")

            citation = citation + " " + html_escape(venue)
            citation = citation + ", " + pub_year + "."

            ## YAML variables
            md = "---\ntitle: \"" + html_escape(plain_title) + '"\n'

            md += "collection: " + publist[pubsource]["collection"]["name"]

            md += "\npermalink: " + publist[pubsource]["collection"]["permalink"] + html_filename

            # a note is only used as the excerpt when it is longer than 5 characters
            note = False
            if "note" in b.keys():
                if len(str(b["note"])) > 5:
                    md += "\nexcerpt: '" + html_escape(b["note"]) + "'"
                    note = True

            md += "\ndate: " + str(pub_date)

            md += "\nvenue: '" + html_escape(venue) + "'"

            # same length threshold for the paper URL
            url = False
            if "url" in b.keys():
                if len(str(b["url"])) > 5:
                    md += "\npaperurl: '" + b["url"] + "'"
                    url = True

            md += "\ncitation: '" + html_escape(citation) + "'"

            md += "\n---"

            ## Markdown description for individual page
            if note:
                md += "\n" + html_escape(b["note"]) + "\n"

            if url:
                md += "\n[Access paper here](" + b["url"] + "){:target=\"_blank\"}\n"
            else:
                md += "\nUse [Google Scholar](https://scholar.google.com/scholar?q=" + html.escape(clean_title.replace("-", "+")) + "){:target=\"_blank\"} for full citation"

            md_filename = os.path.basename(md_filename)

            # Explicit UTF-8 so non-ASCII names/titles do not depend on the
            # platform's default encoding.
            with open("../_publications/" + md_filename, 'w', encoding="utf-8") as f:
                f.write(md)
            print(f'SUCESSFULLY PARSED {bib_id}: \"', b["title"][:60], "..."*(len(b['title'])>60), "\"")
        # field may not exist for a reference
        except KeyError as e:
            # The missing field may be the title itself; look it up safely so
            # the warning cannot raise a second KeyError.
            shown_title = b["title"] if "title" in b.keys() else ""
            print(f'WARNING Missing Expected Field {e} from entry {bib_id}: \"', shown_title[:30], "..."*(len(shown_title)>30), "\"")
            continue