## Summary

This PR introduces an automated mechanism to update the citation count for authors' publications.

- Inspired by @BernardoCama’s suggestion in #3150.
- Resolves #3150.

## Key Changes

- Adds an action to update publication citation counts.
  - Note: This action creates a commit on the main branch.
  - To trigger further GitHub Actions workflows from this commit, a Personal Access Token (PAT) must be used (the default GitHub Actions token cannot trigger subsequent workflows).
- Adds and manages citation data in `_data/citations.yml`.
- Adds and adapts `bin/update_scholar_citations.py` to handle citation updates.

## Usage Examples

### Timeout

<img width="758" height="415" alt="image" src="https://github.com/user-attachments/assets/0a330d35-b386-4670-8668-62701f2dc68b" />

### Success

<img width="1684" height="857" alt="image" src="https://github.com/user-attachments/assets/44aa0558-e02a-4f00-b8cb-9e0ce16dd53c" />
133 lines
4.6 KiB
Python
133 lines
4.6 KiB
Python
#!/usr/bin/env python
|
|
|
|
import os
|
|
import sys
|
|
import yaml
|
|
from datetime import datetime
|
|
from scholarly import scholarly
|
|
|
|
|
|
def load_scholar_user_id() -> str:
    """Load the Google Scholar user ID from the configuration file.

    Reads ``_data/socials.yml`` (relative to the current working directory)
    and returns the value stored under its ``scholar_userid`` key.

    Returns:
        The configured Google Scholar user ID.

    Raises:
        SystemExit: With exit code 1 when the configuration file is missing,
            cannot be parsed as YAML, or does not provide a non-empty
            ``scholar_userid`` entry.
    """
    config_file = "_data/socials.yml"
    if not os.path.exists(config_file):
        print(
            f"Configuration file {config_file} not found. Please ensure the file exists and contains your Google Scholar user ID."
        )
        sys.exit(1)

    # Keep the try body limited to the step that can raise a YAML error.
    try:
        with open(config_file, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)
    except yaml.YAMLError as e:
        print(
            f"Error parsing YAML file {config_file}: {e}. Please check the file for correct YAML syntax."
        )
        sys.exit(1)

    # An empty file parses to None and a top-level list/scalar is not a
    # mapping; treat both as "no ID configured" instead of crashing on .get().
    scholar_user_id = (
        config.get("scholar_userid") if isinstance(config, dict) else None
    )
    if not scholar_user_id:
        print(
            "No 'scholar_userid' found in the configuration file. Please add 'scholar_userid' to _data/socials.yml."
        )
        sys.exit(1)
    return scholar_user_id
|
|
|
|
|
|
# Path of the YAML file that the citation data is read from and written to.
OUTPUT_FILE: str = "_data/citations.yml"
# Resolved once at import time; load_scholar_user_id() exits with status 1
# when the ID cannot be read from the configuration file.
SCHOLAR_USER_ID: str = load_scholar_user_id()
|
|
|
|
|
|
def get_scholar_citations() -> None:
    """Fetch and update Google Scholar citation data.

    Looks up the author identified by ``SCHOLAR_USER_ID`` through
    ``scholarly``, collects title, year and citation count for each
    publication, and writes the result to ``OUTPUT_FILE`` as YAML.

    The fetch is skipped when the existing file records today's date as
    ``metadata.last_updated``. The write is skipped when the fetched
    ``papers`` mapping equals the one already stored.

    Raises:
        SystemExit: With exit code 1 when the author data cannot be fetched,
            contains no ``publications`` entry, or the output file cannot be
            written.
    """
    print(f"Fetching citations for Google Scholar ID: {SCHOLAR_USER_ID}")
    today = datetime.now().strftime("%Y-%m-%d")

    # Bind up front: the comparison before writing reads this even when the
    # output file does not exist yet or could not be read.
    existing_data = None

    # Check if the output file was already updated today
    if os.path.exists(OUTPUT_FILE):
        try:
            with open(OUTPUT_FILE, "r", encoding="utf-8") as f:
                loaded = yaml.safe_load(f)
            # Only a mapping is usable below; an empty or list-shaped file is
            # treated the same as having no previous data.
            if isinstance(loaded, dict):
                existing_data = loaded
            metadata = existing_data.get("metadata") if existing_data else None
            if isinstance(metadata, dict) and "last_updated" in metadata:
                print(f"Last updated on: {metadata['last_updated']}")
                # str() so that an unquoted YAML date (parsed as a date
                # object) still compares equal to today's string.
                if str(metadata["last_updated"]) == today:
                    print("Citations data is already up-to-date. Skipping fetch.")
                    return
        except Exception as e:
            # Best effort: an unreadable file just means a fresh fetch.
            print(
                f"Warning: Could not read existing citation data from {OUTPUT_FILE}: {e}. The file may be missing or corrupted."
            )

    citation_data = {"metadata": {"last_updated": today}, "papers": {}}

    scholarly.set_timeout(15)
    scholarly.set_retries(3)
    try:
        author = scholarly.search_author_id(SCHOLAR_USER_ID)
        author_data = scholarly.fill(author)
    except Exception as e:
        print(
            f"Error fetching author data from Google Scholar for user ID '{SCHOLAR_USER_ID}': {e}. Please check your internet connection and Scholar user ID."
        )
        sys.exit(1)

    if not author_data:
        print(
            f"Could not fetch author data for user ID '{SCHOLAR_USER_ID}'. Please verify the Scholar user ID and try again."
        )
        sys.exit(1)

    if "publications" not in author_data:
        print(f"No publications found in author data for user ID '{SCHOLAR_USER_ID}'.")
        sys.exit(1)

    for pub in author_data["publications"]:
        try:
            pub_id = pub.get("pub_id") or pub.get("author_pub_id")
            if not pub_id:
                print(
                    f"Warning: No ID found for publication: {pub.get('bib', {}).get('title', 'Unknown')}. This publication will be skipped."
                )
                continue

            title = pub.get("bib", {}).get("title", "Unknown Title")
            year = pub.get("bib", {}).get("pub_year", "Unknown Year")
            citations = pub.get("num_citations", 0)

            print(f"Found: {title} ({year}) - Citations: {citations}")

            citation_data["papers"][pub_id] = {
                "title": title,
                "year": year,
                "citations": citations,
            }
        except Exception as e:
            # One malformed entry must not abort the whole update.
            print(
                f"Error processing publication '{pub.get('bib', {}).get('title', 'Unknown')}': {e}. This publication will be skipped."
            )

    # Compare new data with existing data
    if existing_data and existing_data.get("papers") == citation_data["papers"]:
        print("No changes in citation data. Skipping file update.")
        return

    try:
        with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
            yaml.dump(citation_data, f, width=1000, sort_keys=True)
        print(f"Citation data saved to {OUTPUT_FILE}")
    except Exception as e:
        print(
            f"Error writing citation data to {OUTPUT_FILE}: {e}. Please check file permissions and disk space."
        )
        sys.exit(1)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the update and report any exception that
    # escaped it as a non-zero exit status.
    try:
        get_scholar_citations()
    except Exception as unexpected:
        print(f"Unexpected error: {unexpected}")
        sys.exit(1)
|