Adapt mechanism for citation count (#3177)
## Summary This PR introduces an automated mechanism to update the citation count for authors' publications. - Inspired by @BernardoCama’s suggestion in #3150. - Resolves #3150. ## Key Changes - Adds an action to update publication citation counts. - Note: This action creates a commit on the main branch. - To trigger further GitHub Actions workflows from this commit, a Personal Access Token (PAT) must be used (the default GitHub Actions token cannot trigger subsequent workflows). - Adds and manages citation data in `_data/citations.yml`. - Adds and adapts `bin/update_scholar_citations.py` to handle citation updates. ## Usage Examples ### Timeout <img width="758" height="415" alt="image" src="https://github.com/user-attachments/assets/0a330d35-b386-4670-8668-62701f2dc68b" /> ### Success <img width="1684" height="857" alt="image" src="https://github.com/user-attachments/assets/44aa0558-e02a-4f00-b8cb-9e0ce16dd53c" />
This commit is contained in:
parent
c38d33b5f8
commit
1f349ffda2
101
.github/workflows/update-citations.yml
vendored
Normal file
101
.github/workflows/update-citations.yml
vendored
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
# Periodically refreshes _data/citations.yml from Google Scholar and commits
# the file back to the checked-out branch when its content changed.
name: Update Google Scholar Citations

on:
  schedule:
    - cron: "0 0 * * 1" # Monday
    - cron: "0 0 * * 3" # Wednesday
    - cron: "0 0 * * 5" # Friday
  workflow_dispatch:

jobs:
  update-citations:
    runs-on: ubuntu-latest
    # The final step pushes a commit, so the default GITHUB_TOKEN needs write
    # access to repository contents. Without this the push is rejected on
    # repositories whose default workflow permissions are read-only.
    permissions:
      contents: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        # See CUSTOMIZE.md for details on how to set up PAT for triggering subsequent workflows
        # with:
        #   token: ${{ secrets.PAT }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      - name: Install dependencies
        run: |
          echo "🔧 Installing dependencies..."
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # Hash the file before the update so the summary can report a change.
      - name: Save current citations.yml hash
        id: before
        run: |
          echo "📦 Checking existing citations.yml hash..."
          if [ -f _data/citations.yml ]; then
            sha_before=$(sha256sum _data/citations.yml | awk '{print $1}')
            echo "sha_before=$sha_before" >> "$GITHUB_OUTPUT"
            echo "📝 SHA before: $sha_before"
          else
            echo "sha_before=none" >> "$GITHUB_OUTPUT"
            echo "📝 No existing citations.yml file found."
          fi

      # `set +e` keeps a failing or timed-out script from failing the job:
      # the exit status is only reported, and the remaining steps still run.
      - name: Run citation update script
        id: run_citation_update
        shell: bash
        run: |
          set +e
          echo "🚀 Running citation update script (single attempt)..."
          start_time=$(date)
          timeout 90 python bin/update_scholar_citations.py
          status=$?
          end_time=$(date)
          if [ $status -eq 0 ]; then
            echo "✅ Citation update succeeded (started at $start_time, ended at $end_time)."
            echo "✅ Citation update succeeded." >> "$GITHUB_STEP_SUMMARY"
          else
            echo "❌ Citation update script failed with exit code $status (started at $start_time, ended at $end_time)."
            echo "❌ Citation update script failed with exit code $status." >> "$GITHUB_STEP_SUMMARY"
          fi
          set -e

      - name: Save new citations.yml hash
        id: after
        run: |
          echo "🔍 Checking updated citations.yml hash..."
          if [ -f _data/citations.yml ]; then
            sha_after=$(sha256sum _data/citations.yml | awk '{print $1}')
            echo "sha_after=$sha_after" >> "$GITHUB_OUTPUT"
            echo "📝 SHA after: $sha_after"
          else
            echo "sha_after=none" >> "$GITHUB_OUTPUT"
            echo "📝 citations.yml was not created or is missing."
          fi

      - name: Report citations.yml change in summary
        run: |
          echo "📋 Comparing citation file hashes..."
          if [ "${{ steps.before.outputs.sha_before }}" != "${{ steps.after.outputs.sha_after }}" ]; then
            echo "✅ _data/citations.yml was updated."
            echo "✅ _data/citations.yml was updated." >> "$GITHUB_STEP_SUMMARY"
          else
            echo "ℹ️ _data/citations.yml was not changed."
            echo "ℹ️ _data/citations.yml was not changed." >> "$GITHUB_STEP_SUMMARY"
          fi

      - name: Configure Git
        run: |
          git config --local user.email "actions@github.com"
          git config --local user.name "GitHub Actions"
          echo "🔧 Git configured."

      # `git diff --staged --quiet` exits non-zero only when something is
      # staged, so the commit/push subshell runs only when the file changed.
      - name: Commit and push if changed
        run: |
          git add _data/citations.yml
          git diff --staged --quiet || (
            echo "📤 Committing and pushing changes..."
            git commit -m "Update Google Scholar citations"
            git push
          )
|
||||||
3
.gitignore
vendored
3
.gitignore
vendored
@ -10,4 +10,5 @@ Gemfile.lock
|
|||||||
assets/libs/
|
assets/libs/
|
||||||
node_modules/
|
node_modules/
|
||||||
vendor
|
vendor
|
||||||
.idea
|
.idea
|
||||||
|
.venv
|
||||||
|
|||||||
@ -10,3 +10,5 @@ _posts/2015-10-20-math.md
|
|||||||
_sass/font-awesome/*.scss
|
_sass/font-awesome/*.scss
|
||||||
_sass/tabler-icons/*.scss
|
_sass/tabler-icons/*.scss
|
||||||
_scripts/*
|
_scripts/*
|
||||||
|
# Ignore citation YAML file generated by script
|
||||||
|
_data/citations.yml
|
||||||
|
|||||||
44
CUSTOMIZE.md
44
CUSTOMIZE.md
@ -345,3 +345,47 @@ In this folder you need to store your file in the same format as you would in `_
|
|||||||
- `2025-08-27-file2.md` will be posted exactly on 27-August-2025
|
- `2025-08-27-file2.md` will be posted exactly on 27-August-2025
|
||||||
- `File3.md` will not be posted at all
|
- `File3.md` will not be posted at all
|
||||||
- `2026-02-31-file4.md` is supposed to be posted on 31-February-2026, but there is no 31st in February hence this file will never be posted either
|
- `2026-02-31-file4.md` is supposed to be posted on 31-February-2026, but there is no 31st in February hence this file will never be posted either
|
||||||
|
|
||||||
|
## Setting up a Personal Access Token (PAT) for Google Scholar Citation Updates
|
||||||
|
|
||||||
|
> [!TIP]
|
||||||
|
> After setting up al-folio you may want to run `python3 bin/update_scholar_citations.py` to fill the `_data/citations.yml` file with your Google Scholar citation counts.
|
||||||
|
|
||||||
|
This project includes an automated workflow to update the citation counts for your publications using Google Scholar.
|
||||||
|
The workflow commits changes to `_data/citations.yml` directly to the `main` branch.
|
||||||
|
By default, the `GITHUB_TOKEN` will be used to commit the changes.
|
||||||
|
However, this token does not have permission to trigger subsequent workflows, such as the site rebuild workflow.
|
||||||
|
In order to deploy the changes from `main`, you can manually trigger the `deploy` workflow.
|
||||||
|
|
||||||
|
> [!TIP]
|
||||||
|
> To ensure that these commits can trigger further GitHub Actions workflows (such as site rebuilds), you can use a Personal Access Token (PAT) instead of the default GitHub Actions token.
|
||||||
|
> If you have set up a PAT, citation updates will trigger further workflows (such as site rebuilds) after committing changes. In order to run the action with a PAT, you need to uncomment the following lines from the workflow file (`update-citations.yml`):
|
||||||
|
>
|
||||||
|
> ```yaml
|
||||||
|
> with:
|
||||||
|
> token: ${{ secrets.PAT }}
|
||||||
|
> ```
|
||||||
|
|
||||||
|
### Why is a PAT required?
|
||||||
|
|
||||||
|
GitHub restricts the default `GITHUB_TOKEN` from triggering other workflows when a commit is made from within a workflow. Using a PAT overcomes this limitation and allows for full automation.
|
||||||
|
|
||||||
|
### How to set up the PAT
|
||||||
|
|
||||||
|
1. **Create a Personal Access Token**
|
||||||
|
|
||||||
|
- Go to [GitHub Settings > Developer settings > Personal access tokens](https://github.com/settings/tokens).
|
||||||
|
- Click "Generate new token" (classic or fine-grained).
|
||||||
|
- Grant at least the following permissions:
|
||||||
|
- `repo` (for classic tokens if repo is private), `public_repo` (for classic tokens if repo is public) or `contents: read/write` (for fine-grained tokens)
|
||||||
|
- Save the token somewhere safe.
|
||||||
|
|
||||||
|
2. **Add the PAT as a repository secret**
|
||||||
|
|
||||||
|
- Go to your repository on GitHub.
|
||||||
|
- Navigate to `Settings` > `Secrets and variables` > `Actions` > `New repository secret`.
|
||||||
|
- Name the secret `PAT` (must match the name used in the workflow).
|
||||||
|
- Paste your PAT and save.
|
||||||
|
|
||||||
|
3. **Workflow usage**
|
||||||
|
Once the `with:` / `token: ${{ secrets.PAT }}` lines are uncommented, the workflow `.github/workflows/update-citations.yml` uses this PAT to check out the repository and push updates to `_data/citations.yml`.
|
||||||
|
|||||||
4179
_data/citations.yml
Normal file
4179
_data/citations.yml
Normal file
File diff suppressed because it is too large
Load Diff
@ -317,9 +317,27 @@
|
|||||||
aria-label="Google Scholar link"
|
aria-label="Google Scholar link"
|
||||||
role="button"
|
role="button"
|
||||||
>
|
>
|
||||||
|
{% comment %}
  Look up this entry's citation count in site.data.citations.papers.
  Tried in order: the "<scholar user id>:<publication id>" key, the bare
  publication id, and finally the first key that contains the publication id.
  citation_count stays 0 when nothing matches.
  The user id is read from _data/socials.yml (the file the update script
  reads it from), falling back to the site-level setting if that is unset.
{% endcomment %}
{% assign citation_count = 0 %}
{% assign scholar_userid = site.data.socials.scholar_userid | default: site.scholar_userid %}
{% assign scholar_id_key = scholar_userid | append: ':' | append: entry.google_scholar_id %}
{% assign publication_key = entry.google_scholar_id %}

{% if site.data.citations.papers[scholar_id_key] %}
  {% assign citation_count = site.data.citations.papers[scholar_id_key].citations %}
{% elsif site.data.citations.papers[publication_key] %}
  {% assign citation_count = site.data.citations.papers[publication_key].citations %}
{% else %}
  {% for paper in site.data.citations.papers %}
    {% assign paper_key = paper[0] %}
    {% if paper_key contains entry.google_scholar_id %}
      {% assign citation_count = paper[1].citations %}
      {% break %}
    {% endif %}
  {% endfor %}
{% endif %}
|
||||||
|
|
||||||
<img
|
<img
|
||||||
src="https://img.shields.io/badge/scholar-{% google_scholar_citations site.data.socials.scholar_userid entry.google_scholar_id %}-4285F4?logo=googlescholar&labelColor=beige"
|
src="https://img.shields.io/badge/scholar-{{ citation_count }}-4285F4?logo=googlescholar&labelColor=beige"
|
||||||
alt="{% google_scholar_citations site.data.socials.scholar_userid entry.google_scholar_id %} Google Scholar citations"
|
alt="{{ citation_count }} Google Scholar citations"
|
||||||
>
|
>
|
||||||
</a>
|
</a>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|||||||
132
bin/update_scholar_citations.py
Normal file
132
bin/update_scholar_citations.py
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import yaml
|
||||||
|
from datetime import datetime
|
||||||
|
from scholarly import scholarly
|
||||||
|
|
||||||
|
|
||||||
|
def load_scholar_user_id() -> str:
    """Load the Google Scholar user ID from the configuration file.

    Reads ``_data/socials.yml`` (relative to the current working directory)
    and returns its ``scholar_userid`` entry.

    Returns:
        The configured Google Scholar user ID.

    Raises:
        SystemExit: With exit code 1 when the file is missing, is not valid
            YAML, or does not provide a non-empty ``scholar_userid``.
    """
    config_file = "_data/socials.yml"
    if not os.path.exists(config_file):
        print(
            f"Configuration file {config_file} not found. Please ensure the file exists and contains your Google Scholar user ID."
        )
        sys.exit(1)

    try:
        with open(config_file, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)
    except yaml.YAMLError as e:
        print(
            f"Error parsing YAML file {config_file}: {e}. Please check the file for correct YAML syntax."
        )
        sys.exit(1)

    # safe_load() yields None for an empty file and may yield a list or scalar
    # for unexpected content; only a mapping can carry the user ID.
    scholar_user_id = config.get("scholar_userid") if isinstance(config, dict) else None
    if not scholar_user_id:
        print(
            "No 'scholar_userid' found in the configuration file. Please add 'scholar_userid' to _data/socials.yml."
        )
        sys.exit(1)
    return scholar_user_id
|
||||||
|
|
||||||
|
|
||||||
|
# Resolved once at import time; load_scholar_user_id() exits the process with
# status 1 if the config file is missing, unparsable, or lacks the ID.
SCHOLAR_USER_ID: str = load_scholar_user_id()
|
||||||
|
# Path of the YAML file that get_scholar_citations() reads and rewrites.
OUTPUT_FILE: str = "_data/citations.yml"
|
||||||
|
|
||||||
|
|
||||||
|
def get_scholar_citations() -> None:
    """Fetch and update Google Scholar citation data.

    Looks up the author identified by ``SCHOLAR_USER_ID`` through
    ``scholarly``, records title, year and citation count for every
    publication that has an ID, and writes the result to ``OUTPUT_FILE``.

    The fetch is skipped when the existing file is already stamped with
    today's date, and the file is left untouched when the fetched papers are
    equal to the ones already stored.

    Raises:
        SystemExit: With exit code 1 when the author data cannot be fetched,
            contains no ``publications`` entry, or the output file cannot be
            written.
    """
    print(f"Fetching citations for Google Scholar ID: {SCHOLAR_USER_ID}")
    today = datetime.now().strftime("%Y-%m-%d")

    # Bind up front: the comparison near the end reads this even when the
    # output file does not exist yet or could not be read.
    existing_data = None

    # Check if the output file was already updated today
    if os.path.exists(OUTPUT_FILE):
        try:
            with open(OUTPUT_FILE, "r", encoding="utf-8") as f:
                existing_data = yaml.safe_load(f)
            metadata = (
                existing_data.get("metadata")
                if isinstance(existing_data, dict)
                else None
            )
            if isinstance(metadata, dict) and "last_updated" in metadata:
                print(f"Last updated on: {metadata['last_updated']}")
                # An unquoted YAML date is loaded as a date object, so compare
                # its string form against today's "%Y-%m-%d" stamp.
                if str(metadata["last_updated"]) == today:
                    print("Citations data is already up-to-date. Skipping fetch.")
                    return
        except Exception as e:
            # Best effort: an unreadable file only disables the shortcuts.
            print(
                f"Warning: Could not read existing citation data from {OUTPUT_FILE}: {e}. The file may be missing or corrupted."
            )

    citation_data = {"metadata": {"last_updated": today}, "papers": {}}

    scholarly.set_timeout(15)
    scholarly.set_retries(3)
    try:
        author = scholarly.search_author_id(SCHOLAR_USER_ID)
        author_data = scholarly.fill(author)
    except Exception as e:
        print(
            f"Error fetching author data from Google Scholar for user ID '{SCHOLAR_USER_ID}': {e}. Please check your internet connection and Scholar user ID."
        )
        sys.exit(1)

    if not author_data:
        print(
            f"Could not fetch author data for user ID '{SCHOLAR_USER_ID}'. Please verify the Scholar user ID and try again."
        )
        sys.exit(1)

    if "publications" not in author_data:
        print(f"No publications found in author data for user ID '{SCHOLAR_USER_ID}'.")
        sys.exit(1)

    for pub in author_data["publications"]:
        try:
            pub_id = pub.get("pub_id") or pub.get("author_pub_id")
            if not pub_id:
                print(
                    f"Warning: No ID found for publication: {pub.get('bib', {}).get('title', 'Unknown')}. This publication will be skipped."
                )
                continue

            title = pub.get("bib", {}).get("title", "Unknown Title")
            year = pub.get("bib", {}).get("pub_year", "Unknown Year")
            citations = pub.get("num_citations", 0)

            print(f"Found: {title} ({year}) - Citations: {citations}")

            citation_data["papers"][pub_id] = {
                "title": title,
                "year": year,
                "citations": citations,
            }
        except Exception as e:
            # One malformed publication must not abort the whole update.
            print(
                f"Error processing publication '{pub.get('bib', {}).get('title', 'Unknown')}': {e}. This publication will be skipped."
            )

    # Compare new data with existing data; only a mapping can hold "papers".
    if (
        isinstance(existing_data, dict)
        and existing_data.get("papers") == citation_data["papers"]
    ):
        print("No changes in citation data. Skipping file update.")
        return

    try:
        with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
            yaml.dump(citation_data, f, width=1000, sort_keys=True)
        print(f"Citation data saved to {OUTPUT_FILE}")
    except Exception as e:
        print(
            f"Error writing citation data to {OUTPUT_FILE}: {e}. Please check file permissions and disk space."
        )
        sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: report any otherwise-unhandled error and exit with
    # status 1.
    try:
        get_scholar_citations()
    except Exception as err:
        print(f"Unexpected error: {err}")
        sys.exit(1)
|
||||||
@ -1 +1,3 @@
|
|||||||
nbconvert
|
nbconvert
|
||||||
|
pyyaml
|
||||||
|
scholarly
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user