From 26d8439038e14cc88df7823fb067bcb16653f688 Mon Sep 17 00:00:00 2001
From: Arnaud Levy <contact@arnaudlevy.com>
Date: Wed, 31 Jan 2024 10:50:16 +0100
Subject: [PATCH] hal importer

---
 app/models/research/hal/author.rb  |  2 +-
 app/models/research/publication.rb | 57 +---------------------------
 app/services/importers/hal.rb      | 60 ++++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+), 57 deletions(-)
 create mode 100644 app/services/importers/hal.rb

diff --git a/app/models/research/hal/author.rb b/app/models/research/hal/author.rb
index 8843d31ef..1d45b4873 100644
--- a/app/models/research/hal/author.rb
+++ b/app/models/research/hal/author.rb
@@ -68,7 +68,7 @@ class Research::Hal::Author < ApplicationRecord
     publications.clear
     # Do not overuse the API if no researcher is concerned
     return if researchers.none?
-    Research::Hal::Publication.import_from_hal_for_author(self).each do |publication|
+    Importers::Hal.import_publications_for_author(self).each do |publication|
       publications << publication
     end
     publications
diff --git a/app/models/research/publication.rb b/app/models/research/publication.rb
index 615a27f8e..03599f916 100644
--- a/app/models/research/publication.rb
+++ b/app/models/research/publication.rb
@@ -59,61 +59,6 @@ class Research::Publication < ApplicationRecord
     source == :osuny
   end
 
-  # https://api.archives-ouvertes.fr/search/?q=03713859&fl=*
-  def self.import_from_hal_for_author(author)
-    fields = [
-      'docid',
-      'title_s',
-      'citationRef_s',
-      'citationFull_s',
-      'uri_s',
-      'doiId_s',
-      'publicationDate_tdate',
-      'linkExtUrl_s',
-      'abstract_s',
-      'openAccess_bool',
-      'journalTitle_s',
-      'authFullName_s',
-      'authLastName_s',
-      'authFirstName_s',
-      'files_s'
-      # '*',
-    ]
-    publications = []
-    response = HalOpenscience::Document.search "authIdFormPerson_s:#{author.docid}", fields: fields, limit: 1000
-    response.results.each do |doc|
-      publication = create_from doc
-      publications << publication
-    end
-    publications
-  end
-
-  def self.create_from(doc)
-    publication = where(hal_docid: doc.docid).first_or_create
-    puts "HAL sync publication #{doc.docid}"
-    publication.title = Osuny::Sanitizer.sanitize doc.title_s.first, 'string'
-    publication.ref = doc.attributes['citationRef_s']
-    publication.citation_full = doc.attributes['citationFull_s']
-    publication.abstract = doc.attributes['abstract_s']&.first
-    publication.hal_url = doc.attributes['uri_s']
-    publication.doi = doc.attributes['doiId_s']
-    publication.publication_date = doc.attributes['publicationDate_tdate']
-    publication.url = doc.attributes['linkExtUrl_s']
-    publication.open_access = doc.attributes['openAccess_bool']
-    publication.journal_title = doc.attributes['journalTitle_s']
-    publication.file = doc.attributes['files_s']&.first
-    publication.authors_list = doc.attributes['authFullName_s'].join(', ')
-    publication.authors_citeproc = []
-    doc.attributes['authLastName_s'].each_with_index do |last_name, index|
-      publication.authors_citeproc << {
-        "family" => last_name, 
-        "given" => doc.attributes['authFirstName_s'][index]
-      }
-    end
-    publication.save
-    publication
-  end
-
   def template_static
     "admin/research/publications/static"
   end
@@ -140,7 +85,7 @@ class Research::Publication < ApplicationRecord
     {
       "title" => title,
       "author" => authors_citeproc,
-      "URL" => hal_url,
+      "URL" => best_url,
       "container-title" => journal_title,
       "pdf" => file,
       "month-numeric" => publication_date.present? ? publication_date.month.to_s : nil,
diff --git a/app/services/importers/hal.rb b/app/services/importers/hal.rb
new file mode 100644
index 000000000..093abdc38
--- /dev/null
+++ b/app/services/importers/hal.rb
@@ -0,0 +1,60 @@
+module Importers
+  # Syncs publications from the HAL open archive into Research::Publication.
+  # Named Hal (not HAL): hal.rb autoloads as Importers::Hal under Rails
+  # naming conventions, and Research::Hal::Author calls Importers::Hal, so
+  # the HAL spelling would fail with an uninitialized constant.
+  class Hal
+
+    # Searches HAL for the documents of an author and syncs each one locally.
+    # author - object whose docid is used in the authIdFormPerson_s query.
+    # Returns an array with one publication per document in the response.
+    # https://api.archives-ouvertes.fr/search/?q=03713859&fl=*
+    def self.import_publications_for_author(author)
+      fields = %w[
+        docid title_s citationRef_s
+        citationFull_s uri_s doiId_s
+        publicationDate_tdate linkExtUrl_s abstract_s
+        openAccess_bool journalTitle_s authFullName_s
+        authLastName_s authFirstName_s files_s
+      ]
+      # NOTE(review): limit presumably caps the search at 1000 documents, so
+      # longer publication lists would be truncated - confirm with the gem.
+      response = HalOpenscience::Document.search "authIdFormPerson_s:#{author.docid}", fields: fields, limit: 1000
+      response.results.map { |doc| create_publication_from doc }
+    end
+
+    # Finds or creates the publication matching the document's docid, copies
+    # the HAL fields onto it and saves it, so importing a document again
+    # updates the existing record instead of duplicating it.
+    # doc - a search result exposing docid, title_s and an attributes hash.
+    # Returns the publication; the result of save is not checked, so a
+    # failed save is not reported to the caller.
+    def self.create_publication_from(doc)
+      attributes = doc.attributes
+      publication = Research::Publication.where(hal_docid: doc.docid).first_or_create
+      puts "HAL sync publication #{doc.docid}"
+      publication.title = Osuny::Sanitizer.sanitize doc.title_s.first, 'string'
+      publication.ref = attributes['citationRef_s']
+      publication.citation_full = attributes['citationFull_s']
+      publication.abstract = attributes['abstract_s']&.first
+      publication.hal_url = attributes['uri_s']
+      publication.doi = attributes['doiId_s']
+      publication.publication_date = attributes['publicationDate_tdate']
+      publication.url = attributes['linkExtUrl_s']
+      publication.open_access = attributes['openAccess_bool']
+      publication.journal_title = attributes['journalTitle_s']
+      publication.file = attributes['files_s']&.first
+      # Array() keeps a document lacking author fields from raising on nil.
+      publication.authors_list = Array(attributes['authFullName_s']).join(', ')
+      last_names = Array(attributes['authLastName_s'])
+      first_names = Array(attributes['authFirstName_s'])
+      # Names are paired by position; a missing first name gives a nil "given".
+      publication.authors_citeproc = last_names.each_with_index.map do |last_name, index|
+        { "family" => last_name, "given" => first_names[index] }
+      end
+      publication.save
+      publication
+    end
+
+  end
+end
\ No newline at end of file
-- 
GitLab