Skip to content

Added options to extract citations for clusters #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 47 additions & 1 deletion scholar.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,10 @@ def set_num_page_results(self, num_page_results):
msg = 'maximum number of results on page must be numeric'
self.num_results = ScholarUtils.ensure_int(num_page_results, msg)

def set_start(self, start):
    """
    Sets the offset of the first result to request.

    The offset is passed through ScholarUtils.ensure_int together with
    the error message below, the same way set_num_page_results treats
    its argument, instead of being stored unchecked. None is stored
    unchanged and stands for "no offset given".
    """
    msg = 'start offset must be numeric'
    if start is None:
        # Keep accepting None: readers of self.start fall back to 0.
        self.start = None
    else:
        self.start = ScholarUtils.ensure_int(start, msg)

def get_url(self):
"""
Returns a complete, submittable URL string for this particular
Expand Down Expand Up @@ -560,6 +564,39 @@ def get_url(self):

return self.SCHOLAR_CLUSTER_URL % urlargs

class CitesClusterScholarQuery(ScholarQuery):
    """
    This version just pulls up the citations of an article cluster
    whose ID we already know about.

    The generated URL carries two arguments: the cluster ID (as the
    'cites' parameter) and the start offset of the first result.
    """
    SCHOLAR_CLUSTER_URL = ScholarConf.SCHOLAR_SITE + '/scholar?' \
        + 'oi=bibs&cites=%(cluster)s' \
        + '&start=%(start)s'

    def __init__(self, cluster=None):
        """
        Creates the query and hands the given cluster ID to
        set_cluster().
        """
        ScholarQuery.__init__(self)
        self.cluster = None
        # NOTE(review): with the default cluster=None this still goes
        # through ScholarUtils.ensure_int -- confirm how that helper
        # treats None.
        self.set_cluster(cluster)

    def set_cluster(self, cluster):
        """
        Sets search to a Google Scholar results cluster ID.
        """
        msg = 'cluster ID must be numeric'
        self.cluster = ScholarUtils.ensure_int(cluster, msg)

    def get_url(self):
        """
        Returns the URL string for this citations query.

        Raises QueryArgumentError when no cluster ID is set.
        """
        if self.cluster is None:
            raise QueryArgumentError('cluster query needs cluster ID')

        # Nothing in this class assigns self.start; it is only known to
        # be set by set_start(). Read it defensively so a query that
        # never had set_start() called starts at offset 0 instead of
        # failing on a missing attribute.
        start = getattr(self, 'start', None) or 0

        # NOTE(review): the URL template has no placeholder for the page
        # size, so num_results is not part of this URL. Add a 'num'
        # argument to SCHOLAR_CLUSTER_URL if it should be transmitted.
        urlargs = {'cluster': quote(str(self.cluster)),
                   'start': quote(str(start))}

        return self.SCHOLAR_CLUSTER_URL % urlargs

class SearchScholarQuery(ScholarQuery):
"""
Expand Down Expand Up @@ -955,6 +992,9 @@ def main():
help='Do not search, just use articles in given cluster ID')
group.add_option('-c', '--count', type='int', default=None,
help='Maximum number of results')
group.add_option('-S', '--start', type='int', default=0,
help='For more than 20 citations, starting offset')
group.add_option('--cites', default=None, help='Output citations instead of cluster', action="store_true")
parser.add_option_group(group)

group = optparse.OptionGroup(parser, 'Output format',
Expand Down Expand Up @@ -1024,7 +1064,10 @@ def main():
querier.apply_settings(settings)

if options.cluster_id:
query = ClusterScholarQuery(cluster=options.cluster_id)
if options.cites:
query = CitesClusterScholarQuery(cluster=options.cluster_id)
else:
query = ClusterScholarQuery(cluster=options.cluster_id)
else:
query = SearchScholarQuery()
if options.author:
Expand All @@ -1048,6 +1091,9 @@ def main():
options.count = min(options.count, ScholarConf.MAX_PAGE_RESULTS)
query.set_num_page_results(options.count)

if options.start is not None:
query.set_start(options.start)

querier.send_query(query)

if options.csv:
Expand Down