Skip to content
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions pyscf/tools/trexio.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from pyscf import lib
from pyscf import gto
from pyscf import scf
from pyscf import fci
import trexio

def to_trexio(obj, filename, backend='h5'):
Expand Down Expand Up @@ -223,3 +224,67 @@ def _group_by(a, keys):
assert all(keys[:-1] <= keys[1:])
idx = np.unique(keys, return_index=True)[1]
return np.split(a, idx[1:])

def get_occsa_and_occsb(mcscf, norb, nelec, ci_threshold=1e-2):
    """Collect the significant determinants of a CI wavefunction.

    The CI matrix is read as ``ci[i, j]``, where ``i`` indexes the alpha
    occupation strings and ``j`` the beta occupation strings, both in the
    order produced by ``fci.cistring.gen_occslst``.

    Args:
        mcscf: Object exposing the 2D CI coefficient matrix as ``mcscf.ci``.
        norb: Number of orbitals spanned by the CI expansion.
        nelec: Either the total number of electrons (split evenly between
            the two spins, i.e. ``nelec // 2`` each) or an explicit
            ``(n_alpha, n_beta)`` pair.
        ci_threshold: Determinants with ``|coefficient| <= ci_threshold``
            are discarded.

    Returns:
        tuple: ``(occsa, occsb, ci_values, num_determinants)`` where
        ``occsa[k]`` / ``occsb[k]`` are the occupied alpha / beta orbital
        indices of the k-th determinant, ``ci_values[k]`` is its coefficient
        and ``num_determinants == len(ci_values)``. Entries are sorted by
        decreasing ``|coefficient|``.
    """
    ci = np.asarray(mcscf.ci)

    if np.ndim(nelec) == 0:
        neleca = nelecb = nelec // 2
    else:
        neleca, nelecb = nelec

    occs_alpha = fci.cistring.gen_occslst(range(norb), neleca)
    if nelecb == neleca:
        occs_beta = occs_alpha
    else:
        occs_beta = fci.cistring.gen_occslst(range(norb), nelecb)

    # Best effort when the CI matrix is larger than the string lists (e.g.
    # inconsistent norb/nelec): only look at entries that have a matching
    # occupation string instead of raising an IndexError.
    ci = ci[:len(occs_alpha), :len(occs_beta)]

    # Scan the *whole* matrix. Row-major order from np.nonzero matches the
    # order of a nested ``for i: for j:`` loop.
    rows, cols = np.nonzero(np.abs(ci) > ci_threshold)

    # Sort by the absolute value of the CI coefficients in descending order.
    order = np.argsort(-np.abs(ci[rows, cols]))
    rows = rows[order]
    cols = cols[order]

    occsa_sorted = [occs_alpha[i] for i in rows]
    occsb_sorted = [occs_beta[j] for j in cols]
    ci_values_sorted = [ci[i, j] for i, j in zip(rows, cols)]

    # The count must describe exactly the determinants that are returned;
    # callers use it as the number of entries to write.
    num_determinants = len(ci_values_sorted)

    return occsa_sorted, occsb_sorted, ci_values_sorted, num_determinants

def det_to_trexio(mcscf, norb, nelec, filename, backend='h5'):
    """Write the significant CI determinants and coefficients to a TREXIO file.

    The file is opened in unsafe-update mode (``'u'``). Any determinant group
    already present is deleted before the new data are written.

    Args:
        mcscf: Object exposing ``ci`` (CI matrix) and ``mo_energy`` (used only
            for its ``size``, taken as the number of molecular orbitals).
        norb: Number of orbitals passed on to ``get_occsa_and_occsb``.
        nelec: Number of electrons passed on to ``get_occsa_and_occsb``.
        filename: Path of the TREXIO file to update.
        backend: Back-end name understood by ``_mode`` (default ``'h5'``).

    Raises:
        ValueError: If no determinant was selected, in which case there is
            nothing to write and the electron counts cannot be determined.
    """
    from trexio_tools.group_tools import determinant as trexio_det

    mo_num = mcscf.mo_energy.size
    # Number of 64-bit integers needed to hold one spin bit string.
    int64_num = (mo_num - 1) // 64 + 1

    occsa, occsb, ci_values, _ = get_occsa_and_occsb(mcscf, norb, nelec)
    if not ci_values:
        raise ValueError('No determinant selected from the CI vector; '
                         'nothing to write.')

    det_list = []
    for occ_up, occ_dn in zip(occsa, occsb):
        # Encode *this* determinant's alpha string followed by its beta
        # string. Plain Python ints avoid fixed-width overflow in any bit
        # shifts done by the encoder.
        # NOTE(review): to_determinant_list is assumed to take 0-based
        # orbital indices -- confirm against trexio_tools.
        bits = []
        bits += trexio_det.to_determinant_list([int(p) for p in occ_up], int64_num)
        bits += trexio_det.to_determinant_list([int(p) for p in occ_dn], int64_num)
        det_list.append(bits)

    # Count what is actually written so it always matches the buffers.
    num_determinants = len(det_list)
    n_up = len(occsa[0])
    n_dn = len(occsb[0])
    offset_file = 0

    with trexio.File(filename, 'u', back_end=_mode(backend)) as tf:
        if trexio.has_determinant(tf):
            trexio.delete_determinant(tf)
        trexio.write_mo_num(tf, mo_num)
        trexio.write_electron_up_num(tf, n_up)
        trexio.write_electron_dn_num(tf, n_dn)
        trexio.write_electron_num(tf, n_up + n_dn)
        trexio.write_determinant_list(tf, offset_file, num_determinants, det_list)
        trexio.write_determinant_coefficient(tf, offset_file, num_determinants, ci_values)


def read_det_trexio(filename, backend='h5'):
    """Read the determinants and CI coefficients stored in a TREXIO file.

    The file is opened read-only with ``trexio.TREXIO_AUTO``, which lets the
    TREXIO library detect the back end that produced the file. An explicit
    back end is only required when creating or writing a file, so using
    auto-detection here keeps reading compatible with any back end.

    Args:
        filename: Path of the TREXIO file to read.
        backend: Ignored. Kept only so that existing callers passing a
            back-end name keep working.

    Returns:
        tuple: ``(num_det, coeff, det)`` where ``num_det`` is the stored
        number of determinants and ``coeff`` / ``det`` are the first element
        of what ``trexio.read_determinant_coefficient`` /
        ``trexio.read_determinant_list`` return.
        NOTE(review): presumably these are the coefficient array and the
        determinant bit-string array -- confirm against the TREXIO API.
    """
    offset_file = 0

    with trexio.File(filename, 'r', back_end=trexio.TREXIO_AUTO) as tf:
        num_det = trexio.read_determinant_num(tf)
        coeff = trexio.read_determinant_coefficient(tf, offset_file, num_det)
        det = trexio.read_determinant_list(tf, offset_file, num_det)

    return num_det, coeff[0], det[0]