/* $Id: Org_ref.cpp 548810 2017-10-18 13:38:41Z ivanov $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  NCBI Staff
 *
 * File Description:
 *   Convenience methods for COrg_ref object
 *
 * Remark:
 *   This code was originally generated by application DATATOOL
 *   using specifications from the ASN data definition file
 *   'seqfeat.asn'.
 */

// standard includes

// generated includes
#include <ncbi_pch.hpp>
#include <objects/seqfeat/Org_ref.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/general/Dbtag.hpp>

// generated classes

BEGIN_NCBI_SCOPE

BEGIN_objects_SCOPE // namespace ncbi::objects::

// Destructor: this class adds no explicit cleanup of its own here.
COrg_ref::~COrg_ref(void)
{
}

// Appends a label to "label" based on content.
//
// The first available source is used, in this order of preference:
//   1. the taxname,
//   2. the common name,
//   3. the label of the first Dbtag in the Db list.
// Nothing is appended when none of these is available or when "label"
// is NULL.
void COrg_ref::GetLabel(string* label) const
{
    if (!label) {
        return;
    }
    if (IsSetTaxname()) {
        *label += GetTaxname();
    } else if (IsSetCommon()) {
        *label += GetCommon();
    } else if (IsSetDb() && !GetDb().empty()) {
        // Db can be set yet empty (e.g. after FilterOutParts erased every
        // tag), and front() on an empty container is undefined.
        GetDb().front()->GetLabel(label);
    }
}
    
// Database name carried by the Dbtag entries that hold the taxonomy id.
static const char* const s_taxonName = "taxon" ;
// NOTE(review): not referenced in the visible part of this file - confirm
// whether it is still needed before removing.
static const string s_nomenclature = "nomenclature=";

int
COrg_ref::GetTaxId() const
{
    if( ! IsSetDb() ) {
        return 0;
    }
    const TDb& lDbTags = GetDb();
 
    for(TDb::const_iterator i = lDbTags.begin();
	i != lDbTags.end();
	++i) {
	if( i->GetPointer()
	    && i->GetObject().GetDb().compare(s_taxonName) == 0 ) {
	    const CObject_id& id = i->GetObject().GetTag();
	    if( id.IsId() )
		return id.GetId();
	}
    }
    return 0;
}

int
COrg_ref::SetTaxId( int tax_id )
{
    int old_id(0);

    TDb& lDbTags = SetDb();
    // Try to update existing tax id first
    for(TDb::iterator i = lDbTags.begin();
	i != lDbTags.end();
	++i) {
	if( *i && i->GetObject().GetDb() == s_taxonName ) {
	    CObject_id& id = i->GetObject().SetTag();
	    if( id.IsId() )
		old_id = id.GetId();
	    id.SetId() = tax_id;
	    return old_id;
	}
    }
    // Add new tag
    CRef< CDbtag > ref( new CDbtag() );
    ref->SetDb( s_taxonName );
    ref->SetTag().SetId( tax_id );
    SetDb().push_back( ref );

    return old_id;
}

// True when an Orgname is present and it has its lineage set.
bool COrg_ref::IsSetLineage(void) const
{
    if (!IsSetOrgname()) {
        return false;
    }
    return GetOrgname().IsSetLineage();
}

// Returns the lineage stored in the Orgname.
// Check IsSetLineage() first; no presence check is performed here.
const string& COrg_ref::GetLineage(void) const
{
    const COrgName& orgname = GetOrgname();
    return orgname.GetLineage();
}

// True when an Orgname is present and it has its Gcode set.
bool COrg_ref::IsSetGcode(void) const
{
    if (!IsSetOrgname()) {
        return false;
    }
    return GetOrgname().IsSetGcode();
}

int COrg_ref::GetGcode(void) const
{
    return GetOrgname ().GetGcode ();
}

// True when an Orgname is present and it has its Mgcode set.
bool COrg_ref::IsSetMgcode(void) const
{
    if (!IsSetOrgname()) {
        return false;
    }
    return GetOrgname().IsSetMgcode();
}

int COrg_ref::GetMgcode(void) const
{
    return GetOrgname ().GetMgcode ();
}

// True when an Orgname is present and it has its Pgcode set.
bool COrg_ref::IsSetPgcode(void) const
{
    if (!IsSetOrgname()) {
        return false;
    }
    return GetOrgname().IsSetPgcode();
}

int COrg_ref::GetPgcode(void) const
{
    return GetOrgname ().GetPgcode ();
}

// True when an Orgname is present and it has its Div field set.
bool COrg_ref::IsSetDivision(void) const
{
    if (!IsSetOrgname()) {
        return false;
    }
    return GetOrgname().IsSetDiv();
}

// Returns the Div field stored in the Orgname.
// Check IsSetDivision() first; no presence check is performed here.
const string& COrg_ref::GetDivision(void) const
{
    const COrgName& orgname = GetOrgname();
    return orgname.GetDiv();
}

// True when an Orgname is present and it has its Mod list set.
bool COrg_ref::IsSetOrgMod(void) const
{
    if (!IsSetOrgname()) {
        return false;
    }
    return GetOrgname().IsSetMod();
}


// Returns the part of the taxname that follows its first two
// space-separated words, trimmed of surrounding spaces.
// Returns an empty string when the taxname is unset or has fewer than
// three words.
string COrg_ref::x_GetTaxnameAfterFirstTwoWords() const
{
    string rest;
    if (IsSetTaxname()) {
        rest = GetTaxname();
    }

    // Strip one leading word per pass; modifiers are only looked for in
    // what remains after two words.
    for (int stripped = 0; stripped < 2; ++stripped) {
        const size_t space = NStr::Find(rest, " ");
        if (space == string::npos) {
            return string();
        }
        rest.erase(0, space + 1);
        NStr::TruncateSpacesInPlace(rest);
    }
    return rest;
}


// Reports whether "value" occurs in "taxname" as a whole word, i.e. at a
// position where it is neither immediately preceded nor immediately
// followed by an alphabetic character.
//
// Returns false when either argument is blank.
//
// Every occurrence is examined, including ones that overlap a previously
// rejected occurrence, so a valid match is not skipped just because an
// earlier candidate started inside it.
bool s_FindWholeWord (string taxname, string value)
{
    if (NStr::IsBlank(taxname) || NStr::IsBlank(value)) {
        return false;
    }

    const size_t value_len = value.length();
    for (size_t pos = NStr::Find(taxname, value);
         pos != string::npos;
         pos = NStr::Find(taxname, value, pos + 1)) {
        // Cast through unsigned char so that bytes above 0x7F are never
        // passed to isalpha as negative values.
        const bool alpha_before =
            pos != 0
            && isalpha(static_cast<unsigned char>(taxname[pos - 1]));
        const size_t after = pos + value_len;
        const bool alpha_after =
            after < taxname.length()
            && isalpha(static_cast<unsigned char>(taxname[after]));
        if (!alpha_before && !alpha_after) {
            return true;
        }
    }
    return false;
}


bool COrg_ref::IsVarietyValid(const string& variety) const
{
    if (NStr::IsBlank(variety)) {
        return true;
    }
    string taxname = x_GetTaxnameAfterFirstTwoWords();
    return s_FindWholeWord(taxname, variety);
}


bool COrg_ref::HasValidVariety() const
{
    if (!IsSetOrgname() || !GetOrgname().IsSetMod()) {
        return false;
    }
    ITERATE(COrgName::TMod, it, GetOrgname().GetMod()) {
        if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == COrgMod::eSubtype_variety
            && (*it)->IsSetSubname() && !NStr::IsBlank((*it)->GetSubname())
            && IsVarietyValid((*it)->GetSubname())) {
            return true;
        }
    }
    return false;
}


bool COrg_ref::IsSubspeciesValid(const string& subspecies) const
{
    if (NStr::IsBlank(subspecies)) {
        return true;
    }
    string taxname = x_GetTaxnameAfterFirstTwoWords();
    if (s_FindWholeWord(taxname, subspecies)) {
        return true;
    } else {
        return HasValidVariety();
    }

}


// Copies field "Field" from o1 into o3 when both o1 and o2 have it set and
// NStr::Equal considers the two values equal.
// Expands to a complete if-statement that already ends in ';'.
#define MAKE_COMMON(o1,o2,o3,Field) if (o1.IsSet##Field() && o2.IsSet##Field() && NStr::Equal(o1.Get##Field(), o2.Get##Field())) o3.Set##Field(o1.Get##Field());

void s_MakeCommonStringList(const list< string >& list1, const list< string >& list2, list< string >& list3)
{
    ITERATE(list< string >, it1, list1) {
        bool found = false;
        ITERATE(list< string >, it2, list2) {
            if (NStr::Equal(*it1, *it2)) {
                found = true;
                break;
            }
        }
        if (found) {
            list3.push_back(*it1);
        }
    }
}


// Builds a new COrg_ref holding only the information shared by this object
// and "other".
//
// Returns a null reference when the two objects have different tax ids.
// When the objects are equal the result is a copy of this object.
// Otherwise the result contains:
//   - Taxname / Common, when both objects have them set to equal values;
//   - the Mod and Syn strings present in both objects;
//   - copies of the Dbtags of this object that have an equal Dbtag in
//     "other";
//   - the common Orgname, as computed by COrgName::MakeCommon.
CRef<COrg_ref> COrg_ref::MakeCommon(const COrg_ref& other) const
{
    int taxid1 = GetTaxId();
    int taxid2 = other.GetTaxId();
    if (taxid1 != taxid2) {
        return CRef<COrg_ref>(NULL);
    }

    CRef<COrg_ref> common(new COrg_ref());
    if (Equals(other)) {
        common->Assign(*this);
    } else {
        MAKE_COMMON((*this), other, (*common), Taxname);
        MAKE_COMMON((*this), other, (*common), Common);

        // common mods
        if (IsSetMod() && other.IsSetMod()) {
            s_MakeCommonStringList(GetMod(), other.GetMod(), common->SetMod());
            // SetMod() marks the field as set; undo that if nothing matched.
            if (common->GetMod().empty()) {
                common->ResetMod();
            }
        }

        // common synonyms
        if (IsSetSyn() && other.IsSetSyn()) {
            s_MakeCommonStringList(GetSyn(), other.GetSyn(), common->SetSyn());
            if (common->GetSyn().empty()) {
                common->ResetSyn();
            }
        }

        // common dbtags
        if (IsSetDb() && other.IsSetDb()) {
            ITERATE(TDb, it1, GetDb()) {
                bool found = false;
                // Compare against the OTHER object's dbtags; comparing
                // against our own list would match every tag with itself.
                ITERATE(TDb, it2, other.GetDb()) {
                    if ((*it1)->Equals(**it2)) {
                        found = true;
                        break;
                    }
                }
                if (found) {
                    CRef<CDbtag> add(new CDbtag());
                    add->Assign(**it1);
                    common->SetDb().push_back(add);
                }
            }
        }

        // common orgname
        if (IsSetOrgname() && other.IsSetOrgname()) {
            CRef<COrgName> orgname = GetOrgname().MakeCommon(other.GetOrgname());
            if (orgname) {
                common->SetOrgname().Assign(*orgname);
            }
        }
    }

    return common;
}


// Lookup table from taxname to a prebuilt COrg_ref, ordered with the
// PNocase comparator (case-insensitive, judging by its name - confirm).
// Filled by s_ProcessOrgRefMapLine.
typedef map<string, CRef<COrg_ref>, PNocase> TOrgrefMap;
static TOrgrefMap s_OrgRefMap;
// Taxnames in the order in which their table lines were processed.
static vector<string> s_CommonTaxnameList;
// Set once s_InitializeOrgRefMap has finished loading the table.
static bool                s_OrgRefMapInitialized = false;
// Held by s_InitializeOrgRefMap while it checks the flag and loads the table.
DEFINE_STATIC_FAST_MUTEX(s_OrgRefMapMutex);

// automatically generated include file
#include "common_tax.inc"

static void s_ProcessOrgRefMapLine(const CTempString& line)
{
    vector<string> tokens;
    NStr::Split(line, "\t", tokens);
    if (tokens.size() != 8) {
        //        ERR_POST_X(1, Warning << "Bad format in common_tax.txt entry " << line
        //                   << "; disregarding");
    } else {
        NON_CONST_ITERATE(vector<string>, t, tokens) {
            NStr::TruncateSpacesInPlace(*t);
            if (NStr::Equal(*t, "-")) {
                *t = kEmptyStr;
            }
        }

        s_CommonTaxnameList.push_back(tokens[0]);
        CRef<COrg_ref> org(new COrg_ref());
        org->SetTaxname(tokens[0]);
        if (!NStr::IsBlank(tokens[1])) {
            org->SetCommon(tokens[1]);
        }
        
        if (!NStr::IsBlank(tokens[2])) {
            try {
                org->SetOrgname().SetGcode(NStr::StringToNonNegativeInt(tokens[2]));
            } catch (CException& ex) {
            }
        }
        if (!NStr::IsBlank(tokens[3])) {
            try {
                org->SetOrgname().SetMgcode(NStr::StringToNonNegativeInt(tokens[3]));
            } catch (CException& ex) {
            }
        }
        if (!NStr::IsBlank(tokens[4])) {
            try {
                org->SetOrgname().SetPgcode(NStr::StringToNonNegativeInt(tokens[4]));
            } catch (CException& ex) {
            }
        }

        if (!NStr::IsBlank(tokens[5])) {
            try {
                CRef<CDbtag>taxon(new CDbtag());
                taxon->SetDb("taxon");
                taxon->SetTag().SetId(NStr::StringToNonNegativeInt(tokens[5]));
                org->SetDb().push_back(taxon);
            } catch (CException& ex) {
            }
        }

        if (!NStr::IsBlank(tokens[6])) {
            org->SetOrgname().SetDiv(tokens[6]);
        }
        
        if (!NStr::IsBlank(tokens[7])) {
            org->SetOrgname().SetLineage(tokens[7]);
        }

        s_OrgRefMap[tokens[0]] = org;
    }
}


static void s_InitializeOrgRefMap(void)
{
    CFastMutexGuard GUARD(s_OrgRefMapMutex);
    if (s_OrgRefMapInitialized) {
        return;
    }
    string file = g_FindDataFile("common_tax.txt");
    CRef<ILineReader> lr;
    if (!file.empty()) {
        LOG_POST("Reading from " + file + " for popular organisms.");
        try {
            lr = ILineReader::New(file);
        } NCBI_CATCH("s_InitializeOrgRefMap")
    } else {
        LOG_POST("Falling back on built-in data for popular organisms.");
    }

    if (lr.Empty()) {
        size_t num_orgrefs = sizeof(kOrgRefList) / sizeof(char *);
        for (size_t i = 0; i < num_orgrefs; i++) {
            const char *p = kOrgRefList[i];
            s_ProcessOrgRefMapLine(p);
        }
    } else {
        do {
            s_ProcessOrgRefMapLine(*++*lr);
        } while (!lr->AtEOF());
    }

    s_OrgRefMapInitialized = true;
}


// Looks "taxname" up in the common-organism table, loading the table on
// first use.
// Returns the matching table entry, or a null reference when there is none.
CConstRef<COrg_ref> COrg_ref::TableLookup(const string& taxname)
{
    s_InitializeOrgRefMap();

    CConstRef<COrg_ref> result;
    TOrgrefMap::iterator found = s_OrgRefMap.find(taxname);
    if (found != s_OrgRefMap.end()) {
        result.Reset(found->second.GetPointer());
    }
    return result;
}


bool COrg_ref::UpdateFromTable()
{
    if (!IsSetTaxname() || NStr::IsBlank(GetTaxname())) {
        return false;
    }
    CConstRef<COrg_ref> lookup = TableLookup(GetTaxname());
    if (lookup) {
        if (lookup->IsSetCommon() && !NStr::IsBlank(lookup->GetCommon())) {
            SetCommon(lookup->GetCommon());
        }
        if (lookup->IsSetGcode()) {
            SetOrgname().SetGcode(lookup->GetGcode());
        }
        if (lookup->IsSetMgcode()) {
            SetOrgname().SetMgcode(lookup->GetMgcode());
        }
        if (lookup->IsSetDivision()) {
            SetOrgname().SetDiv(lookup->GetDivision());
        }
        if (lookup->IsSetDb()) {
            CObject_id::TId taxid = 0;
            ITERATE(TDb, it, lookup->GetDb()) {
                if ((*it)->IsSetDb() &&
                    (*it)->IsSetTag() &&
                    (*it)->GetTag().IsId() &&
                    NStr::Equal((*it)->GetDb(), "taxon")) {
                    taxid = (*it)->GetTag().GetId();
                    break;
                }
            }
            if (taxid > 0) {
                SetTaxId(taxid);
            }
        }
        if (lookup->IsSetLineage()) {
            SetOrgname().SetLineage(lookup->GetOrgname().GetLineage());
        }
        return true;
    } else {
        return false;
    }
}


// Returns the taxnames of the common-organism table, loading the table on
// first use.
const vector<string>& COrg_ref::GetTaxnameList()
{
    s_InitializeOrgRefMap();
    const vector<string>& names = s_CommonTaxnameList;
    return names;
}


void COrg_ref::CleanForGenBank()
{
    ResetSyn();
}

// True when none of the bits of "f" are set in "a".
#define NO_FLAG(a,f) (( a & f ) == 0)

// Removes every part of this object whose flag is absent from "to_remain".
//
// eOrgref_all leaves the object untouched and eOrgref_nothing resets it
// completely. For any other value each part (taxname, common, mod, db,
// syn and the individual Orgname fields) is reset when none of the bits
// of its flag are present in "to_remain".
void COrg_ref::FilterOutParts( fOrgref_parts to_remain )
{
    if( to_remain == eOrgref_all ) {
        return;
    }
    if( to_remain == eOrgref_nothing ) {
        Reset();
        return;
    }

    // Plain top-level fields
    if( IsSetTaxname() && NO_FLAG( to_remain, eOrgref_taxname ) ) {
        ResetTaxname();
    }
    if( IsSetCommon() && NO_FLAG( to_remain, eOrgref_common ) ) {
        ResetCommon();
    }
    if( IsSetMod() && NO_FLAG( to_remain, eOrgref_mod ) ) {
        ResetMod();
    }

    // Db tags: drop them all, or only the taxon ones
    if( IsSetDb() ) {
        if( NO_FLAG( to_remain, eOrgref_db_all ) ) {
            ResetDb();
        } else if( NO_FLAG( to_remain, eOrgref_db_taxid ) ) {
            TDb& tags = SetDb();
            TDb::iterator cur = tags.begin();
            while( cur != tags.end() ) {
                if( *cur && cur->GetObject().GetDb() == s_taxonName ) {
                    cur = tags.erase( cur );
                } else {
                    ++cur;
                }
            }
        }
    }

    if( IsSetSyn() && NO_FLAG( to_remain, eOrgref_syn ) ) {
        ResetSyn();
    }

    // Orgname and its parts
    if( !IsSetOrgname() ) {
        return;
    }
    if( NO_FLAG( to_remain, eOrgref_on_all ) ) {
        ResetOrgname();
        return;
    }

    COrgName& orgname = SetOrgname();
    if( orgname.IsSetName() && NO_FLAG( to_remain, eOrgref_on_name ) ) {
        orgname.ResetName();
    }

    if( orgname.IsSetMod() ) {
        if( NO_FLAG( to_remain, eOrgref_on_mod ) ) {
            orgname.ResetMod();
        } else {
            // Some modifiers are kept: filter out the individual kinds
            if( NO_FLAG( to_remain, eOrgref_on_mod_nom ) ) {
                orgname.ResetNomenclature();
            }
            if( NO_FLAG( to_remain, eOrgref_on_mod_oldname ) ) {
                orgname.RemoveModBySubtype( COrgMod::eSubtype_old_name );
            }
            if( NO_FLAG( to_remain, eOrgref_on_mod_tm ) ) {
                orgname.RemoveModBySubtype( COrgMod::eSubtype_type_material );
            }
        }
    }

    if( orgname.IsSetAttrib() ) {
        if( NO_FLAG( to_remain, eOrgref_on_attr_all ) ) {
            orgname.ResetAttrib();
        } else if( NO_FLAG( to_remain, eOrgref_on_attr_nofwd )
                   && orgname.IsModifierForwardingDisabled() ) {
            orgname.EnableModifierForwarding();
        }
    }

    if( orgname.IsSetLineage() && NO_FLAG( to_remain, eOrgref_on_lin ) ) {
        orgname.ResetLineage();
    }
    if( orgname.IsSetGcode() && NO_FLAG( to_remain, eOrgref_on_gc ) ) {
        orgname.ResetGcode();
    }
    if( orgname.IsSetMgcode() && NO_FLAG( to_remain, eOrgref_on_mgc ) ) {
        orgname.ResetMgcode();
    }
    if( orgname.IsSetPgcode() && NO_FLAG( to_remain, eOrgref_on_pgc ) ) {
        orgname.ResetPgcode();
    }
    if( orgname.IsSetDiv() && NO_FLAG( to_remain, eOrgref_on_div ) ) {
        orgname.ResetDiv();
    }
}


END_objects_SCOPE // namespace ncbi::objects::

END_NCBI_SCOPE

/* Original file checksum: lines: 61, chars: 1882, CRC32: c3300cc2 */
