// This file contains code for loading artifact files and run files

import {
  ArtifactFileId,
  b64ToHex,
  DirMetadata,
  FileEntry,
  Manifest,
  MetadataNode,
  parseArtifactRef,
  ReadyManifest,
} from '@wandb/weave/core';
import * as _ from 'lodash';

import {backendHost} from './config';
import {
  ArtifactFilesDocument,
  ArtifactFilesQuery,
  ArtifactManifestFromIdDocument,
  ArtifactManifestFromIdQuery,
  ArtifactManifestFromMembershipDocument,
  ArtifactManifestFromMembershipQuery,
  ResolveLatestSequenceAliasToArtifactIdDocument,
  ResolveLatestSequenceAliasToArtifactIdQuery,
  SingleFileDocument,
  SingleFileQuery,
} from './generated/graphql';
import {ApolloClient} from './state/types';
import {propagateErrorsContext} from './util/errors';
import * as Requests from './util/requests';

// Module-level caches. They store promises and nothing in this file ever
// removes an entry.

// Manifest promises keyed by artifact ID (membership manifests are stored in
// the same map, keyed by membership ID).
const artifactManifestCache: Record<string, Promise<ReadyManifest>> = {};

// File-content promises keyed by artifact ID, then by asset path.
const artifactFileCache: Record<
  string,
  Record<
    string,
    Promise<{
      refFileId: ArtifactFileId | null;
      contents: string | null;
    }>
  >
> = {};

// Run-file content promises keyed by entity, project, run and file name.
const runFileCache: Record<
  string,
  Record<
    string,
    Record<string, Record<string, Promise<{contents: string | null}>>>
  >
> = {};

/**
 * Builds the backend download URL for a single manifest entry.
 *
 * Only the 'wandb-storage-policy-v1' policy with layouts 'V1' and 'V2' is
 * handled; anything else logs a warning and yields an empty string so that a
 * download link built from it does nothing.
 *
 * @param storagePolicy storage policy name taken from the manifest
 * @param storagePolicyConfig optional region/layout overrides from the manifest
 * @param defaultCloudRegion region used when the config has no storageRegion
 * @param entityName entity path segment of the URL
 * @param entry manifest entry; its digest (and birthArtifactID for V2) is used
 * @param fileName file name, passed through encodeURI
 * @param projectName optional; used for V2 only together with collectionName
 * @param collectionName optional; used for V2 only together with projectName
 * @returns the URL, or '' for an unhandled policy or layout
 * @throws Error when the policy is handled but the entry has no digest
 */
function artifactFileUrl(
  storagePolicy: string,
  storagePolicyConfig: {
    storageRegion?: string;
    storageLayout?: string;
  },
  defaultCloudRegion: string,
  entityName: string,
  entry: FileEntry,
  fileName: string,
  projectName?: string,
  collectionName?: string
) {
  if (storagePolicy !== 'wandb-storage-policy-v1') {
    console.warn('unhandled storage policy');
    // An empty URL turns a download link into a no-op.
    return '';
  }

  const region = storagePolicyConfig.storageRegion || defaultCloudRegion;
  const layout = storagePolicyConfig.storageLayout || 'V1';

  if (entry.digest == null) {
    throw new Error('Missing digest for file');
  }

  if (layout === 'V1') {
    return `${backendHost()}/artifacts/${entityName}/${b64ToHex(
      entry.digest
    )}/${encodeURI(fileName)}`;
  }

  if (layout === 'V2') {
    // The project/collection segments are only present when both are known.
    const scope =
      projectName != null && collectionName != null
        ? `${entityName}/${projectName}/${collectionName}`
        : entityName;
    return `${backendHost()}/artifactsV2/${region}/${scope}/${encodeURI(
      entry.birthArtifactID!
    )}/${b64ToHex(entry.digest)}/${encodeURI(fileName)}`;
  }

  console.warn(`unhandled storage layout: ${layout}`);
  return '';
}

/**
 * Converts a manifest's flat map of '/'-separated paths into a nested
 * directory tree.
 *
 * Every directory on a file's path has the file's size added to its own size;
 * the root node's size is left at 0. A file's url is its `ref` when that is
 * truthy, otherwise the URL produced by artifactFileUrl. The entry's own
 * fields are spread last, so an entry field named `type`, `fullPath` or `url`
 * overrides the computed value.
 *
 * Folder and file names are always stored as own properties, so names such as
 * 'constructor' or '__proto__' neither collide with inherited members nor
 * modify any prototype.
 *
 * @param entityName forwarded to artifactFileUrl
 * @param defaultCloudRegion forwarded to artifactFileUrl
 * @param manifest manifest whose `contents` maps file path to entry
 * @param projectName optional, forwarded to artifactFileUrl
 * @param collectionName optional, forwarded to artifactFileUrl
 * @returns the root directory node (fullPath '')
 * @throws whatever artifactFileUrl throws (e.g. an entry without a digest)
 */
function makeFileTree(
  entityName: string,
  defaultCloudRegion: string,
  manifest: Manifest,
  projectName?: string,
  collectionName?: string
): DirMetadata {
  const fileTree: DirMetadata = {
    type: 'dir',
    fullPath: '',
    size: 0,
    dirs: {},
    files: {},
  };

  const hasOwn = (target: object, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(target, key);
  // Plain assignment to '__proto__' would change the prototype instead of
  // adding a key, so children are always defined explicitly.
  const setOwn = (target: object, key: string, value: unknown): void => {
    Object.defineProperty(target, key, {
      value,
      enumerable: true,
      writable: true,
      configurable: true,
    });
  };

  // A manifest without contents yields an empty tree.
  const contents: Manifest['contents'] = manifest.contents ?? {};
  for (const [name, entry] of Object.entries(contents)) {
    // 'media/images/image01.jpg' => ['media','images','image01.jpg']
    const segments = name.split('/');
    // split() always returns at least one element, so this is defined.
    const fileName = segments[segments.length - 1];

    let currentFolder = fileTree;
    for (const folderName of segments.slice(0, -1)) {
      // Create the subfolder if it doesn't already exist.
      if (!hasOwn(currentFolder.dirs, folderName)) {
        const newFolder: DirMetadata = {
          type: 'dir',
          fullPath:
            currentFolder.fullPath === ''
              ? folderName
              : currentFolder.fullPath + '/' + folderName,
          size: 0,
          dirs: {},
          files: {},
        };
        setOwn(currentFolder.dirs, folderName, newFolder);
      }
      currentFolder = currentFolder.dirs[folderName];
      currentFolder.size += entry.size;
    }

    // The last path segment is the file itself.
    const fileNode: DirMetadata['files'][string] = {
      type: 'file',
      fullPath: name,
      url: entry.ref
        ? entry.ref
        : artifactFileUrl(
            manifest.storagePolicy,
            manifest.storagePolicyConfig,
            defaultCloudRegion,
            entityName,
            entry,
            fileName,
            projectName,
            collectionName
          ),
      ...entry,
    };
    setOwn(currentFolder.files, fileName, fileNode);
  }
  return fileTree;
}

/**
 * Finds the node at a '/'-separated path below a directory.
 *
 * Only own properties of `dirs` and `files` are consulted, so path segments
 * such as 'constructor' or 'toString' do not resolve to inherited members.
 *
 * @param dir directory to start from
 * @param path path relative to `dir`; '' returns `dir` itself
 * @returns the directory or file node at `path` (a directory wins when both
 *   exist under the same name), or null when any segment is missing
 */
function lookupNode(dir: DirMetadata, path: string): MetadataNode | null {
  if (path === '') {
    return dir;
  }
  const hasOwn = (target: object, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(target, key);

  const pathItems = path.split('/');
  const lastItem = pathItems[pathItems.length - 1];

  // Walk every segment except the last one; each must be a directory.
  let current = dir;
  for (const pathItem of pathItems.slice(0, -1)) {
    if (!hasOwn(current.dirs, pathItem)) {
      return null;
    }
    current = current.dirs[pathItem];
    if (current == null) {
      return null;
    }
  }

  const subDir = hasOwn(current.dirs, lastItem)
    ? current.dirs[lastItem]
    : undefined;
  const file = hasOwn(current.files, lastItem)
    ? current.files[lastItem]
    : undefined;
  return subDir || file || null;
}

// Pending direct-URL requests, grouped by artifact ID. Each request keeps the
// layout it was made with and the settle callbacks of the promise handed back
// to its caller. The whole map is swapped for a fresh one when a batch timer
// fires.
let artifactFilesBatches: {
  [artifactGqlId: string]: Array<{
    assetPath: string;
    layout: string;
    promiseResolve: (result: string | null) => void;
    promiseReject: (reason?: unknown) => void;
  }>;
} = {};

/**
 * Queues a direct-URL lookup for one artifact file and returns a promise for
 * its result.
 *
 * Requests are collected in `artifactFilesBatches`. The call that finds the
 * map empty starts a 100ms timer; when it fires, one ArtifactFiles query is
 * issued per artifact ID, sequentially, using the client of the call that
 * started the timer. Each query uses the layout its own requests were queued
 * with (the first request's layout if they differ). A failing query rejects
 * only the requests of that artifact; the remaining batches still run.
 *
 * @param client Apollo client used for alias resolution and the batch query
 * @param layout storage layout passed as the query's `layout` variable
 * @param artifactGqlID artifact ID; an ID containing ':' is first resolved
 *   through resolveSequenceAliasToArtifactId
 * @param assetPath file name to look up
 * @returns the file's directUrl, or null when the query result has no edge
 *   whose node name equals `assetPath`
 * @throws (rejects) when alias resolution or the batch query fails
 */
async function loadArtifactDirectUrlBatched(
  client: ApolloClient,
  layout: string,
  artifactGqlID: string,
  assetPath: string
) {
  if (artifactGqlID.indexOf(':') > -1) {
    artifactGqlID = await resolveSequenceAliasToArtifactId(
      client,
      artifactGqlID
    );
  }

  // Only the request that finds the queue empty starts a timer.
  const doScheduleBatches = Object.keys(artifactFilesBatches).length === 0;
  if (artifactFilesBatches[artifactGqlID] == null) {
    artifactFilesBatches[artifactGqlID] = [];
  }
  const batch = artifactFilesBatches[artifactGqlID];
  // The executor runs synchronously, so the request is queued before we return.
  const assetPromise = new Promise<string | null>(
    (promiseResolve, promiseReject) => {
      batch.push({assetPath, layout, promiseResolve, promiseReject});
    }
  );

  const scheduleBatch = () => {
    setTimeout(async () => {
      const executeBatches = artifactFilesBatches;
      artifactFilesBatches = {};
      for (const [artifactID, runBatch] of Object.entries(executeBatches)) {
        try {
          const result = await client.query<ArtifactFilesQuery>({
            query: ArtifactFilesDocument,
            context: propagateErrorsContext(),
            fetchPolicy: 'no-cache',
            variables: {
              // A batch always holds at least one request (it is created
              // right before the first push).
              layout: runBatch[0].layout,
              artifactID,
              artifactFileNames: runBatch.map(a => a.assetPath),
            },
          });
          const edges = result.data?.artifact?.files?.edges ?? [];
          runBatch.forEach(({assetPath: doneAssetPath, promiseResolve}) => {
            const edge = edges.find(e => e.node?.name === doneAssetPath);
            promiseResolve(edge?.node?.directUrl ?? null);
          });
        } catch (err) {
          // Without this, the waiting promises would never settle and the
          // remaining batches would be dropped.
          runBatch.forEach(({promiseReject}) => promiseReject(err));
        }
      }
      // A new request arrived while our query was in flight. Schedule the next batch
      if (Object.keys(artifactFilesBatches).length !== 0) {
        scheduleBatch();
      }
    }, 100);
  };

  if (doScheduleBatches) {
    scheduleBatch();
  }

  return assetPromise;
}

/**
 * Resolves the direct URL of an artifact file by delegating to
 * loadArtifactDirectUrlBatched with the same arguments.
 *
 * @returns whatever the batched loader resolves with
 */
async function loadArtifactDirectUrl(
  client: ApolloClient,
  layout: string,
  artifactGqlID: string,
  assetPath: string
) {
  const directUrl = await loadArtifactDirectUrlBatched(
    client,
    layout,
    artifactGqlID,
    assetPath
  );
  return directUrl;
}

/**
 * Runs the SingleFile query for one run file and returns the directUrl of the
 * first file edge in the result.
 *
 * @param client Apollo client used to run the query
 * @param projectName query variable
 * @param runName query variable
 * @param fileName query variable
 * @param entityName optional query variable
 * @returns the first edge's node directUrl (undefined when that node is
 *   missing), or null when the result has no file edges
 */
async function loadRunFileDirectUrl(
  client: ApolloClient,
  projectName: string,
  runName: string,
  fileName: string,
  entityName?: string
) {
  const response = await client.query<SingleFileQuery>({
    query: SingleFileDocument,
    context: propagateErrorsContext(),
    fetchPolicy: 'no-cache',
    variables: {
      projectName,
      entityName,
      runName,
      fileName,
    },
  });

  const fileEdges = response.data?.project?.run?.files?.edges;
  if (!fileEdges || !(fileEdges.length > 0)) {
    return null;
  }
  return fileEdges[0].node?.directUrl;
}

/**
 * Resolves an aliased sequence ID of the form '<sequenceId>:latest' to the ID
 * of the sequence's latest artifact.
 *
 * @param client Apollo client used to run the resolver query
 * @param aliasedSequenceId exactly two ':'-separated parts, the second being
 *   'latest'
 * @returns the latest artifact's ID
 * @throws Error when the input has another shape or alias, or when the query
 *   result contains no latest artifact ID
 */
async function resolveSequenceAliasToArtifactId(
  client: ApolloClient,
  aliasedSequenceId: string
): Promise<string> {
  const [sequenceId, alias, ...extraParts] = aliasedSequenceId.split(':');
  if (extraParts.length > 0 || alias !== 'latest') {
    throw new Error('only "latest" alias is currently supported');
  }

  const response =
    await client.query<ResolveLatestSequenceAliasToArtifactIdQuery>({
      query: ResolveLatestSequenceAliasToArtifactIdDocument,
      context: propagateErrorsContext(),
      fetchPolicy: 'no-cache',
      variables: {
        sequenceId,
      },
    });

  const latestId = response.data?.artifactSequence?.latestArtifact?.id;
  if (latestId == null) {
    throw new Error('artifact sequence not found');
  }
  return latestId;
}
/**
 * Loads and parses an artifact's manifest and builds its file tree.
 *
 * Steps: resolve a ':'-aliased ID, run the ArtifactManifestFromId query,
 * download the manifest from its directUrl, JSON-parse it, then build the
 * tree with makeFileTree using the artifact sequence's project entity name
 * ('_' when unavailable).
 *
 * @param client Apollo client used for the queries
 * @param artifactId artifact ID, or an aliased sequence ID containing ':'
 * @returns the parsed manifest, its storage layout (default 'V1') and the
 *   root of the file tree
 * @throws Error when the artifact, the server's default cloud region or the
 *   manifest URL is missing, or when the manifest is not valid JSON
 */
async function loadArtifactManifest(
  client: ApolloClient,
  artifactId: string
): Promise<ReadyManifest> {
  const resolvedId =
    artifactId.indexOf(':') > -1
      ? await resolveSequenceAliasToArtifactId(client, artifactId)
      : artifactId;

  const {data} = await client.query<ArtifactManifestFromIdQuery>({
    query: ArtifactManifestFromIdDocument,
    context: propagateErrorsContext(),
    fetchPolicy: 'no-cache',
    variables: {
      artifactId: resolvedId,
    },
  });

  const artifact = data?.artifact;
  if (artifact == null) {
    throw new Error('artifact not found');
  }

  const regionId = data?.serverInfo?.defaultCloudRegion.id;
  if (regionId == null) {
    throw new Error('invalid state: server missing defaultCloudRegion');
  }

  const manifestUrl = artifact.currentManifest?.file.directUrl;
  if (manifestUrl == null) {
    throw new Error('invalid state: artifact missing manifest');
  }

  const rawManifest = await Requests.getUrlWithRetry(manifestUrl);
  let manifest: Manifest;
  try {
    manifest = JSON.parse(rawManifest);
  } catch {
    throw new Error("invalid state: couldn't parse manifest");
  }

  const layout = manifest.storagePolicyConfig.storageLayout ?? 'V1';
  const entityName = artifact.artifactSequence?.project?.entityName ?? '_';

  return {
    manifest,
    layout,
    rootMetadata: makeFileTree(entityName, regionId, manifest),
  };
}

/**
 * Loads and parses the manifest of the artifact found through a collection
 * membership, and builds its file tree.
 *
 * Runs the ArtifactManifestFromMembership query, downloads the manifest from
 * its directUrl, JSON-parses it and builds the tree with makeFileTree, passing
 * the project and collection names along.
 *
 * @param client Apollo client used for the query
 * @param entityName query variable; also used for the file tree
 * @param projectName query variable; also used for the file tree
 * @param collectionName sent as the query's `artifactName`; also used for the
 *   file tree
 * @param artifactVersionIndex sent (stringified) as the query's `versionIndex`
 * @returns the parsed manifest, its storage layout (default 'V1') and the
 *   root of the file tree
 * @throws Error when the project, collection, artifact, default cloud region
 *   or manifest URL is missing, or when the manifest is not valid JSON
 */
async function loadArtifactMembershipManifest(
  client: ApolloClient,
  entityName: string,
  projectName: string,
  collectionName: string,
  artifactVersionIndex: string
): Promise<ReadyManifest> {
  const {data} = await client.query<ArtifactManifestFromMembershipQuery>({
    query: ArtifactManifestFromMembershipDocument,
    context: propagateErrorsContext(),
    fetchPolicy: 'no-cache',
    variables: {
      projectName,
      entityName,
      artifactName: collectionName,
      versionIndex: `${artifactVersionIndex}`,
    },
  });

  const project = data?.project;
  if (project == null) {
    throw new Error('membership project not found');
  }

  const collection = project.artifactCollection;
  if (collection == null) {
    throw new Error('artifact collection not found');
  }

  const artifact = collection.artifactMembership?.artifact;
  if (artifact == null) {
    throw new Error('artifact not found via membership version index');
  }

  const regionId = data?.serverInfo?.defaultCloudRegion.id;
  if (regionId == null) {
    throw new Error('invalid state: server missing defaultCloudRegion');
  }

  const manifestUrl = artifact.currentManifest?.file.directUrl;
  if (manifestUrl == null) {
    throw new Error('invalid state: artifact missing manifest');
  }

  const rawManifest = await Requests.getUrlWithRetry(manifestUrl);
  let manifest: Manifest;
  try {
    manifest = JSON.parse(rawManifest);
  } catch {
    throw new Error("invalid state: couldn't parse manifest");
  }

  const layout = manifest.storagePolicyConfig.storageLayout ?? 'V1';
  const rootMetadata = makeFileTree(
    entityName,
    regionId,
    manifest,
    projectName,
    collectionName
  );

  return {manifest, layout, rootMetadata};
}

// TODO: Make sure we don't have multiple of these in flight
/**
 * Resolves a reference to a file in another artifact to that file's direct
 * URL.
 *
 * The reference is split with parseArtifactRef, the referenced artifact's
 * manifest is loaded through the manifest cache, and the URL is fetched with
 * loadArtifactDirectUrl.
 *
 * @param client Apollo client used for the underlying queries
 * @param ref artifact reference string understood by parseArtifactRef
 * @returns the referenced file's ID (artifact ID + path) and its direct URL
 * @throws Error when the referenced path is not in the referenced artifact's
 *   manifest, or when its entry has no digest
 */
async function loadArtifactReferenceDirectUrl(
  client: ApolloClient,
  ref: string
) {
  const {artifactId: refArtifactId, assetPath: refAssetPath} =
    parseArtifactRef(ref);
  const refManifest = await cachedLoadArtifactManifest(client, refArtifactId);

  const layout = refManifest.manifest.storagePolicyConfig.storageLayout ?? 'V1';
  const contents = refManifest.manifest.contents;
  // Own-property lookup, so a path like 'toString' is not mistaken for an entry.
  const refManifestEntry = Object.prototype.hasOwnProperty.call(
    contents,
    refAssetPath
  )
    ? contents[refAssetPath]
    : undefined;
  if (refManifestEntry == null) {
    // Previously this surfaced as a TypeError from reading `.digest`.
    throw new Error(
      'Invalid state: referenced artifact file not found: ' + refAssetPath
    );
  }
  if (refManifestEntry.digest == null) {
    throw new Error('Invalid state: missing digest');
  }
  const directUrl = await loadArtifactDirectUrl(
    client,
    layout,
    refArtifactId,
    refAssetPath
  );
  return {
    refFileId: {artifactId: refArtifactId, path: refAssetPath},
    directUrl,
  };
}

/**
 * Converts an 's3://bucket/key' reference into an HTTPS URL on
 * '<bucket>.s3.amazonaws.com'.
 *
 * @param ref S3 reference string
 * @returns `refFileId: null` and the HTTPS `directUrl`
 * @throws Error when no bucket name and object key can be extracted; also
 *   whatever `new URL` throws for an unparsable string
 */
async function loadS3ReferenceDirectUrl(ref: string) {
  const url = new URL(ref);

  // Spec-compliant URL parsers (e.g. Node.js) report the bucket as `hostname`
  // and the key as `pathname` ('/key'). Parsers that do not extract an
  // authority for non-special schemes leave everything in `pathname`
  // ('//bucket/key'). Support both shapes.
  let bucketName: string | undefined;
  let objectKey: string | undefined;
  if (url.hostname !== '') {
    bucketName = url.hostname;
    objectKey = url.pathname.replace(/^\//, '');
  } else {
    const match = url.pathname.match(/^\/\/(.+?)\/(.+)$/);
    if (match) {
      [, bucketName, objectKey] = match;
    }
  }
  if (!bucketName || !objectKey) {
    throw new Error('Invalid S3 reference ' + ref);
  }

  return {
    refFileId: null,

    // this scheme may not work for all buckets -- see https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html#access-bucket-intro
    // the documentation implies that the region name is mandatory (e.g. s3-us-west-2.amazonaws.com)
    // for "region-specific" buckets, but this was not the original author's experience.
    // NOTE(review): bucket names containing periods may also be a problem with
    // this URL style -- TODO confirm.

    // we can use the S3 SDK to get the region (or the object!) but this requires an
    // identity pool: https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/s3-example-photos-view.html#s3-example-photos-view-scenario-prerequisites
    directUrl: `https://${bucketName}.s3.amazonaws.com/${objectKey}`,
  };
}

/**
 * Converts a 'gs://bucket/key' reference into an HTTPS URL on
 * 'storage.googleapis.com'.
 *
 * @param ref GS reference string
 * @returns `refFileId: null` and the HTTPS `directUrl`
 * @throws Error when no bucket name and object key can be extracted; also
 *   whatever `new URL` throws for an unparsable string
 */
async function loadGSReferenceDirectUrl(ref: string) {
  const url = new URL(ref);

  // Spec-compliant URL parsers (e.g. Node.js) report the bucket as `hostname`
  // and the key as `pathname` ('/key'). Parsers that do not extract an
  // authority for non-special schemes leave everything in `pathname`
  // ('//bucket/key'). Support both shapes.
  let bucketName: string | undefined;
  let objectKey: string | undefined;
  if (url.hostname !== '') {
    bucketName = url.hostname;
    objectKey = url.pathname.replace(/^\//, '');
  } else {
    const match = url.pathname.match(/^\/\/(.+?)\/(.+)$/);
    if (match) {
      [, bucketName, objectKey] = match;
    }
  }
  if (!bucketName || !objectKey) {
    throw new Error('Invalid GS reference ' + ref);
  }

  return {
    refFileId: null,
    directUrl: `https://storage.googleapis.com/${bucketName}/${objectKey}`,
  };
}

/**
 * Downloads the contents of an artifact file.
 *
 * The direct URL comes from loadArtifactFileDirectUrl; the body is fetched
 * with Requests.getUrlWithRetry.
 *
 * @returns the fetched contents together with the refFileId reported for the
 *   URL, or `{refFileId: null, contents: null}` when no direct URL was found
 */
async function loadArtifactFileContent(
  client: ApolloClient,
  artifactId: string,
  assetPath: string
) {
  const {directUrl: url, refFileId: fileId} = await loadArtifactFileDirectUrl(
    client,
    artifactId,
    assetPath
  );
  if (url == null) {
    return {refFileId: null, contents: null};
  }

  const contents = await Requests.getUrlWithRetry(url);
  return {refFileId: fileId, contents};
}

/**
 * Downloads the contents of a run file.
 *
 * The direct URL comes from loadRunFileDirectUrl; the body is fetched with
 * Requests.getUrlWithRetry.
 *
 * @returns `{contents}` with the fetched body, or `{contents: null}` when no
 *   direct URL was found
 */
async function loadRunFileContent(
  client: ApolloClient,
  projectName: string,
  runName: string,
  fileName: string,
  entityName?: string
) {
  const url = await loadRunFileDirectUrl(
    client,
    projectName,
    runName,
    fileName,
    entityName
  );
  if (url == null) {
    return {contents: null};
  }

  const contents = await Requests.getUrlWithRetry(url);
  return {contents};
}

/**
 * Looks up the metadata node at `assetPath` in an artifact's file tree.
 *
 * The manifest is obtained through cachedLoadArtifactManifest and the lookup
 * is done by lookupNode on its root metadata.
 *
 * @returns whatever lookupNode returns for the path
 */
export async function loadArtifactFilePathMetadata(
  client: ApolloClient,
  artifactId: string,
  assetPath: string
) {
  const {rootMetadata} = await cachedLoadArtifactManifest(client, artifactId);
  return lookupNode(rootMetadata, assetPath);
}

/**
 * Looks up the metadata node at `assetPath` in the file tree of an artifact
 * reached through a collection membership.
 *
 * The manifest is obtained through cachedLoadArtifactMembershipManifest and
 * the lookup is done by lookupNode on its root metadata.
 *
 * @returns whatever lookupNode returns for the path
 */
export async function loadArtifactMembershipFilePathMetadata(
  client: ApolloClient,
  artifactCollectionMembershipId: string,
  entityName: string,
  projectName: string,
  collectionName: string,
  artifactVersionIndex: string,
  assetPath: string
) {
  const {rootMetadata} = await cachedLoadArtifactMembershipManifest(
    client,
    artifactCollectionMembershipId,
    entityName,
    projectName,
    collectionName,
    artifactVersionIndex
  );
  return lookupNode(rootMetadata, assetPath);
}

/**
 * Resolves the direct download URL for a file in an artifact.
 *
 * Entries without a `ref` are resolved with loadArtifactDirectUrl. Entries
 * with a `ref` are dispatched on the reference's URL protocol:
 * 'wandb-artifact:' (another artifact's file), 'http:' / 'https:' (the ref
 * itself is the URL), 's3:' and 'gs:'.
 *
 * @param client Apollo client used for the underlying queries
 * @param artifactId artifact whose manifest is consulted
 * @param assetPath path of the file in that manifest
 * @returns `directUrl` plus `refFileId`, which is non-null only for artifact
 *   references; both are null when the manifest has no entry for `assetPath`
 * @throws Error for a reference with any other protocol, or for a non-ref
 *   entry without a digest; also whatever `new URL` throws for a malformed ref
 */
export async function loadArtifactFileDirectUrl(
  client: ApolloClient,
  artifactId: string,
  assetPath: string
) {
  const manifest = await cachedLoadArtifactManifest(client, artifactId);
  const contents = manifest.manifest.contents;
  // Own-property lookup, so a path like 'toString' is not mistaken for an entry.
  const manifestEntry = Object.prototype.hasOwnProperty.call(
    contents,
    assetPath
  )
    ? contents[assetPath]
    : undefined;
  if (manifestEntry == null) {
    return {
      refFileId: null,
      directUrl: null,
    };
  }

  if (manifestEntry.ref == null) {
    if (manifestEntry.digest == null) {
      throw new Error('artifact file missing digest');
    }
    const directUrl = await loadArtifactDirectUrl(
      client,
      manifest.layout,
      artifactId,
      assetPath
    );
    return {
      refFileId: null,
      directUrl,
    };
  }

  // Protocols are compared exactly; an unanchored /https?:/ match would also
  // accept schemes that merely contain 'http:'.
  const {protocol} = new URL(manifestEntry.ref);
  switch (protocol) {
    case 'wandb-artifact:':
      // Artifact reference: load it and return the fileId along with the URL.
      return loadArtifactReferenceDirectUrl(client, manifestEntry.ref);
    case 'http:':
    case 'https:':
      return {
        refFileId: null,
        directUrl: manifestEntry.ref,
      };
    case 's3:':
      return loadS3ReferenceDirectUrl(manifestEntry.ref);
    case 'gs:':
      return loadGSReferenceDirectUrl(manifestEntry.ref);
    default:
      throw new Error("Can't load reference to: " + manifestEntry.ref);
  }
}

// TODO: provide a mechanism for users to parse results and cache that (without caching
// the content string)
/**
 * Cached variant of loadArtifactFileContent.
 *
 * The promise returned by the loader is stored in `artifactFileCache` under
 * artifact ID and asset path, and later calls with the same keys get that
 * stored promise.
 */
export async function cachedLoadArtifactFileContent(
  client: ApolloClient,
  artifactId: string,
  assetPath: string
) {
  const existing = artifactFileCache[artifactId]?.[assetPath];
  if (existing != null) {
    return existing;
  }

  const pending = loadArtifactFileContent(client, artifactId, assetPath);
  if (artifactFileCache[artifactId] == null) {
    artifactFileCache[artifactId] = {};
  }
  artifactFileCache[artifactId][assetPath] = pending;
  return pending;
}

/**
 * Cached variant of loadArtifactManifest.
 *
 * The promise returned by the loader is stored in `artifactManifestCache`
 * under the artifact ID exactly as passed in, and later calls with the same
 * ID get that stored promise.
 */
export async function cachedLoadArtifactManifest(
  client: ApolloClient,
  artifactId: string
) {
  const existing = artifactManifestCache[artifactId];
  if (existing != null) {
    return existing;
  }

  const pending = loadArtifactManifest(client, artifactId);
  artifactManifestCache[artifactId] = pending;
  return pending;
}

/**
 * Cached variant of loadArtifactMembershipManifest.
 *
 * Entries live in `artifactManifestCache`, keyed by the membership ID. The
 * membership ID is used only as the cache key; the loader itself receives the
 * entity, project, collection and version index.
 */
export async function cachedLoadArtifactMembershipManifest(
  client: ApolloClient,
  artifactCollectionMembershipId: string,
  entityName: string,
  projectName: string,
  collectionName: string,
  artifactVersionIndex: string
) {
  // The cache stores the promise rather than the resolved value, so callers
  // that arrive while a load is still in flight share that one query.
  const existing = artifactManifestCache[artifactCollectionMembershipId];
  if (existing != null) {
    return existing;
  }

  const pending = loadArtifactMembershipManifest(
    client,
    entityName,
    projectName,
    collectionName,
    artifactVersionIndex
  );
  artifactManifestCache[artifactCollectionMembershipId] = pending;
  return pending;
}

/**
 * Cached variant of loadRunFileContent.
 *
 * The promise returned by the loader is stored in `runFileCache` under
 * entity (a missing entity name is stored under ''), project, run and file
 * name, and later calls with the same keys get that stored promise.
 */
export async function cachedLoadRunFileContent(
  client: ApolloClient,
  projectName: string,
  runName: string,
  fileName: string,
  entityName?: string
) {
  const entityKey = entityName ?? '';

  // Walk the nested cache, creating each missing level on the way down.
  let projects = runFileCache[entityKey];
  if (projects == null) {
    projects = {};
    runFileCache[entityKey] = projects;
  }
  let runs = projects[projectName];
  if (runs == null) {
    runs = {};
    projects[projectName] = runs;
  }
  let files = runs[runName];
  if (files == null) {
    files = {};
    runs[runName] = files;
  }

  let cachedFile = files[fileName];
  if (cachedFile == null) {
    cachedFile = loadRunFileContent(
      client,
      projectName,
      runName,
      fileName,
      entityName
    );
    files[fileName] = cachedFile;
  }
  return cachedFile;
}
