
#------------------------------------------------------------------------------
# $File: apache,v 1.3 2025/05/30 13:25:13 christos Exp $
# apache: file(1) magic for Apache Big Data formats

# Avro files (object container format)
# The test matches the ASCII bytes "Obj" followed by the byte 0x01 at the
# start of the file; that trailing byte is what is reported as "version 1".
# The extension is declared for consistency with the Arrow and Parquet
# entries in this file, so that `file --extension` has something to report.
0	string		Obj\001		Apache Avro, version 1
!:ext avro

# ORC files
# Important information is in the file footer, which we can't index to :(
# so only the leading 3-byte "ORC" marker is tested here.
# The extension is declared for consistency with the Arrow and Parquet
# entries in this file, so that `file --extension` has something to report.
0	string		ORC		Apache ORC
!:ext orc

# Apache Arrow file format
# Matches the 6-byte "ARROW1" marker at the start of the file.
# MIME: https://www.iana.org/assignments/media-types/application/vnd.apache.arrow.file
# (the separate application/vnd.apache.arrow.stream registration is a
# different media type and is not what this entry reports)
# Description: https://arrow.apache.org/docs/format/Columnar.html
0	string		ARROW1		Apache Arrow columnar file
!:mime application/vnd.apache.arrow.file
!:ext arrow/feather

# Apache Parquet file format
# Matches the 4-byte "PAR1" marker at the start of the file; nothing beyond
# that marker is inspected by this entry.
# MIME: https://www.iana.org/assignments/media-types/application/vnd.apache.parquet
# Description: https://parquet.apache.org/docs/file-format/
0	string		PAR1		Apache Parquet file
!:mime application/vnd.apache.parquet
!:ext parquet

# Hive RC files
# Matches the 3-byte "RCF" marker at the start of the file.
0	string		RCF		Apache Hive RC file
# The byte right after the marker is printed as the version number; the `x`
# test accepts any value, so no range check is applied to it.
>3	byte		x		version %d

# Sequence files (and the careless first version of RC file)
# The byte at offset 3, right after the "SEQ" marker, is treated as the
# format version. Every value other than 6 is reported directly as a Hadoop
# Sequence file. Version 6 needs a closer look because the first version of
# the Hive RC file format reused the same header.

0	string		SEQ
>3	byte		<6		Apache Hadoop Sequence file version %d
>3	byte		>6		Apache Hadoop Sequence file version %d
>3	byte		=6
# The RCFile key buffer class name at offset 5 marks the file as a Hive RC
# file (reported as version 0) rather than a plain Sequence file.
# NOTE(review): presumably this is the key class name stored after a one-byte
# length at offset 4 -- confirm against the SequenceFile header layout.
>>5	string		org.apache.hadoop.hive.ql.io.RCFile$KeyBuffer  Apache Hive RC file version 0
# `default` only fires when no earlier test at this continuation level
# matched, i.e. when the class-name test above failed; keep it last.
>>3	default		x		Apache Hadoop Sequence file version 6
